diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-07 09:40:34 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-07 09:40:34 -0700 |
commit | e1ec517e18acc0aa9795ff92c15f0adabcb12db9 (patch) | |
tree | f3f135b962b3044e48784d061fdccdf561eae80d | |
parent | 19b39c38abf68591edbd698740d410c37ee075cc (diff) | |
parent | f073531070d24bbb82cb2658952d949f4851024b (diff) | |
download | linux-e1ec517e18acc0aa9795ff92c15f0adabcb12db9.tar.gz linux-e1ec517e18acc0aa9795ff92c15f0adabcb12db9.tar.bz2 linux-e1ec517e18acc0aa9795ff92c15f0adabcb12db9.zip |
Merge branch 'hch.init_path' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull init and set_fs() cleanups from Al Viro:
"Christoph's 'getting rid of ksys_...() uses under KERNEL_DS' series"
* 'hch.init_path' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (50 commits)
init: add an init_dup helper
init: add an init_utimes helper
init: add an init_stat helper
init: add an init_mknod helper
init: add an init_mkdir helper
init: add an init_symlink helper
init: add an init_link helper
init: add an init_eaccess helper
init: add an init_chmod helper
init: add an init_chown helper
init: add an init_chroot helper
init: add an init_chdir helper
init: add an init_rmdir helper
init: add an init_unlink helper
init: add an init_umount helper
init: add an init_mount helper
init: mark create_dev as __init
init: mark console_on_rootfs as __init
init: initialize ramdisk_execute_command at compile time
devtmpfs: refactor devtmpfsd()
...
35 files changed, 796 insertions, 768 deletions
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index ce02f92f4ab2..6c12d9fe694e 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -91,8 +91,6 @@ __tagtable(ATAG_VIDEOTEXT, parse_tag_videotext); static int __init parse_tag_ramdisk(const struct tag *tag) { rd_image_start = tag->u.ramdisk.start; - rd_doload = (tag->u.ramdisk.flags & 1) == 0; - rd_prompt = (tag->u.ramdisk.flags & 2) == 0; if (tag->u.ramdisk.size) rd_size = tag->u.ramdisk.size; diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 67f5a3b44c2e..4144be650d41 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -290,8 +290,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_BLK_DEV_RAM rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); #endif if (!MOUNT_ROOT_RDONLY) diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 6d07b85b9e24..eea43a1aef1b 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -353,8 +353,6 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = old_decode_dev(root_dev); #ifdef CONFIG_BLK_DEV_RAM rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); #endif prom_setsync(prom_sync_me); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index f765fda871eb..d87244197d5c 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -659,8 +659,6 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = old_decode_dev(root_dev); #ifdef CONFIG_BLK_DEV_RAM rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); #endif task_thread_info(&init_task)->kregs = &fake_swapper_regs; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a3767e74c758..b9a68d8e06d8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -870,8 +870,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_BLK_DEV_RAM rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index c9017e0584c0..eac184e6d657 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -25,6 +25,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/kthread.h> +#include <linux/init_syscalls.h> #include <uapi/linux/mount.h> #include "base.h" @@ -359,7 +360,7 @@ int __init devtmpfs_mount(void) if (!thread) return 0; - err = do_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL); + err = init_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL); if (err) printk(KERN_INFO "devtmpfs: error mounting %i\n", err); else @@ -378,30 +379,8 @@ static int handle(const char *name, umode_t mode, kuid_t uid, kgid_t gid, return handle_remove(name, dev); } -static int devtmpfs_setup(void *p) +static void __noreturn devtmpfs_work_loop(void) { - int err; - - err = ksys_unshare(CLONE_NEWNS); - if (err) - goto out; - err = do_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); - if (err) - goto out; - ksys_chdir("/.."); /* will traverse into overmounted root */ - ksys_chroot("."); -out: - *(int *)p = err; - complete(&setup_done); - return err; -} - -static int devtmpfsd(void *p) -{ - int err = devtmpfs_setup(p); - - if (err) - return err; while (1) { spin_lock(&req_lock); while (requests) { @@ -421,6 +400,38 @@ static int devtmpfsd(void *p) spin_unlock(&req_lock); schedule(); } +} + +static int __init devtmpfs_setup(void *p) +{ + int err; + + err = ksys_unshare(CLONE_NEWNS); + if (err) + goto out; + err = init_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); + if (err) + goto out; + init_chdir("/.."); /* will traverse into overmounted root */ + init_chroot("."); +out: + *(int *)p = err; + complete(&setup_done); + return err; +} + +/* + * The __ref is because devtmpfs_setup needs to be __init for the routines it + * calls. That call is done while devtmpfs_init, which is marked __init, + * synchronously waits for it to complete. + */ +static int __ref devtmpfsd(void *p) +{ + int err = devtmpfs_setup(p); + + if (err) + return err; + devtmpfs_work_loop(); return 0; } diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 31840f95cd40..6d3e234dc46a 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -43,6 +43,9 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_MD_CLUSTER) += md-cluster.o obj-$(CONFIG_BCACHE) += bcache/ obj-$(CONFIG_BLK_DEV_MD) += md-mod.o +ifeq ($(CONFIG_BLK_DEV_MD),y) +obj-y += md-autodetect.o +endif obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o obj-$(CONFIG_DM_UNSTRIPED) += dm-unstripe.o diff --git a/init/do_mounts_md.c b/drivers/md/md-autodetect.c index b84031528dd4..6bbec89976a7 100644 --- a/init/do_mounts_md.c +++ b/drivers/md/md-autodetect.c @@ -1,9 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/blkdev.h> +#include <linux/init.h> +#include <linux/mount.h> +#include <linux/major.h> #include <linux/delay.h> +#include <linux/init_syscalls.h> +#include <linux/raid/detect.h> #include <linux/raid/md_u.h> #include <linux/raid/md_p.h> - -#include "do_mounts.h" +#include "md.h" /* * When md (and any require personalities) are compiled into the kernel @@ -21,7 +27,7 @@ static int __initdata raid_noautodetect=1; #endif static int __initdata raid_autopart; -static struct { +static struct md_setup_args { int minor; int partitioned; int level; @@ -114,132 +120,118 @@ static int __init md_setup(char *str) return 1; } -static void __init md_setup_drive(void) +static void __init md_setup_drive(struct md_setup_args *args) { - int minor, i, ent, partitioned; - dev_t dev; - dev_t devices[MD_SB_DISKS+1]; - - for (ent = 0; ent < md_setup_ents ; ent++) { - int fd; - int err = 0; - char *devname; - mdu_disk_info_t dinfo; - char name[16]; - - minor = md_setup_args[ent].minor; - partitioned = md_setup_args[ent].partitioned; - devname = md_setup_args[ent].device_names; - - sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor); - if (partitioned) - dev = MKDEV(mdp_major, minor << MdpMinorShift); - else - dev = MKDEV(MD_MAJOR, minor); - create_dev(name, dev); - for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) { - char *p; - char comp_name[64]; - u32 rdev; - - p = strchr(devname, ','); - if (p) - *p++ = 0; - - dev = name_to_dev_t(devname); - if (strncmp(devname, "/dev/", 5) == 0) - devname += 5; - snprintf(comp_name, 63, "/dev/%s", devname); - rdev = bstat(comp_name); - if (rdev) - dev = new_decode_dev(rdev); - if (!dev) { - printk(KERN_WARNING "md: Unknown device name: %s\n", devname); - break; - } - - devices[i] = dev; + char *devname = args->device_names; + dev_t devices[MD_SB_DISKS + 1], mdev; + struct mdu_array_info_s ainfo = { }; + struct block_device *bdev; + struct mddev *mddev; + int err = 0, i; + char name[16]; + + if (args->partitioned) { + mdev = MKDEV(mdp_major, args->minor << MdpMinorShift); + sprintf(name, "md_d%d", args->minor); + } else { + mdev = MKDEV(MD_MAJOR, args->minor); + sprintf(name, "md%d", args->minor); + } - devname = p; + for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) { + struct kstat stat; + char *p; + char comp_name[64]; + dev_t dev; + + p = strchr(devname, ','); + if (p) + *p++ = 0; + + dev = name_to_dev_t(devname); + if (strncmp(devname, "/dev/", 5) == 0) + devname += 5; + snprintf(comp_name, 63, "/dev/%s", devname); + if (init_stat(comp_name, &stat, 0) == 0 && S_ISBLK(stat.mode)) + dev = new_decode_dev(stat.rdev); + if (!dev) { + pr_warn("md: Unknown device name: %s\n", devname); + break; } - devices[i] = 0; - if (!i) - continue; + devices[i] = dev; + devname = p; + } + devices[i] = 0; - printk(KERN_INFO "md: Loading md%s%d: %s\n", - partitioned ? "_d" : "", minor, - md_setup_args[ent].device_names); + if (!i) + return; - fd = ksys_open(name, 0, 0); - if (fd < 0) { - printk(KERN_ERR "md: open failed - cannot start " - "array %s\n", name); - continue; - } - if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) { - printk(KERN_WARNING - "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", - minor); - ksys_close(fd); - continue; - } + pr_info("md: Loading %s: %s\n", name, args->device_names); - if (md_setup_args[ent].level != LEVEL_NONE) { - /* non-persistent */ - mdu_array_info_t ainfo; - ainfo.level = md_setup_args[ent].level; - ainfo.size = 0; - ainfo.nr_disks =0; - ainfo.raid_disks =0; - while (devices[ainfo.raid_disks]) - ainfo.raid_disks++; - ainfo.md_minor =minor; - ainfo.not_persistent = 1; - - ainfo.state = (1 << MD_SB_CLEAN); - ainfo.layout = 0; - ainfo.chunk_size = md_setup_args[ent].chunk; - err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo); - for (i = 0; !err && i <= MD_SB_DISKS; i++) { - dev = devices[i]; - if (!dev) - break; - dinfo.number = i; - dinfo.raid_disk = i; - dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC); - dinfo.major = MAJOR(dev); - dinfo.minor = MINOR(dev); - err = ksys_ioctl(fd, ADD_NEW_DISK, - (long)&dinfo); - } - } else { - /* persistent */ - for (i = 0; i <= MD_SB_DISKS; i++) { - dev = devices[i]; - if (!dev) - break; - dinfo.major = MAJOR(dev); - dinfo.minor = MINOR(dev); - ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo); - } - } - if (!err) - err = ksys_ioctl(fd, RUN_ARRAY, 0); - if (err) - printk(KERN_WARNING "md: starting md%d failed\n", minor); - else { - /* reread the partition table. - * I (neilb) and not sure why this is needed, but I cannot - * boot a kernel with devfs compiled in from partitioned md - * array without it - */ - ksys_close(fd); - fd = ksys_open(name, 0, 0); - ksys_ioctl(fd, BLKRRPART, 0); + bdev = blkdev_get_by_dev(mdev, FMODE_READ, NULL); + if (IS_ERR(bdev)) { + pr_err("md: open failed - cannot start array %s\n", name); + return; + } + + err = -EIO; + if (WARN(bdev->bd_disk->fops != &md_fops, + "Opening block device %x resulted in non-md device\n", + mdev)) + goto out_blkdev_put; + + mddev = bdev->bd_disk->private_data; + + err = mddev_lock(mddev); + if (err) { + pr_err("md: failed to lock array %s\n", name); + goto out_blkdev_put; + } + + if (!list_empty(&mddev->disks) || mddev->raid_disks) { + pr_warn("md: Ignoring %s, already autodetected. (Use raid=noautodetect)\n", + name); + goto out_unlock; + } + + if (args->level != LEVEL_NONE) { + /* non-persistent */ + ainfo.level = args->level; + ainfo.md_minor = args->minor; + ainfo.not_persistent = 1; + ainfo.state = (1 << MD_SB_CLEAN); + ainfo.chunk_size = args->chunk; + while (devices[ainfo.raid_disks]) + ainfo.raid_disks++; + } + + err = md_set_array_info(mddev, &ainfo); + + for (i = 0; i <= MD_SB_DISKS && devices[i]; i++) { + struct mdu_disk_info_s dinfo = { + .major = MAJOR(devices[i]), + .minor = MINOR(devices[i]), + }; + + if (args->level != LEVEL_NONE) { + dinfo.number = i; + dinfo.raid_disk = i; + dinfo.state = + (1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC); } - ksys_close(fd); + + md_add_new_disk(mddev, &dinfo); } + + if (!err) + err = do_md_run(mddev); + if (err) + pr_warn("md: starting %s failed\n", name); +out_unlock: + mddev_unlock(mddev); +out_blkdev_put: + blkdev_put(bdev, FMODE_READ); } static int __init raid_setup(char *str) @@ -274,8 +266,6 @@ __setup("md=", md_setup); static void __init autodetect_raid(void) { - int fd; - /* * Since we don't want to detect and use half a raid array, we need to * wait for the known devices to complete their probing @@ -284,21 +274,18 @@ static void __init autodetect_raid(void) printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n"); wait_for_device_probe(); - - fd = ksys_open("/dev/md0", 0, 0); - if (fd >= 0) { - ksys_ioctl(fd, RAID_AUTORUN, raid_autopart); - ksys_close(fd); - } + md_autostart_arrays(raid_autopart); } void __init md_run_setup(void) { - create_dev("/dev/md0", MKDEV(MD_MAJOR, 0)); + int ent; if (raid_noautodetect) printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n"); else autodetect_raid(); - md_setup_drive(); + + for (ent = 0; ent < md_setup_ents; ent++) + md_setup_drive(&md_setup_args[ent]); } diff --git a/drivers/md/md.c b/drivers/md/md.c index 153bc766bc5a..15bbdc1630ed 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -68,10 +68,6 @@ #include "md-bitmap.h" #include "md-cluster.h" -#ifndef MODULE -static void autostart_arrays(int part); -#endif - /* pers_list is a list of registered personalities protected * by pers_lock. * pers_lock does extra service to protect accesses to @@ -332,8 +328,6 @@ static struct ctl_table raid_root_table[] = { { } }; -static const struct block_device_operations md_fops; - static int start_readonly; /* @@ -4426,7 +4420,6 @@ array_state_show(struct mddev *mddev, char *page) static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev); static int md_set_readonly(struct mddev *mddev, struct block_device *bdev); -static int do_md_run(struct mddev *mddev); static int restart_array(struct mddev *mddev); static ssize_t @@ -6089,7 +6082,7 @@ abort: } EXPORT_SYMBOL_GPL(md_run); -static int do_md_run(struct mddev *mddev) +int do_md_run(struct mddev *mddev) { int err; @@ -6724,7 +6717,7 @@ static int get_disk_info(struct mddev *mddev, void __user * arg) return 0; } -static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) +int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) { char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; struct md_rdev *rdev; @@ -6770,7 +6763,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) } /* - * add_new_disk can be used once the array is assembled + * md_add_new_disk can be used once the array is assembled * to add "hot spares". They must already have a superblock * written */ @@ -6883,7 +6876,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) return err; } - /* otherwise, add_new_disk is only allowed + /* otherwise, md_add_new_disk is only allowed * for major_version==0 superblocks */ if (mddev->major_version != 0) { @@ -7128,7 +7121,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd) } /* - * set_array_info is used two different ways + * md_set_array_info is used two different ways * The original usage is when creating a new array. * In this usage, raid_disks is > 0 and it together with * level, size, not_persistent,layout,chunksize determine the @@ -7140,9 +7133,8 @@ static int set_bitmap_file(struct mddev *mddev, int fd) * The minor and patch _version numbers are also kept incase the * super_block handler wishes to interpret them. */ -static int set_array_info(struct mddev *mddev, mdu_array_info_t *info) +int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info) { - if (info->raid_disks == 0) { /* just setting version number for superblock loading */ if (info->major_version < 0 || @@ -7492,7 +7484,6 @@ static inline bool md_ioctl_valid(unsigned int cmd) case GET_DISK_INFO: case HOT_ADD_DISK: case HOT_REMOVE_DISK: - case RAID_AUTORUN: case RAID_VERSION: case RESTART_ARRAY_RW: case RUN_ARRAY: @@ -7538,13 +7529,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, case RAID_VERSION: err = get_version(argp); goto out; - -#ifndef MODULE - case RAID_AUTORUN: - err = 0; - autostart_arrays(arg); - goto out; -#endif default:; } @@ -7643,7 +7627,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto unlock; } - err = set_array_info(mddev, &info); + err = md_set_array_info(mddev, &info); if (err) { pr_warn("md: couldn't set array info. %d\n", err); goto unlock; @@ -7697,7 +7681,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, /* Need to clear read-only for this */ break; else - err = add_new_disk(mddev, &info); + err = md_add_new_disk(mddev, &info); goto unlock; } break; @@ -7765,7 +7749,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, if (copy_from_user(&info, argp, sizeof(info))) err = -EFAULT; else - err = add_new_disk(mddev, &info); + err = md_add_new_disk(mddev, &info); goto unlock; } @@ -7888,7 +7872,7 @@ static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing) return ret; } -static const struct block_device_operations md_fops = +const struct block_device_operations md_fops = { .owner = THIS_MODULE, .submit_bio = md_submit_bio, @@ -9791,7 +9775,7 @@ void md_autodetect_dev(dev_t dev) } } -static void autostart_arrays(int part) +void md_autostart_arrays(int part) { struct md_rdev *rdev; struct detected_devices_node *node_detected_dev; diff --git a/drivers/md/md.h b/drivers/md/md.h index f79b5b4101ef..d9c4e6b7e939 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -803,4 +803,16 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio !bio->bi_disk->queue->limits.max_write_zeroes_sectors) mddev->queue->limits.max_write_zeroes_sectors = 0; } + +struct mdu_array_info_s; +struct mdu_disk_info_s; + +extern int mdp_major; +void md_autostart_arrays(int part); +int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info); +int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info); +int do_md_run(struct mddev *mddev); + +extern const struct block_device_operations md_fops; + #endif /* _MD_MD_H */ diff --git a/fs/Makefile b/fs/Makefile index 2ce5112b02c8..1c7b0e3f6daa 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o d_path.o \ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \ - fs_types.o fs_context.o fs_parser.o fsopen.o + fs_types.o fs_context.o fs_parser.o fsopen.o init.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/file.c b/fs/file.c index 4cb9ef4d8571..21c0893f2f1d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1145,7 +1145,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) return ksys_dup3(oldfd, newfd, 0); } -int ksys_dup(unsigned int fildes) +SYSCALL_DEFINE1(dup, unsigned int, fildes) { int ret = -EBADF; struct file *file = fget_raw(fildes); @@ -1160,11 +1160,6 @@ int ksys_dup(unsigned int fildes) return ret; } -SYSCALL_DEFINE1(dup, unsigned int, fildes) -{ - return ksys_dup(fildes); -} - int f_dupfd(unsigned int from, struct file *file, unsigned flags) { int err; diff --git a/fs/init.c b/fs/init.c new file mode 100644 index 000000000000..e9c320a48cf1 --- /dev/null +++ b/fs/init.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Routines that mimic syscalls, but don't use the user address space or file + * descriptors. Only for init/ and related early init code. + */ +#include <linux/init.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/fs.h> +#include <linux/fs_struct.h> +#include <linux/file.h> +#include <linux/init_syscalls.h> +#include <linux/security.h> +#include "internal.h" + +int __init init_mount(const char *dev_name, const char *dir_name, + const char *type_page, unsigned long flags, void *data_page) +{ + struct path path; + int ret; + + ret = kern_path(dir_name, LOOKUP_FOLLOW, &path); + if (ret) + return ret; + ret = path_mount(dev_name, &path, type_page, flags, data_page); + path_put(&path); + return ret; +} + +int __init init_umount(const char *name, int flags) +{ + int lookup_flags = LOOKUP_MOUNTPOINT; + struct path path; + int ret; + + if (!(flags & UMOUNT_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + ret = kern_path(name, lookup_flags, &path); + if (ret) + return ret; + return path_umount(&path, flags); +} + +int __init init_chdir(const char *filename) +{ + struct path path; + int error; + + error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); + if (error) + return error; + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + if (!error) + set_fs_pwd(current->fs, &path); + path_put(&path); + return error; +} + +int __init init_chroot(const char *filename) +{ + struct path path; + int error; + + error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); + if (error) + return error; + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + if (error) + goto dput_and_out; + error = -EPERM; + if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) + goto dput_and_out; + error = security_path_chroot(&path); + if (error) + goto dput_and_out; + set_fs_root(current->fs, &path); +dput_and_out: + path_put(&path); + return error; +} + +int __init init_chown(const char *filename, uid_t user, gid_t group, int flags) +{ + int lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; + struct path path; + int error; + + error = kern_path(filename, lookup_flags, &path); + if (error) + return error; + error = mnt_want_write(path.mnt); + if (!error) { + error = chown_common(&path, user, group); + mnt_drop_write(path.mnt); + } + path_put(&path); + return error; +} + +int __init init_chmod(const char *filename, umode_t mode) +{ + struct path path; + int error; + + error = kern_path(filename, LOOKUP_FOLLOW, &path); + if (error) + return error; + error = chmod_common(&path, mode); + path_put(&path); + return error; +} + +int __init init_eaccess(const char *filename) +{ + struct path path; + int error; + + error = kern_path(filename, LOOKUP_FOLLOW, &path); + if (error) + return error; + error = inode_permission(d_inode(path.dentry), MAY_ACCESS); + path_put(&path); + return error; +} + +int __init init_stat(const char *filename, struct kstat *stat, int flags) +{ + int lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; + struct path path; + int error; + + error = kern_path(filename, lookup_flags, &path); + if (error) + return error; + error = vfs_getattr(&path, stat, STATX_BASIC_STATS, + flags | AT_NO_AUTOMOUNT); + path_put(&path); + return error; +} + +int __init init_mknod(const char *filename, umode_t mode, unsigned int dev) +{ + struct dentry *dentry; + struct path path; + int error; + + if (S_ISFIFO(mode) || S_ISSOCK(mode)) + dev = 0; + else if (!(S_ISBLK(mode) || S_ISCHR(mode))) + return -EINVAL; + + dentry = kern_path_create(AT_FDCWD, filename, &path, 0); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + if (!IS_POSIXACL(path.dentry->d_inode)) + mode &= ~current_umask(); + error = security_path_mknod(&path, dentry, mode, dev); + if (!error) + error = vfs_mknod(path.dentry->d_inode, dentry, mode, + new_decode_dev(dev)); + done_path_create(&path, dentry); + return error; +} + +int __init init_link(const char *oldname, const char *newname) +{ + struct dentry *new_dentry; + struct path old_path, new_path; + int error; + + error = kern_path(oldname, 0, &old_path); + if (error) + return error; + + new_dentry = kern_path_create(AT_FDCWD, newname, &new_path, 0); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto out; + + error = -EXDEV; + if (old_path.mnt != new_path.mnt) + goto out_dput; + error = may_linkat(&old_path); + if (unlikely(error)) + goto out_dput; + error = security_path_link(old_path.dentry, &new_path, new_dentry); + if (error) + goto out_dput; + error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, + NULL); +out_dput: + done_path_create(&new_path, new_dentry); +out: + path_put(&old_path); + return error; +} + +int __init init_symlink(const char *oldname, const char *newname) +{ + struct dentry *dentry; + struct path path; + int error; + + dentry = kern_path_create(AT_FDCWD, newname, &path, 0); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + error = security_path_symlink(&path, dentry, oldname); + if (!error) + error = vfs_symlink(path.dentry->d_inode, dentry, oldname); + done_path_create(&path, dentry); + return error; +} + +int __init init_unlink(const char *pathname) +{ + return do_unlinkat(AT_FDCWD, getname_kernel(pathname)); +} + +int __init init_mkdir(const char *pathname, umode_t mode) +{ + struct dentry *dentry; + struct path path; + int error; + + dentry = kern_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + if (!IS_POSIXACL(path.dentry->d_inode)) + mode &= ~current_umask(); + error = security_path_mkdir(&path, dentry, mode); + if (!error) + error = vfs_mkdir(path.dentry->d_inode, dentry, mode); + done_path_create(&path, dentry); + return error; +} + +int __init init_rmdir(const char *pathname) +{ + return do_rmdir(AT_FDCWD, getname_kernel(pathname)); +} + +int __init init_utimes(char *filename, struct timespec64 *ts) +{ + struct path path; + int error; + + error = kern_path(filename, 0, &path); + if (error) + return error; + error = vfs_utimes(&path, ts); + path_put(&path); + return error; +} + +int __init init_dup(struct file *file) +{ + int fd; + + fd = get_unused_fd_flags(0); + if (fd < 0) + return fd; + fd_install(fd, get_file(file)); + return 0; +} diff --git a/fs/internal.h b/fs/internal.h index 969988d3d397..10517ece4516 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -75,15 +75,9 @@ extern int filename_lookup(int dfd, struct filename *name, unsigned flags, struct path *path, struct path *root); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); -long do_mknodat(int dfd, const char __user *filename, umode_t mode, - unsigned int dev); -long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); -long do_rmdir(int dfd, const char __user *pathname); +long do_rmdir(int dfd, struct filename *name); long do_unlinkat(int dfd, struct filename *name); -long do_symlinkat(const char __user *oldname, int newdfd, - const char __user *newname); -int do_linkat(int olddfd, const char __user *oldname, int newdfd, - const char __user *newname, int flags); +int may_linkat(struct path *link); /* * namespace.c @@ -102,6 +96,11 @@ extern int __mnt_want_write_file(struct file *); extern void __mnt_drop_write_file(struct file *); extern void dissolve_on_fput(struct vfsmount *); + +int path_mount(const char *dev_name, struct path *path, + const char *type_page, unsigned long flags, void *data_page); +int path_umount(struct path *path, int flags); + /* * fs_struct.c */ @@ -139,10 +138,10 @@ extern struct open_how build_open_how(int flags, umode_t mode); extern int build_open_flags(const struct open_how *how, struct open_flags *op); long do_sys_ftruncate(unsigned int fd, loff_t length, int small); -int do_fchmodat(int dfd, const char __user *filename, umode_t mode); +int chmod_common(const struct path *path, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); - +int chown_common(const struct path *path, uid_t user, gid_t group); extern int vfs_open(const struct path *, struct file *); /* diff --git a/fs/ioctl.c b/fs/ioctl.c index d69786d1dd91..4e6cc0a7d69c 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -736,7 +736,7 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd, return -ENOIOCTLCMD; } -int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct fd f = fdget(fd); int error; @@ -757,11 +757,6 @@ out: return error; } -SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) -{ - return ksys_ioctl(fd, cmd, arg); -} - #ifdef CONFIG_COMPAT /** * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation diff --git a/fs/namei.c b/fs/namei.c index 72d4219c93ac..fde8fe086c09 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1024,7 +1024,7 @@ static bool safe_hardlink_source(struct inode *inode) * * Returns 0 if successful, -ve on error. */ -static int may_linkat(struct path *link) +int may_linkat(struct path *link) { struct inode *inode = link->dentry->d_inode; @@ -3564,7 +3564,7 @@ static int may_mknod(umode_t mode) } } -long do_mknodat(int dfd, const char __user *filename, umode_t mode, +static long do_mknodat(int dfd, const char __user *filename, umode_t mode, unsigned int dev) { struct dentry *dentry; @@ -3645,7 +3645,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } EXPORT_SYMBOL(vfs_mkdir); -long do_mkdirat(int dfd, const char __user *pathname, umode_t mode) +static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode) { struct dentry *dentry; struct path path; @@ -3720,17 +3720,16 @@ out: } EXPORT_SYMBOL(vfs_rmdir); -long do_rmdir(int dfd, const char __user *pathname) +long do_rmdir(int dfd, struct filename *name) { int error = 0; - struct filename *name; struct dentry *dentry; struct path path; struct qstr last; int type; unsigned int lookup_flags = 0; retry: - name = filename_parentat(dfd, getname(pathname), lookup_flags, + name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (IS_ERR(name)) return PTR_ERR(name); @@ -3781,7 +3780,7 @@ exit1: SYSCALL_DEFINE1(rmdir, const char __user *, pathname) { - return do_rmdir(AT_FDCWD, pathname); + return do_rmdir(AT_FDCWD, getname(pathname)); } /** @@ -3926,8 +3925,7 @@ SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) return -EINVAL; if (flag & AT_REMOVEDIR) - return do_rmdir(dfd, pathname); - + return do_rmdir(dfd, getname(pathname)); return do_unlinkat(dfd, getname(pathname)); } @@ -3957,7 +3955,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) } EXPORT_SYMBOL(vfs_symlink); -long do_symlinkat(const char __user *oldname, int newdfd, +static long do_symlinkat(const char __user *oldname, int newdfd, const char __user *newname) { int error; @@ -4088,7 +4086,7 @@ EXPORT_SYMBOL(vfs_link); * with linux 2.0, and to avoid hard-linking to directories * and other special files. --ADM */ -int do_linkat(int olddfd, const char __user *oldname, int newdfd, +static int do_linkat(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, int flags) { struct dentry *new_dentry; diff --git a/fs/namespace.c b/fs/namespace.c index 4a0f600a3328..016553d0f925 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1706,36 +1706,19 @@ static inline bool may_mandlock(void) } #endif -/* - * Now umount can handle mount points as well as block devices. - * This is important for filesystems which use unnamed block devices. - * - * We now support a flag for forced unmount like the other 'big iron' - * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD - */ - -int ksys_umount(char __user *name, int flags) +int path_umount(struct path *path, int flags) { - struct path path; struct mount *mnt; int retval; - int lookup_flags = LOOKUP_MOUNTPOINT; if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) return -EINVAL; - if (!may_mount()) return -EPERM; - if (!(flags & UMOUNT_NOFOLLOW)) - lookup_flags |= LOOKUP_FOLLOW; - - retval = user_path_at(AT_FDCWD, name, lookup_flags, &path); - if (retval) - goto out; - mnt = real_mount(path.mnt); + mnt = real_mount(path->mnt); retval = -EINVAL; - if (path.dentry != path.mnt->mnt_root) + if (path->dentry != path->mnt->mnt_root) goto dput_and_out; if (!check_mnt(mnt)) goto dput_and_out; @@ -1748,12 +1731,25 @@ int ksys_umount(char __user *name, int flags) retval = do_umount(mnt, flags); dput_and_out: /* we mustn't call path_put() as that would clear mnt_expiry_mark */ - dput(path.dentry); + dput(path->dentry); mntput_no_expire(mnt); -out: return retval; } +static int ksys_umount(char __user *name, int flags) +{ + int lookup_flags = LOOKUP_MOUNTPOINT; + struct path path; + int ret; + + if (!(flags & UMOUNT_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + ret = user_path_at(AT_FDCWD, name, lookup_flags, &path); + if (ret) + return ret; + return path_umount(&path, flags); +} + SYSCALL_DEFINE2(umount, char __user *, name, int, flags) { return ksys_umount(name, flags); @@ -3116,12 +3112,11 @@ char *copy_mount_string(const void __user *data) * Therefore, if this magic number is present, it carries no information * and must be discarded. */ -long do_mount(const char *dev_name, const char __user *dir_name, +int path_mount(const char *dev_name, struct path *path, const char *type_page, unsigned long flags, void *data_page) { - struct path path; unsigned int mnt_flags = 0, sb_flags; - int retval = 0; + int ret; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -3134,19 +3129,13 @@ long do_mount(const char *dev_name, const char __user *dir_name, if (flags & MS_NOUSER) return -EINVAL; - /* ... and get the mountpoint */ - retval = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path); - if (retval) - return retval; - - retval = security_sb_mount(dev_name, &path, - type_page, flags, data_page); - if (!retval && !may_mount()) - retval = -EPERM; - if (!retval && (flags & SB_MANDLOCK) && !may_mandlock()) - retval = -EPERM; - if (retval) - goto dput_out; + ret = security_sb_mount(dev_name, path, type_page, flags, data_page); + if (ret) + return ret; + if (!may_mount()) + return -EPERM; + if ((flags & SB_MANDLOCK) && !may_mandlock()) + return -EPERM; /* Default to relatime unless overriden */ if (!(flags & MS_NOATIME)) @@ -3173,7 +3162,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_STRICTATIME)) == 0)) { mnt_flags &= ~MNT_ATIME_MASK; - mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; + mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK; } sb_flags = flags & (SB_RDONLY | @@ -3186,22 +3175,32 @@ long do_mount(const char *dev_name, const char __user *dir_name, SB_I_VERSION); if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND)) - retval = do_reconfigure_mnt(&path, mnt_flags); - else if (flags & MS_REMOUNT) - retval = do_remount(&path, flags, sb_flags, mnt_flags, - data_page); - else if (flags & MS_BIND) - retval = do_loopback(&path, dev_name, flags & MS_REC); - else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) - retval = do_change_type(&path, flags); - else if (flags & MS_MOVE) - retval = do_move_mount_old(&path, dev_name); - else - retval = do_new_mount(&path, type_page, sb_flags, mnt_flags, - dev_name, data_page); -dput_out: + return do_reconfigure_mnt(path, mnt_flags); + if (flags & MS_REMOUNT) + return do_remount(path, flags, sb_flags, mnt_flags, data_page); + if (flags & MS_BIND) + return do_loopback(path, dev_name, flags & MS_REC); + if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) + return do_change_type(path, flags); + if (flags & MS_MOVE) + return do_move_mount_old(path, dev_name); + + return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name, + data_page); +} + +long do_mount(const char *dev_name, const char __user *dir_name, + const char *type_page, unsigned long flags, void *data_page) +{ + struct path path; + int ret; + + ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path); + if (ret) + return ret; + ret = path_mount(dev_name, &path, type_page, flags, data_page); path_put(&path); - return retval; + return ret; } static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) diff --git a/fs/open.c b/fs/open.c index 5e62f18adc5b..c80e9f497e9b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -394,7 +394,7 @@ static const struct cred *access_override_creds(void) return old_cred; } -long do_faccessat(int dfd, const char __user *filename, int mode, int flags) +static long do_faccessat(int dfd, const char __user *filename, int mode, int flags) { struct path path; struct inode *inode; @@ -482,7 +482,7 @@ SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) return do_faccessat(AT_FDCWD, filename, mode, 0); } -int ksys_chdir(const char __user *filename) +SYSCALL_DEFINE1(chdir, const char __user *, filename) { struct path path; int error; @@ -508,11 +508,6 @@ out: return error; } -SYSCALL_DEFINE1(chdir, const char __user *, filename) -{ - return ksys_chdir(filename); -} - SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct fd f = fdget_raw(fd); @@ -535,7 +530,7 @@ out: return error; } -int ksys_chroot(const char __user *filename) +SYSCALL_DEFINE1(chroot, const char __user *, filename) { struct path path; int error; @@ -568,12 +563,7 @@ out: return error; } -SYSCALL_DEFINE1(chroot, const char __user *, filename) -{ - return ksys_chroot(filename); -} - -static int chmod_common(const struct path *path, umode_t mode) +int chmod_common(const struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; @@ -602,25 +592,25 @@ out_unlock: return error; } -int ksys_fchmod(unsigned int fd, umode_t mode) +int vfs_fchmod(struct file *file, umode_t mode) +{ + audit_file(file); + return chmod_common(&file->f_path, mode); +} + +SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) { struct fd f = fdget(fd); int err = -EBADF; if (f.file) { - audit_file(f.file); - err = chmod_common(&f.file->f_path, mode); + err = vfs_fchmod(f.file, mode); fdput(f); } return err; } -SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) -{ - return ksys_fchmod(fd, mode); -} - -int do_fchmodat(int dfd, const char __user *filename, umode_t mode) +static int do_fchmodat(int dfd, const char __user *filename, umode_t mode) { struct path path; int error; @@ -649,7 +639,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) return do_fchmodat(AT_FDCWD, filename, mode); } -static int chown_common(const struct path *path, uid_t user, gid_t group) +int chown_common(const struct path *path, uid_t user, gid_t group) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; @@ -740,23 +730,28 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group AT_SYMLINK_NOFOLLOW); } +int vfs_fchown(struct file *file, uid_t user, gid_t group) +{ + int error; + + error = mnt_want_write_file(file); + if (error) + return error; + audit_file(file); + error = chown_common(&file->f_path, user, group); + mnt_drop_write_file(file); + return error; +} + int ksys_fchown(unsigned int fd, uid_t user, gid_t group) { struct fd f = fdget(fd); int error = -EBADF; - if (!f.file) - goto out; - - error = mnt_want_write_file(f.file); - if (error) - goto out_fput; - audit_file(f.file); - error = chown_common(&f.file->f_path, user, group); - mnt_drop_write_file(f.file); -out_fput: - fdput(f); -out: + if (f.file) { + error = vfs_fchown(f.file, user, group); + fdput(f); + } return error; } @@ -1198,7 +1193,9 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) { - return ksys_open(filename, flags, mode); + if (force_o_largefile()) + flags |= O_LARGEFILE; + return do_sys_open(AT_FDCWD, filename, flags, mode); } SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, @@ -1260,9 +1257,12 @@ COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, fla */ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode) { - return ksys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); -} + int flags = O_CREAT | O_WRONLY | O_TRUNC; + if (force_o_largefile()) + flags |= O_LARGEFILE; + return do_sys_open(AT_FDCWD, pathname, flags, mode); +} #endif /* diff --git a/fs/read_write.c b/fs/read_write.c index 4fb797822567..5db58b8c78d0 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -301,7 +301,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(vfs_llseek); -off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) +static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) { off_t retval; struct fd f = fdget_pos(fd); diff --git a/fs/readdir.c b/fs/readdir.c index a49f07c11cfb..19434b3c982c 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -348,8 +348,8 @@ efault: return -EFAULT; } -int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, - unsigned int count) +SYSCALL_DEFINE3(getdents64, unsigned int, fd, + struct linux_dirent64 __user *, dirent, unsigned int, count) { struct fd f; struct getdents_callback64 buf = { @@ -380,13 +380,6 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, return error; } - -SYSCALL_DEFINE3(getdents64, unsigned int, fd, - struct linux_dirent64 __user *, dirent, unsigned int, count) -{ - return ksys_getdents64(fd, dirent, count); -} - #ifdef CONFIG_COMPAT struct compat_old_linux_dirent { compat_ulong_t d_ino; diff --git a/fs/utimes.c b/fs/utimes.c index b7b927502d6e..fd3cc4226224 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -16,21 +16,26 @@ static bool nsec_valid(long nsec) return nsec >= 0 && nsec <= 999999999; } -static int utimes_common(const struct path *path, struct timespec64 *times) +int vfs_utimes(const struct path *path, struct timespec64 *times) { int error; struct iattr newattrs; struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; + if (times) { + if (!nsec_valid(times[0].tv_nsec) || + !nsec_valid(times[1].tv_nsec)) + return -EINVAL; + if (times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_NOW) + times = NULL; + } + error = mnt_want_write(path->mnt); if (error) goto out; - if (times && times[0].tv_nsec == UTIME_NOW && - times[1].tv_nsec == UTIME_NOW) - times = NULL; - newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { if (times[0].tv_nsec == UTIME_OMIT) @@ -70,6 +75,51 @@ out: return error; } +static int do_utimes_path(int dfd, const char __user *filename, + struct timespec64 *times, int flags) +{ + struct path path; + int lookup_flags = 0, error; + + if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) + return -EINVAL; + + if (!(flags & AT_SYMLINK_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + if (flags & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + +retry: + error = user_path_at(dfd, filename, lookup_flags, &path); + if (error) + return error; + + error = vfs_utimes(&path, times); + path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } + + return error; +} + +static int do_utimes_fd(int fd, struct timespec64 *times, int flags) +{ + struct fd f; + int error; + + if (flags) + return -EINVAL; + + f = fdget(fd); + if (!f.file) + return -EBADF; + error = vfs_utimes(&f.file->f_path, times); + fdput(f); + return error; +} + /* * do_utimes - change times on filename or file descriptor * @dfd: open file descriptor, -1 or AT_FDCWD @@ -88,52 +138,9 @@ out: long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, int flags) { - int error = -EINVAL; - - if (times && (!nsec_valid(times[0].tv_nsec) || - !nsec_valid(times[1].tv_nsec))) { - goto out; - } - - if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) - goto out; - - if (filename == NULL && dfd != AT_FDCWD) { - struct fd f; - - if (flags) - goto out; - - f = fdget(dfd); - error = -EBADF; - if (!f.file) - goto out; - - error = utimes_common(&f.file->f_path, times); - fdput(f); - } else { - struct path path; - int lookup_flags = 0; - - if (!(flags & AT_SYMLINK_NOFOLLOW)) - lookup_flags |= LOOKUP_FOLLOW; - if (flags & AT_EMPTY_PATH) - lookup_flags |= LOOKUP_EMPTY; -retry: - error = user_path_at(dfd, filename, lookup_flags, &path); - if (error) - goto out; - - error = utimes_common(&path, times); - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - } - -out: - return error; + if (filename == NULL && dfd != AT_FDCWD) + return do_utimes_fd(dfd, times, flags); + return do_utimes_path(dfd, filename, times, flags); } SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename, diff --git a/include/linux/fs.h b/include/linux/fs.h index 488c3ef93601..6d1976916635 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1712,6 +1712,10 @@ int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), void *); +int vfs_fchown(struct file *file, uid_t user, gid_t group); +int vfs_fchmod(struct file *file, umode_t mode); +int vfs_utimes(const struct path *path, struct timespec64 *times); + extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h new file mode 100644 index 000000000000..92045d18cbfc --- /dev/null +++ b/include/linux/init_syscalls.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +int __init init_mount(const char *dev_name, const char *dir_name, + const char *type_page, unsigned long flags, void *data_page); +int __init init_umount(const char *name, int flags); +int __init init_chdir(const char *filename); +int __init init_chroot(const char *filename); +int __init init_chown(const char *filename, uid_t user, gid_t group, int flags); +int __init init_chmod(const char *filename, umode_t mode); +int __init init_eaccess(const char *filename); +int __init init_stat(const char *filename, struct kstat *stat, int flags); +int __init init_mknod(const char *filename, umode_t mode, unsigned int dev); +int __init init_link(const char *oldname, const char *newname); +int __init init_symlink(const char *oldname, const char *newname); +int __init init_unlink(const char *pathname); +int __init init_mkdir(const char *pathname, umode_t mode); +int __init init_rmdir(const char *pathname); +int __init init_utimes(char *filename, struct timespec64 *ts); +int __init init_dup(struct file *file); diff --git a/include/linux/initrd.h b/include/linux/initrd.h index aa5914355728..8db6f8c8030b 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -2,12 +2,6 @@ #define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */ -/* 1 = load ramdisk, 0 = don't load */ -extern int rd_doload; - -/* 1 = prompt for ramdisk, 0 = don't prompt */ -extern int rd_prompt; - /* starting block # of image */ extern int rd_image_start; diff --git a/include/linux/raid/detect.h b/include/linux/raid/detect.h index 37dd3f40cd31..1f029a71c3ef 100644 --- a/include/linux/raid/detect.h +++ b/include/linux/raid/detect.h @@ -1,3 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ void md_autodetect_dev(dev_t dev); + +#ifdef CONFIG_BLK_DEV_MD +void md_run_setup(void); +#else +static inline void md_run_setup(void) +{ +} +#endif diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h deleted file mode 100644 index 8dfec085a20e..000000000000 --- a/include/linux/raid/md_u.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - md_u.h : user <=> kernel API between Linux raidtools and RAID drivers - Copyright (C) 1998 Ingo Molnar - -*/ -#ifndef _MD_U_H -#define _MD_U_H - -#include <uapi/linux/raid/md_u.h> - -extern int mdp_major; -#endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e07a83f9f093..a2429d336593 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1237,18 +1237,8 @@ asmlinkage long sys_ni_syscall(void); * Instead, use one of the functions which work equivalently, such as * the ksys_xyzyyz() functions prototyped below. */ - -int ksys_umount(char __user *name, int flags); -int ksys_dup(unsigned int fildes); -int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); -int ksys_chdir(const char __user *filename); -int ksys_fchmod(unsigned int fd, umode_t mode); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); -int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, - unsigned int count); -int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); -off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); int ksys_unshare(unsigned long unshare_flags); @@ -1282,68 +1272,6 @@ int compat_ksys_ipc(u32 call, int first, int second, * The following kernel syscall equivalents are just wrappers to fs-internal * functions. Therefore, provide stubs to be inlined at the callsites. */ -extern long do_unlinkat(int dfd, struct filename *name); - -static inline long ksys_unlink(const char __user *pathname) -{ - return do_unlinkat(AT_FDCWD, getname(pathname)); -} - -extern long do_rmdir(int dfd, const char __user *pathname); - -static inline long ksys_rmdir(const char __user *pathname) -{ - return do_rmdir(AT_FDCWD, pathname); -} - -extern long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); - -static inline long ksys_mkdir(const char __user *pathname, umode_t mode) -{ - return do_mkdirat(AT_FDCWD, pathname, mode); -} - -extern long do_symlinkat(const char __user *oldname, int newdfd, - const char __user *newname); - -static inline long ksys_symlink(const char __user *oldname, - const char __user *newname) -{ - return do_symlinkat(oldname, AT_FDCWD, newname); -} - -extern long do_mknodat(int dfd, const char __user *filename, umode_t mode, - unsigned int dev); - -static inline long ksys_mknod(const char __user *filename, umode_t mode, - unsigned int dev) -{ - return do_mknodat(AT_FDCWD, filename, mode, dev); -} - -extern int do_linkat(int olddfd, const char __user *oldname, int newdfd, - const char __user *newname, int flags); - -static inline long ksys_link(const char __user *oldname, - const char __user *newname) -{ - return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); -} - -extern int do_fchmodat(int dfd, const char __user *filename, umode_t mode); - -static inline int ksys_chmod(const char __user *filename, umode_t mode) -{ - return do_fchmodat(AT_FDCWD, filename, mode); -} - -long do_faccessat(int dfd, const char __user *filename, int mode, int flags); - -static inline long ksys_access(const char __user *filename, int mode) -{ - return do_faccessat(AT_FDCWD, filename, mode, 0); -} - extern int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); @@ -1379,17 +1307,6 @@ static inline int ksys_close(unsigned int fd) return __close_fd(current->files, fd); } -extern long do_sys_open(int dfd, const char __user *filename, int flags, - umode_t mode); - -static inline long ksys_open(const char __user *filename, int flags, - umode_t mode) -{ - if (force_o_largefile()) - flags |= O_LARGEFILE; - return do_sys_open(AT_FDCWD, filename, flags, mode); -} - extern long do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) diff --git a/init/Makefile b/init/Makefile index 57499b1ff471..6bc37f64b361 100644 --- a/init/Makefile +++ b/init/Makefile @@ -18,7 +18,6 @@ obj-y += init_task.o mounts-y := do_mounts.o mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o -mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o # dependencies on generated files need to be listed explicitly $(obj)/version.o: include/generated/compile.h diff --git a/init/do_mounts.c b/init/do_mounts.c index 29d326b6c29d..b5f9604d0c98 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -23,12 +23,11 @@ #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> #include <linux/nfs_mount.h> +#include <linux/raid/detect.h> #include <uapi/linux/mount.h> #include "do_mounts.h" -int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */ - int root_mountflags = MS_RDONLY | MS_SILENT; static char * __initdata root_device_name; static char __initdata saved_root_name[64]; @@ -38,7 +37,7 @@ dev_t ROOT_DEV; static int __init load_ramdisk(char *str) { - rd_doload = simple_strtol(str,NULL,0) & 3; + pr_warn("ignoring the deprecated load_ramdisk= option\n"); return 1; } __setup("load_ramdisk=", load_ramdisk); @@ -396,20 +395,20 @@ static int __init do_mount_root(const char *name, const char *fs, int ret; if (data) { - /* do_mount() requires a full page as fifth argument */ + /* init_mount() requires a full page as fifth argument */ p = alloc_page(GFP_KERNEL); if (!p) return -ENOMEM; data_page = page_address(p); - /* zero-pad. do_mount() will make sure it's terminated */ + /* zero-pad. init_mount() will make sure it's terminated */ strncpy(data_page, data, PAGE_SIZE); } - ret = do_mount(name, "/root", fs, flags, data_page); + ret = init_mount(name, "/root", fs, flags, data_page); if (ret) goto out; - ksys_chdir("/root"); + init_chdir("/root"); s = current->fs->pwd.dentry->d_sb; ROOT_DEV = s->s_dev; printk(KERN_INFO @@ -552,66 +551,20 @@ static int __init mount_cifs_root(void) } #endif -#if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD) -void __init change_floppy(char *fmt, ...) -{ - struct termios termios; - char buf[80]; - char c; - int fd; - va_list args; - va_start(args, fmt); - vsprintf(buf, fmt, args); - va_end(args); - fd = ksys_open("/dev/root", O_RDWR | O_NDELAY, 0); - if (fd >= 0) { - ksys_ioctl(fd, FDEJECT, 0); - ksys_close(fd); - } - printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf); - fd = ksys_open("/dev/console", O_RDWR, 0); - if (fd >= 0) { - ksys_ioctl(fd, TCGETS, (long)&termios); - termios.c_lflag &= ~ICANON; - ksys_ioctl(fd, TCSETSF, (long)&termios); - ksys_read(fd, &c, 1); - termios.c_lflag |= ICANON; - ksys_ioctl(fd, TCSETSF, (long)&termios); - ksys_close(fd); - } -} -#endif - void __init mount_root(void) { #ifdef CONFIG_ROOT_NFS if (ROOT_DEV == Root_NFS) { - if (mount_nfs_root()) - return; - - printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n"); - ROOT_DEV = Root_FD0; + if (!mount_nfs_root()) + printk(KERN_ERR "VFS: Unable to mount root fs via NFS.\n"); + return; } #endif #ifdef CONFIG_CIFS_ROOT if (ROOT_DEV == Root_CIFS) { - if (mount_cifs_root()) - return; - - printk(KERN_ERR "VFS: Unable to mount root fs via SMB, trying floppy.\n"); - ROOT_DEV = Root_FD0; - } -#endif -#ifdef CONFIG_BLK_DEV_FD - if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) { - /* rd_doload is 2 for a dual initrd/ramload setup */ - if (rd_doload==2) { - if (rd_load_disk(1)) { - ROOT_DEV = Root_RAM1; - root_device_name = NULL; - } - } else - change_floppy("root floppy"); + if (!mount_cifs_root()) + printk(KERN_ERR "VFS: Unable to mount root fs via SMB.\n"); + return; } #endif #ifdef CONFIG_BLOCK @@ -630,8 +583,6 @@ void __init mount_root(void) */ void __init prepare_namespace(void) { - int is_floppy; - if (root_delay) { printk(KERN_INFO "Waiting %d sec before mounting root device...\n", root_delay); @@ -674,16 +625,11 @@ void __init prepare_namespace(void) async_synchronize_full(); } - is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR; - - if (is_floppy && rd_doload && rd_load_disk(0)) - ROOT_DEV = Root_RAM0; - mount_root(); out: devtmpfs_mount(); - do_mount(".", "/", NULL, MS_MOVE, NULL); - ksys_chroot("."); + init_mount(".", "/", NULL, MS_MOVE, NULL); + init_chroot("."); } static bool is_tmpfs; diff --git a/init/do_mounts.h b/init/do_mounts.h index 0bb0806de4ce..7a29ac3e427b 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -8,26 +8,16 @@ #include <linux/mount.h> #include <linux/major.h> #include <linux/root_dev.h> +#include <linux/init_syscalls.h> -void change_floppy(char *fmt, ...); void mount_block_root(char *name, int flags); void mount_root(void); extern int root_mountflags; -static inline int create_dev(char *name, dev_t dev) +static inline __init int create_dev(char *name, dev_t dev) { - ksys_unlink(name); - return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); -} - -static inline u32 bstat(char *name) -{ - struct kstat stat; - if (vfs_stat(name, &stat) != 0) - return 0; - if (!S_ISBLK(stat.mode)) - return 0; - return stat.rdev; + init_unlink(name); + return init_mknod(name, S_IFBLK | 0600, new_encode_dev(dev)); } #ifdef CONFIG_BLK_DEV_RAM @@ -51,13 +41,3 @@ bool __init initrd_load(void); static inline bool initrd_load(void) { return false; } #endif - -#ifdef CONFIG_BLK_DEV_MD - -void md_run_setup(void); - -#else - -static inline void md_run_setup(void) {} - -#endif diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 53314d7da4be..533d81ed74d4 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -51,14 +51,14 @@ static int __init early_initrd(char *p) } early_param("initrd", early_initrd); -static int init_linuxrc(struct subprocess_info *info, struct cred *new) +static int __init init_linuxrc(struct subprocess_info *info, struct cred *new) { ksys_unshare(CLONE_FS | CLONE_FILES); console_on_rootfs(); /* move initrd over / and chdir/chroot in initrd root */ - ksys_chdir("/root"); - do_mount(".", "/", NULL, MS_MOVE, NULL); - ksys_chroot("."); + init_chdir("/root"); + init_mount(".", "/", NULL, MS_MOVE, NULL); + init_chroot("."); ksys_setsid(); return 0; } @@ -70,12 +70,14 @@ static void __init handle_initrd(void) extern char *envp_init[]; int error; + pr_warn("using deprecated initrd support, will be removed in 2021.\n"); + real_root_dev = new_encode_dev(ROOT_DEV); create_dev("/dev/root.old", Root_RAM0); /* mount initrd on rootfs' /root */ mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); - ksys_mkdir("/old", 0700); - ksys_chdir("/old"); + init_mkdir("/old", 0700); + init_chdir("/old"); /* * In case that a resume from disk is carried out by linuxrc or one of @@ -92,39 +94,30 @@ static void __init handle_initrd(void) current->flags &= ~PF_FREEZER_SKIP; /* move initrd to rootfs' /old */ - do_mount("..", ".", NULL, MS_MOVE, NULL); + init_mount("..", ".", NULL, MS_MOVE, NULL); /* switch root and cwd back to / of rootfs */ - ksys_chroot(".."); + init_chroot(".."); if (new_decode_dev(real_root_dev) == Root_RAM0) { - ksys_chdir("/old"); + init_chdir("/old"); return; } - ksys_chdir("/"); + init_chdir("/"); ROOT_DEV = new_decode_dev(real_root_dev); mount_root(); printk(KERN_NOTICE "Trying to move old root to /initrd ... "); - error = do_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); + error = init_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); if (!error) printk("okay\n"); else { - int fd = ksys_open("/dev/root.old", O_RDWR, 0); if (error == -ENOENT) printk("/initrd does not exist. Ignored.\n"); else printk("failed\n"); printk(KERN_NOTICE "Unmounting old root\n"); - ksys_umount("/old", MNT_DETACH); - printk(KERN_NOTICE "Trying to free ramdisk memory ... "); - if (fd < 0) { - error = fd; - } else { - error = ksys_ioctl(fd, BLKFLSBUF, 0); - ksys_close(fd); - } - printk(!error ? "okay\n" : "failed\n"); + init_umount("/old", MNT_DETACH); } } @@ -139,11 +132,11 @@ bool __init initrd_load(void) * mounted in the normal path. */ if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) { - ksys_unlink("/initrd.image"); + init_unlink("/initrd.image"); handle_initrd(); return true; } } - ksys_unlink("/initrd.image"); + init_unlink("/initrd.image"); return false; } diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 32fb049d18f9..ac021ae6e6fa 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -14,12 +14,12 @@ #include <linux/decompress/generic.h> - -int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ +static struct file *in_file, *out_file; +static loff_t in_pos, out_pos; static int __init prompt_ramdisk(char *str) { - rd_prompt = simple_strtol(str,NULL,0) & 1; + pr_warn("ignoring the deprecated prompt_ramdisk= option\n"); return 1; } __setup("prompt_ramdisk=", prompt_ramdisk); @@ -33,7 +33,7 @@ static int __init ramdisk_start_setup(char *str) } __setup("ramdisk_start=", ramdisk_start_setup); -static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); +static int __init crd_load(decompress_fn deco); /* * This routine tries to find a RAM disk image to load, and returns the @@ -55,7 +55,8 @@ static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); * lz4 */ static int __init -identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) +identify_ramdisk_image(struct file *file, loff_t pos, + decompress_fn *decompressor) { const int size = 512; struct minix_super_block *minixsb; @@ -66,6 +67,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) unsigned char *buf; const char *compress_name; unsigned long n; + int start_block = rd_image_start; buf = kmalloc(size, GFP_KERNEL); if (!buf) @@ -80,8 +82,8 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) /* * Read block 0 to test for compressed kernel */ - ksys_lseek(fd, start_block * BLOCK_SIZE, 0); - ksys_read(fd, buf, size); + pos = start_block * BLOCK_SIZE; + kernel_read(file, buf, size, &pos); *decompressor = decompress_method(buf, size, &compress_name); if (compress_name) { @@ -126,8 +128,8 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) /* * Read 512 bytes further to check if cramfs is padded */ - ksys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0); - ksys_read(fd, buf, size); + pos = start_block * BLOCK_SIZE + 0x200; + kernel_read(file, buf, size, &pos); if (cramfsb->magic == CRAMFS_MAGIC) { printk(KERN_NOTICE @@ -140,8 +142,8 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) /* * Read block 1 to test for minix and ext2 superblock */ - ksys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0); - ksys_read(fd, buf, size); + pos = (start_block + 1) * BLOCK_SIZE; + kernel_read(file, buf, size, &pos); /* Try minix */ if (minixsb->s_magic == MINIX_SUPER_MAGIC || @@ -168,17 +170,24 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) start_block); done: - ksys_lseek(fd, start_block * BLOCK_SIZE, 0); kfree(buf); return nblocks; } +static unsigned long nr_blocks(struct file *file) +{ + struct inode *inode = file->f_mapping->host; + + if (!S_ISBLK(inode->i_mode)) + return 0; + return i_size_read(inode) >> 10; +} + int __init rd_load_image(char *from) { int res = 0; - int in_fd, out_fd; unsigned long rd_blocks, devblocks; - int nblocks, i, disk; + int nblocks, i; char *buf = NULL; unsigned short rotate = 0; decompress_fn decompressor = NULL; @@ -186,20 +195,21 @@ int __init rd_load_image(char *from) char rotator[4] = { '|' , '/' , '-' , '\\' }; #endif - out_fd = ksys_open("/dev/ram", O_RDWR, 0); - if (out_fd < 0) + out_file = filp_open("/dev/ram", O_RDWR, 0); + if (IS_ERR(out_file)) goto out; - in_fd = ksys_open(from, O_RDONLY, 0); - if (in_fd < 0) + in_file = filp_open(from, O_RDONLY, 0); + if (IS_ERR(in_file)) goto noclose_input; - nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor); + in_pos = rd_image_start * BLOCK_SIZE; + nblocks = identify_ramdisk_image(in_file, in_pos, &decompressor); if (nblocks < 0) goto done; if (nblocks == 0) { - if (crd_load(in_fd, out_fd, decompressor) == 0) + if (crd_load(decompressor) == 0) goto successful_load; goto done; } @@ -208,11 +218,7 @@ int __init rd_load_image(char *from) * NOTE NOTE: nblocks is not actually blocks but * the number of kibibytes of data to load into a ramdisk. */ - if (ksys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0) - rd_blocks = 0; - else - rd_blocks >>= 1; - + rd_blocks = nr_blocks(out_file); if (nblocks > rd_blocks) { printk("RAMDISK: image too big! (%dKiB/%ldKiB)\n", nblocks, rd_blocks); @@ -222,13 +228,10 @@ int __init rd_load_image(char *from) /* * OK, time to copy in the data */ - if (ksys_ioctl(in_fd, BLKGETSIZE, (unsigned long)&devblocks) < 0) - devblocks = 0; - else - devblocks >>= 1; - if (strcmp(from, "/initrd.image") == 0) devblocks = nblocks; + else + devblocks = nr_blocks(in_file); if (devblocks == 0) { printk(KERN_ERR "RAMDISK: could not determine device size\n"); @@ -243,24 +246,15 @@ int __init rd_load_image(char *from) printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ", nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : ""); - for (i = 0, disk = 1; i < nblocks; i++) { + for (i = 0; i < nblocks; i++) { if (i && (i % devblocks == 0)) { - pr_cont("done disk #%d.\n", disk++); + pr_cont("done disk #1.\n"); rotate = 0; - if (ksys_close(in_fd)) { - printk("Error closing the disk.\n"); - goto noclose_input; - } - change_floppy("disk #%d", disk); - in_fd = ksys_open(from, O_RDONLY, 0); - if (in_fd < 0) { - printk("Error opening disk.\n"); - goto noclose_input; - } - printk("Loading disk #%d... ", disk); + fput(in_file); + break; } - ksys_read(in_fd, buf, BLOCK_SIZE); - ksys_write(out_fd, buf, BLOCK_SIZE); + kernel_read(in_file, buf, BLOCK_SIZE, &in_pos); + kernel_write(out_file, buf, BLOCK_SIZE, &out_pos); #if !defined(CONFIG_S390) if (!(i % 16)) { pr_cont("%c\b", rotator[rotate & 0x3]); @@ -273,19 +267,17 @@ int __init rd_load_image(char *from) successful_load: res = 1; done: - ksys_close(in_fd); + fput(in_file); noclose_input: - ksys_close(out_fd); + fput(out_file); out: kfree(buf); - ksys_unlink("/dev/ram"); + init_unlink("/dev/ram"); return res; } int __init rd_load_disk(int n) { - if (rd_prompt) - change_floppy("root floppy disk to be loaded into RAM disk"); create_dev("/dev/root", ROOT_DEV); create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n)); return rd_load_image("/dev/root"); @@ -293,11 +285,10 @@ int __init rd_load_disk(int n) static int exit_code; static int decompress_error; -static int crd_infd, crd_outfd; static long __init compr_fill(void *buf, unsigned long len) { - long r = ksys_read(crd_infd, buf, len); + long r = kernel_read(in_file, buf, len, &in_pos); if (r < 0) printk(KERN_ERR "RAMDISK: error while reading compressed data"); else if (r == 0) @@ -307,7 +298,7 @@ static long __init compr_fill(void *buf, unsigned long len) static long __init compr_flush(void *window, unsigned long outcnt) { - long written = ksys_write(crd_outfd, window, outcnt); + long written = kernel_write(out_file, window, outcnt, &out_pos); if (written != outcnt) { if (decompress_error == 0) printk(KERN_ERR @@ -326,11 +317,9 @@ static void __init error(char *x) decompress_error = 1; } -static int __init crd_load(int in_fd, int out_fd, decompress_fn deco) +static int __init crd_load(decompress_fn deco) { int result; - crd_infd = in_fd; - crd_outfd = out_fd; if (!deco) { pr_emerg("Invalid ramdisk decompression routine. " diff --git a/init/initramfs.c b/init/initramfs.c index 7a38012e1af7..e6dbfb767057 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -11,14 +11,17 @@ #include <linux/utime.h> #include <linux/file.h> #include <linux/memblock.h> +#include <linux/namei.h> +#include <linux/init_syscalls.h> -static ssize_t __init xwrite(int fd, const char *p, size_t count) +static ssize_t __init xwrite(struct file *file, const char *p, size_t count, + loff_t *pos) { ssize_t out = 0; /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */ while (count) { - ssize_t rv = ksys_write(fd, p, count); + ssize_t rv = kernel_write(file, p, count, pos); if (rv < 0) { if (rv == -EINTR || rv == -EAGAIN) @@ -108,8 +111,7 @@ static long __init do_utime(char *filename, time64_t mtime) t[0].tv_nsec = 0; t[1].tv_sec = mtime; t[1].tv_nsec = 0; - - return do_utimes(AT_FDCWD, filename, t, AT_SYMLINK_NOFOLLOW); + return init_utimes(filename, t); } static __initdata LIST_HEAD(dir_list); @@ -200,7 +202,6 @@ static inline void __init eat(unsigned n) byte_count -= n; } -static __initdata char *vcollected; static __initdata char *collected; static long remains __initdata; static __initdata char *collect; @@ -296,11 +297,12 @@ static void __init clean_path(char *path, umode_t fmode) { struct kstat st; - if (!vfs_lstat(path, &st) && (st.mode ^ fmode) & S_IFMT) { + if (init_stat(path, &st, AT_SYMLINK_NOFOLLOW) && + (st.mode ^ fmode) & S_IFMT) { if (S_ISDIR(st.mode)) - ksys_rmdir(path); + init_rmdir(path); else - ksys_unlink(path); + init_unlink(path); } } @@ -310,13 +312,14 @@ static int __init maybe_link(void) char *old = find_link(major, minor, ino, mode, collected); if (old) { clean_path(collected, 0); - return (ksys_link(old, collected) < 0) ? -1 : 1; + return (init_link(old, collected) < 0) ? -1 : 1; } } return 0; } -static __initdata int wfd; +static __initdata struct file *wfile; +static __initdata loff_t wfile_pos; static int __init do_name(void) { @@ -333,28 +336,28 @@ static int __init do_name(void) int openflags = O_WRONLY|O_CREAT; if (ml != 1) openflags |= O_TRUNC; - wfd = ksys_open(collected, openflags, mode); - - if (wfd >= 0) { - ksys_fchown(wfd, uid, gid); - ksys_fchmod(wfd, mode); - if (body_len) - ksys_ftruncate(wfd, body_len); - vcollected = kstrdup(collected, GFP_KERNEL); - state = CopyFile; - } + wfile = filp_open(collected, openflags, mode); + if (IS_ERR(wfile)) + return 0; + wfile_pos = 0; + + vfs_fchown(wfile, uid, gid); + vfs_fchmod(wfile, mode); + if (body_len) + vfs_truncate(&wfile->f_path, body_len); + state = CopyFile; } } else if (S_ISDIR(mode)) { - ksys_mkdir(collected, mode); - ksys_chown(collected, uid, gid); - ksys_chmod(collected, mode); + init_mkdir(collected, mode); + init_chown(collected, uid, gid, 0); + init_chmod(collected, mode); dir_add(collected, mtime); } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { - ksys_mknod(collected, mode, rdev); - ksys_chown(collected, uid, gid); - ksys_chmod(collected, mode); + init_mknod(collected, mode, rdev); + init_chown(collected, uid, gid, 0); + init_chmod(collected, mode); do_utime(collected, mtime); } } @@ -364,16 +367,20 @@ static int __init do_name(void) static int __init do_copy(void) { if (byte_count >= body_len) { - if (xwrite(wfd, victim, body_len) != body_len) + struct timespec64 t[2] = { }; + if (xwrite(wfile, victim, body_len, &wfile_pos) != body_len) error("write error"); - ksys_close(wfd); - do_utime(vcollected, mtime); - kfree(vcollected); + + t[0].tv_sec = mtime; + t[1].tv_sec = mtime; + vfs_utimes(&wfile->f_path, t); + + fput(wfile); eat(body_len); state = SkipIt; return 0; } else { - if (xwrite(wfd, victim, byte_count) != byte_count) + if (xwrite(wfile, victim, byte_count, &wfile_pos) != byte_count) error("write error"); body_len -= byte_count; eat(byte_count); @@ -385,8 +392,8 @@ static int __init do_symlink(void) { collected[N_ALIGN(name_len) + body_len] = '\0'; clean_path(collected, 0); - ksys_symlink(collected + N_ALIGN(name_len), collected); - ksys_lchown(collected, uid, gid); + init_symlink(collected + N_ALIGN(name_len), collected); + init_chown(collected, uid, gid, AT_SYMLINK_NOFOLLOW); do_utime(collected, mtime); state = SkipIt; next_state = Reset; @@ -572,82 +579,26 @@ static inline bool kexec_free_initrd(void) #endif /* CONFIG_KEXEC_CORE */ #ifdef CONFIG_BLK_DEV_RAM -#define BUF_SIZE 1024 -static void __init clean_rootfs(void) -{ - int fd; - void *buf; - struct linux_dirent64 *dirp; - int num; - - fd = ksys_open("/", O_RDONLY, 0); - WARN_ON(fd < 0); - if (fd < 0) - return; - buf = kzalloc(BUF_SIZE, GFP_KERNEL); - WARN_ON(!buf); - if (!buf) { - ksys_close(fd); - return; - } - - dirp = buf; - num = ksys_getdents64(fd, dirp, BUF_SIZE); - while (num > 0) { - while (num > 0) { - struct kstat st; - int ret; - - ret = vfs_lstat(dirp->d_name, &st); - WARN_ON_ONCE(ret); - if (!ret) { - if (S_ISDIR(st.mode)) - ksys_rmdir(dirp->d_name); - else - ksys_unlink(dirp->d_name); - } - - num -= dirp->d_reclen; - dirp = (void *)dirp + dirp->d_reclen; - } - dirp = buf; - memset(buf, 0, BUF_SIZE); - num = ksys_getdents64(fd, dirp, BUF_SIZE); - } - - ksys_close(fd); - kfree(buf); -} -#else -static inline void clean_rootfs(void) -{ -} -#endif /* CONFIG_BLK_DEV_RAM */ - -#ifdef CONFIG_BLK_DEV_RAM static void __init populate_initrd_image(char *err) { ssize_t written; - int fd; + struct file *file; + loff_t pos = 0; unpack_to_rootfs(__initramfs_start, __initramfs_size); printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n", err); - fd = ksys_open("/initrd.image", O_WRONLY | O_CREAT, 0700); - if (fd < 0) + file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700); + if (IS_ERR(file)) return; - written = xwrite(fd, (char *)initrd_start, initrd_end - initrd_start); + written = xwrite(file, (char *)initrd_start, initrd_end - initrd_start, + &pos); if (written != initrd_end - initrd_start) pr_err("/initrd.image: incomplete write (%zd != %ld)\n", written, initrd_end - initrd_start); - ksys_close(fd); -} -#else -static void __init populate_initrd_image(char *err) -{ - printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); + fput(file); } #endif /* CONFIG_BLK_DEV_RAM */ @@ -668,8 +619,11 @@ static int __init populate_rootfs(void) err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start); if (err) { - clean_rootfs(); +#ifdef CONFIG_BLK_DEV_RAM populate_initrd_image(err); +#else + printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); +#endif } done: diff --git a/init/main.c b/init/main.c index 15bd0efff3df..276ca3160c8c 100644 --- a/init/main.c +++ b/init/main.c @@ -96,6 +96,7 @@ #include <linux/jump_label.h> #include <linux/mem_encrypt.h> #include <linux/kcsan.h> +#include <linux/init_syscalls.h> #include <asm/io.h> #include <asm/bugs.h> @@ -154,7 +155,7 @@ static bool initargs_found; #endif static char *execute_command; -static char *ramdisk_execute_command; +static char *ramdisk_execute_command = "/init"; /* * Used to generate warnings if static_key manipulation functions are used @@ -1457,15 +1458,19 @@ static int __ref kernel_init(void *unused) "See Linux Documentation/admin-guide/init.rst for guidance."); } -void console_on_rootfs(void) +/* Open /dev/console, for stdin/stdout/stderr, this should never fail */ +void __init console_on_rootfs(void) { - /* Open the /dev/console as stdin, this should never fail */ - if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - pr_err("Warning: unable to open an initial console.\n"); + struct file *file = filp_open("/dev/console", O_RDWR, 0); - /* create stdout/stderr */ - (void) ksys_dup(0); - (void) ksys_dup(0); + if (IS_ERR(file)) { + pr_err("Warning: unable to open an initial console.\n"); + return; + } + init_dup(file); + init_dup(file); + init_dup(file); + fput(file); } static noinline void __init kernel_init_freeable(void) @@ -1510,12 +1515,7 @@ static noinline void __init kernel_init_freeable(void) * check if there is an early userspace init. If yes, let it do all * the work */ - - if (!ramdisk_execute_command) - ramdisk_execute_command = "/init"; - - if (ksys_access((const char __user *) - ramdisk_execute_command, 0) != 0) { + if (init_eaccess(ramdisk_execute_command) != 0) { ramdisk_execute_command = NULL; prepare_namespace(); } diff --git a/init/noinitramfs.c b/init/noinitramfs.c index fa9cdfa7101d..3d62b07f3bb9 100644 --- a/init/noinitramfs.c +++ b/init/noinitramfs.c @@ -9,6 +9,7 @@ #include <linux/stat.h> #include <linux/kdev_t.h> #include <linux/syscalls.h> +#include <linux/init_syscalls.h> /* * Create a simple rootfs that is similar to the default initramfs @@ -17,17 +18,16 @@ static int __init default_rootfs(void) { int err; - err = ksys_mkdir((const char __user __force *) "/dev", 0755); + err = init_mkdir("/dev", 0755); if (err < 0) goto out; - err = ksys_mknod((const char __user __force *) "/dev/console", - S_IFCHR | S_IRUSR | S_IWUSR, + err = init_mknod("/dev/console", S_IFCHR | S_IRUSR | S_IWUSR, new_encode_dev(MKDEV(5, 1))); if (err < 0) goto out; - err = ksys_mkdir((const char __user __force *) "/root", 0700); + err = init_mkdir("/root", 0700); if (err < 0) goto out; |