-rw-r--r--  0000_README                |    4
-rw-r--r--  1175_linux-5.10.176.patch  | 8752
2 files changed, 8756 insertions, 0 deletions
diff --git a/0000_README b/0000_README
index f3c2cbaf..50964ce7 100644
--- a/0000_README
+++ b/0000_README
@@ -743,6 +743,10 @@ Patch: 1174_linux-5.10.175.patch
From: https://www.kernel.org
Desc: Linux 5.10.175
+Patch: 1175_linux-5.10.176.patch
+From: https://www.kernel.org
+Desc: Linux 5.10.176
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1175_linux-5.10.176.patch b/1175_linux-5.10.176.patch
new file mode 100644
index 00000000..8c46ce2a
--- /dev/null
+++ b/1175_linux-5.10.176.patch
@@ -0,0 +1,8752 @@
+diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
+index ca52c82e5bb54..f7b69a0e71e1c 100644
+--- a/Documentation/filesystems/vfs.rst
++++ b/Documentation/filesystems/vfs.rst
+@@ -1188,7 +1188,7 @@ defined:
+ return
+ -ECHILD and it will be called again in ref-walk mode.
+
+-``_weak_revalidate``
++``d_weak_revalidate``
+ called when the VFS needs to revalidate a "jumped" dentry. This
+ is called when a path-walk ends at dentry that was not acquired
+ by doing a lookup in the parent directory. This includes "/",
+diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
+index 87cf5c010d5dd..ed2e45f9b7627 100644
+--- a/Documentation/trace/ftrace.rst
++++ b/Documentation/trace/ftrace.rst
+@@ -2923,7 +2923,7 @@ Produces::
+ bash-1994 [000] .... 4342.324898: ima_get_action <-process_measurement
+ bash-1994 [000] .... 4342.324898: ima_match_policy <-ima_get_action
+ bash-1994 [000] .... 4342.324899: do_truncate <-do_last
+- bash-1994 [000] .... 4342.324899: should_remove_suid <-do_truncate
++ bash-1994 [000] .... 4342.324899: setattr_should_drop_suidgid <-do_truncate
+ bash-1994 [000] .... 4342.324899: notify_change <-do_truncate
+ bash-1994 [000] .... 4342.324900: current_fs_time <-notify_change
+ bash-1994 [000] .... 4342.324900: current_kernel_time <-current_fs_time
+diff --git a/Makefile b/Makefile
+index e6b09052f222b..71caf59383615 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 5
+ PATCHLEVEL = 10
+-SUBLEVEL = 175
++SUBLEVEL = 176
+ EXTRAVERSION =
+ NAME = Dare mighty things
+
+diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
+index 0b4965573656f..88bacf4999c47 100644
+--- a/arch/s390/boot/ipl_report.c
++++ b/arch/s390/boot/ipl_report.c
+@@ -57,11 +57,19 @@ repeat:
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
+ intersects(INITRD_START, INITRD_SIZE, safe_addr, size))
+ safe_addr = INITRD_START + INITRD_SIZE;
++ if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) {
++ safe_addr = (unsigned long)comps + comps->len;
++ goto repeat;
++ }
+ for_each_rb_entry(comp, comps)
+ if (intersects(safe_addr, size, comp->addr, comp->len)) {
+ safe_addr = comp->addr + comp->len;
+ goto repeat;
+ }
++ if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) {
++ safe_addr = (unsigned long)certs + certs->len;
++ goto repeat;
++ }
+ for_each_rb_entry(cert, certs)
+ if (intersects(safe_addr, size, cert->addr, cert->len)) {
+ safe_addr = cert->addr + cert->len;
+diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
+index 1906387a0faf4..0b7c81389c50a 100644
+--- a/arch/x86/kernel/cpu/mce/core.c
++++ b/arch/x86/kernel/cpu/mce/core.c
+@@ -2309,6 +2309,7 @@ static void mce_restart(void)
+ {
+ mce_timer_delete_all();
+ on_each_cpu(mce_cpu_restart, NULL, 1);
++ mce_schedule_work();
+ }
+
+ /* Toggle features for corrected errors */
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 91371b01eae0c..c165ddbb672fe 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2998,7 +2998,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12,
+ enum vm_entry_failure_code *entry_failure_code)
+ {
+- bool ia32e;
++ bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
+
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+
+@@ -3024,6 +3024,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ vmcs12->guest_ia32_perf_global_ctrl)))
+ return -EINVAL;
+
++ if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
++ return -EINVAL;
++
++ if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
++ CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
++ return -EINVAL;
++
+ /*
+ * If the load IA32_EFER VM-entry control is 1, the following checks
+ * are performed on the field for the IA32_EFER MSR:
+@@ -3035,7 +3042,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ */
+ if (to_vmx(vcpu)->nested.nested_run_pending &&
+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
+- ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
+ if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
+ CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
+ CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
+diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
+index 011e042b47ba7..5ec47af786ddb 100644
+--- a/arch/x86/mm/mem_encrypt_identity.c
++++ b/arch/x86/mm/mem_encrypt_identity.c
+@@ -586,7 +586,8 @@ void __init sme_enable(struct boot_params *bp)
+ cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
+ ((u64)bp->ext_cmd_line_ptr << 32));
+
+- cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
++ if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0)
++ return;
+
+ if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
+ sme_me_mask = me_mask;
+diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
+index 40c53632512b7..9617688b58b32 100644
+--- a/drivers/block/Kconfig
++++ b/drivers/block/Kconfig
+@@ -16,13 +16,7 @@ menuconfig BLK_DEV
+
+ if BLK_DEV
+
+-config BLK_DEV_NULL_BLK
+- tristate "Null test block driver"
+- select CONFIGFS_FS
+-
+-config BLK_DEV_NULL_BLK_FAULT_INJECTION
+- bool "Support fault injection for Null test block driver"
+- depends on BLK_DEV_NULL_BLK && FAULT_INJECTION
++source "drivers/block/null_blk/Kconfig"
+
+ config BLK_DEV_FD
+ tristate "Normal floppy disk support"
+diff --git a/drivers/block/Makefile b/drivers/block/Makefile
+index e1f63117ee94f..a3170859e01d4 100644
+--- a/drivers/block/Makefile
++++ b/drivers/block/Makefile
+@@ -41,12 +41,7 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
+ obj-$(CONFIG_ZRAM) += zram/
+ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/
+
+-obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
+-null_blk-objs := null_blk_main.o
+-ifeq ($(CONFIG_BLK_DEV_ZONED), y)
+-null_blk-$(CONFIG_TRACING) += null_blk_trace.o
+-endif
+-null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o
++obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/
+
+ skd-y := skd_main.o
+ swim_mod-y := swim.o swim_asm.o
+diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
+deleted file mode 100644
+index 7de703f28617b..0000000000000
+--- a/drivers/block/null_blk.h
++++ /dev/null
+@@ -1,137 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef __BLK_NULL_BLK_H
+-#define __BLK_NULL_BLK_H
+-
+-#undef pr_fmt
+-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+-
+-#include <linux/blkdev.h>
+-#include <linux/slab.h>
+-#include <linux/blk-mq.h>
+-#include <linux/hrtimer.h>
+-#include <linux/configfs.h>
+-#include <linux/badblocks.h>
+-#include <linux/fault-inject.h>
+-
+-struct nullb_cmd {
+- struct request *rq;
+- struct bio *bio;
+- unsigned int tag;
+- blk_status_t error;
+- struct nullb_queue *nq;
+- struct hrtimer timer;
+- bool fake_timeout;
+-};
+-
+-struct nullb_queue {
+- unsigned long *tag_map;
+- wait_queue_head_t wait;
+- unsigned int queue_depth;
+- struct nullb_device *dev;
+- unsigned int requeue_selection;
+-
+- struct nullb_cmd *cmds;
+-};
+-
+-struct nullb_device {
+- struct nullb *nullb;
+- struct config_item item;
+- struct radix_tree_root data; /* data stored in the disk */
+- struct radix_tree_root cache; /* disk cache data */
+- unsigned long flags; /* device flags */
+- unsigned int curr_cache;
+- struct badblocks badblocks;
+-
+- unsigned int nr_zones;
+- unsigned int nr_zones_imp_open;
+- unsigned int nr_zones_exp_open;
+- unsigned int nr_zones_closed;
+- struct blk_zone *zones;
+- sector_t zone_size_sects;
+- spinlock_t zone_lock;
+- unsigned long *zone_locks;
+-
+- unsigned long size; /* device size in MB */
+- unsigned long completion_nsec; /* time in ns to complete a request */
+- unsigned long cache_size; /* disk cache size in MB */
+- unsigned long zone_size; /* zone size in MB if device is zoned */
+- unsigned long zone_capacity; /* zone capacity in MB if device is zoned */
+- unsigned int zone_nr_conv; /* number of conventional zones */
+- unsigned int zone_max_open; /* max number of open zones */
+- unsigned int zone_max_active; /* max number of active zones */
+- unsigned int submit_queues; /* number of submission queues */
+- unsigned int home_node; /* home node for the device */
+- unsigned int queue_mode; /* block interface */
+- unsigned int blocksize; /* block size */
+- unsigned int irqmode; /* IRQ completion handler */
+- unsigned int hw_queue_depth; /* queue depth */
+- unsigned int index; /* index of the disk, only valid with a disk */
+- unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */
+- bool blocking; /* blocking blk-mq device */
+- bool use_per_node_hctx; /* use per-node allocation for hardware context */
+- bool power; /* power on/off the device */
+- bool memory_backed; /* if data is stored in memory */
+- bool discard; /* if support discard */
+- bool zoned; /* if device is zoned */
+-};
+-
+-struct nullb {
+- struct nullb_device *dev;
+- struct list_head list;
+- unsigned int index;
+- struct request_queue *q;
+- struct gendisk *disk;
+- struct blk_mq_tag_set *tag_set;
+- struct blk_mq_tag_set __tag_set;
+- unsigned int queue_depth;
+- atomic_long_t cur_bytes;
+- struct hrtimer bw_timer;
+- unsigned long cache_flush_pos;
+- spinlock_t lock;
+-
+- struct nullb_queue *queues;
+- unsigned int nr_queues;
+- char disk_name[DISK_NAME_LEN];
+-};
+-
+-blk_status_t null_process_cmd(struct nullb_cmd *cmd,
+- enum req_opf op, sector_t sector,
+- unsigned int nr_sectors);
+-
+-#ifdef CONFIG_BLK_DEV_ZONED
+-int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q);
+-int null_register_zoned_dev(struct nullb *nullb);
+-void null_free_zoned_dev(struct nullb_device *dev);
+-int null_report_zones(struct gendisk *disk, sector_t sector,
+- unsigned int nr_zones, report_zones_cb cb, void *data);
+-blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
+- enum req_opf op, sector_t sector,
+- sector_t nr_sectors);
+-size_t null_zone_valid_read_len(struct nullb *nullb,
+- sector_t sector, unsigned int len);
+-#else
+-static inline int null_init_zoned_dev(struct nullb_device *dev,
+- struct request_queue *q)
+-{
+- pr_err("CONFIG_BLK_DEV_ZONED not enabled\n");
+- return -EINVAL;
+-}
+-static inline int null_register_zoned_dev(struct nullb *nullb)
+-{
+- return -ENODEV;
+-}
+-static inline void null_free_zoned_dev(struct nullb_device *dev) {}
+-static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
+- enum req_opf op, sector_t sector, sector_t nr_sectors)
+-{
+- return BLK_STS_NOTSUPP;
+-}
+-static inline size_t null_zone_valid_read_len(struct nullb *nullb,
+- sector_t sector,
+- unsigned int len)
+-{
+- return len;
+-}
+-#define null_report_zones NULL
+-#endif /* CONFIG_BLK_DEV_ZONED */
+-#endif /* __NULL_BLK_H */
+diff --git a/drivers/block/null_blk/Kconfig b/drivers/block/null_blk/Kconfig
+new file mode 100644
+index 0000000000000..6bf1f8ca20a24
+--- /dev/null
++++ b/drivers/block/null_blk/Kconfig
+@@ -0,0 +1,12 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# Null block device driver configuration
++#
++
++config BLK_DEV_NULL_BLK
++ tristate "Null test block driver"
++ select CONFIGFS_FS
++
++config BLK_DEV_NULL_BLK_FAULT_INJECTION
++ bool "Support fault injection for Null test block driver"
++ depends on BLK_DEV_NULL_BLK && FAULT_INJECTION
+diff --git a/drivers/block/null_blk/Makefile b/drivers/block/null_blk/Makefile
+new file mode 100644
+index 0000000000000..84c36e512ab89
+--- /dev/null
++++ b/drivers/block/null_blk/Makefile
+@@ -0,0 +1,11 @@
++# SPDX-License-Identifier: GPL-2.0
++
++# needed for trace events
++ccflags-y += -I$(src)
++
++obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
++null_blk-objs := main.o
++ifeq ($(CONFIG_BLK_DEV_ZONED), y)
++null_blk-$(CONFIG_TRACING) += trace.o
++endif
++null_blk-$(CONFIG_BLK_DEV_ZONED) += zoned.o
+diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
+new file mode 100644
+index 0000000000000..25db095e943b7
+--- /dev/null
++++ b/drivers/block/null_blk/main.c
+@@ -0,0 +1,2036 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
++ * Shaohua Li <shli@fb.com>
++ */
++#include <linux/module.h>
++
++#include <linux/moduleparam.h>
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/init.h>
++#include "null_blk.h"
++
++#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
++#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
++#define SECTOR_MASK (PAGE_SECTORS - 1)
++
++#define FREE_BATCH 16
++
++#define TICKS_PER_SEC 50ULL
++#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC)
++
++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
++static DECLARE_FAULT_ATTR(null_timeout_attr);
++static DECLARE_FAULT_ATTR(null_requeue_attr);
++static DECLARE_FAULT_ATTR(null_init_hctx_attr);
++#endif
++
++static inline u64 mb_per_tick(int mbps)
++{
++ return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
++}
++
++/*
++ * Status flags for nullb_device.
++ *
++ * CONFIGURED: Device has been configured and turned on. Cannot reconfigure.
++ * UP: Device is currently on and visible in userspace.
++ * THROTTLED: Device is being throttled.
++ * CACHE: Device is using a write-back cache.
++ */
++enum nullb_device_flags {
++ NULLB_DEV_FL_CONFIGURED = 0,
++ NULLB_DEV_FL_UP = 1,
++ NULLB_DEV_FL_THROTTLED = 2,
++ NULLB_DEV_FL_CACHE = 3,
++};
++
++#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2)
++/*
++ * nullb_page is a page in memory for nullb devices.
++ *
++ * @page: The page holding the data.
++ * @bitmap: The bitmap represents which sector in the page has data.
++ * Each bit represents one block size. For example, sector 8
++ * will use the 7th bit
++ * The highest 2 bits of bitmap are for special purpose. LOCK means the cache
++ * page is being flushing to storage. FREE means the cache page is freed and
++ * should be skipped from flushing to storage. Please see
++ * null_make_cache_space
++ */
++struct nullb_page {
++ struct page *page;
++ DECLARE_BITMAP(bitmap, MAP_SZ);
++};
++#define NULLB_PAGE_LOCK (MAP_SZ - 1)
++#define NULLB_PAGE_FREE (MAP_SZ - 2)
++
++static LIST_HEAD(nullb_list);
++static struct mutex lock;
++static int null_major;
++static DEFINE_IDA(nullb_indexes);
++static struct blk_mq_tag_set tag_set;
++
++enum {
++ NULL_IRQ_NONE = 0,
++ NULL_IRQ_SOFTIRQ = 1,
++ NULL_IRQ_TIMER = 2,
++};
++
++enum {
++ NULL_Q_BIO = 0,
++ NULL_Q_RQ = 1,
++ NULL_Q_MQ = 2,
++};
++
++static int g_no_sched;
++module_param_named(no_sched, g_no_sched, int, 0444);
++MODULE_PARM_DESC(no_sched, "No io scheduler");
++
++static int g_submit_queues = 1;
++module_param_named(submit_queues, g_submit_queues, int, 0444);
++MODULE_PARM_DESC(submit_queues, "Number of submission queues");
++
++static int g_home_node = NUMA_NO_NODE;
++module_param_named(home_node, g_home_node, int, 0444);
++MODULE_PARM_DESC(home_node, "Home node for the device");
++
++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
++/*
++ * For more details about fault injection, please refer to
++ * Documentation/fault-injection/fault-injection.rst.
++ */
++static char g_timeout_str[80];
++module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
++MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>");
++
++static char g_requeue_str[80];
++module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
++MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>");
++
++static char g_init_hctx_str[80];
++module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
++MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
++#endif
++
++static int g_queue_mode = NULL_Q_MQ;
++
++static int null_param_store_val(const char *str, int *val, int min, int max)
++{
++ int ret, new_val;
++
++ ret = kstrtoint(str, 10, &new_val);
++ if (ret)
++ return -EINVAL;
++
++ if (new_val < min || new_val > max)
++ return -EINVAL;
++
++ *val = new_val;
++ return 0;
++}
++
++static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
++{
++ return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
++}
++
++static const struct kernel_param_ops null_queue_mode_param_ops = {
++ .set = null_set_queue_mode,
++ .get = param_get_int,
++};
++
++device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
++MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
++
++static int g_gb = 250;
++module_param_named(gb, g_gb, int, 0444);
++MODULE_PARM_DESC(gb, "Size in GB");
++
++static int g_bs = 512;
++module_param_named(bs, g_bs, int, 0444);
++MODULE_PARM_DESC(bs, "Block size (in bytes)");
++
++static unsigned int nr_devices = 1;
++module_param(nr_devices, uint, 0444);
++MODULE_PARM_DESC(nr_devices, "Number of devices to register");
++
++static bool g_blocking;
++module_param_named(blocking, g_blocking, bool, 0444);
++MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
++
++static bool shared_tags;
++module_param(shared_tags, bool, 0444);
++MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
++
++static bool g_shared_tag_bitmap;
++module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444);
++MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq");
++
++static int g_irqmode = NULL_IRQ_SOFTIRQ;
++
++static int null_set_irqmode(const char *str, const struct kernel_param *kp)
++{
++ return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
++ NULL_IRQ_TIMER);
++}
++
++static const struct kernel_param_ops null_irqmode_param_ops = {
++ .set = null_set_irqmode,
++ .get = param_get_int,
++};
++
++device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
++MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
++
++static unsigned long g_completion_nsec = 10000;
++module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
++MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
++
++static int g_hw_queue_depth = 64;
++module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
++MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
++
++static bool g_use_per_node_hctx;
++module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
++MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
++
++static bool g_zoned;
++module_param_named(zoned, g_zoned, bool, S_IRUGO);
++MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false");
++
++static unsigned long g_zone_size = 256;
++module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
++MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
++
++static unsigned long g_zone_capacity;
++module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
++MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");
++
++static unsigned int g_zone_nr_conv;
++module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
++MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
++
++static unsigned int g_zone_max_open;
++module_param_named(zone_max_open, g_zone_max_open, uint, 0444);
++MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)");
++
++static unsigned int g_zone_max_active;
++module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
++MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
++
++static struct nullb_device *null_alloc_dev(void);
++static void null_free_dev(struct nullb_device *dev);
++static void null_del_dev(struct nullb *nullb);
++static int null_add_dev(struct nullb_device *dev);
++static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
++
++static inline struct nullb_device *to_nullb_device(struct config_item *item)
++{
++ return item ? container_of(item, struct nullb_device, item) : NULL;
++}
++
++static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
++{
++ return snprintf(page, PAGE_SIZE, "%u\n", val);
++}
++
++static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
++ char *page)
++{
++ return snprintf(page, PAGE_SIZE, "%lu\n", val);
++}
++
++static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
++{
++ return snprintf(page, PAGE_SIZE, "%u\n", val);
++}
++
++static ssize_t nullb_device_uint_attr_store(unsigned int *val,
++ const char *page, size_t count)
++{
++ unsigned int tmp;
++ int result;
++
++ result = kstrtouint(page, 0, &tmp);
++ if (result < 0)
++ return result;
++
++ *val = tmp;
++ return count;
++}
++
++static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
++ const char *page, size_t count)
++{
++ int result;
++ unsigned long tmp;
++
++ result = kstrtoul(page, 0, &tmp);
++ if (result < 0)
++ return result;
++
++ *val = tmp;
++ return count;
++}
++
++static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
++ size_t count)
++{
++ bool tmp;
++ int result;
++
++ result = kstrtobool(page, &tmp);
++ if (result < 0)
++ return result;
++
++ *val = tmp;
++ return count;
++}
++
++/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
++#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \
++static ssize_t \
++nullb_device_##NAME##_show(struct config_item *item, char *page) \
++{ \
++ return nullb_device_##TYPE##_attr_show( \
++ to_nullb_device(item)->NAME, page); \
++} \
++static ssize_t \
++nullb_device_##NAME##_store(struct config_item *item, const char *page, \
++ size_t count) \
++{ \
++ int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
++ struct nullb_device *dev = to_nullb_device(item); \
++ TYPE new_value = 0; \
++ int ret; \
++ \
++ ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
++ if (ret < 0) \
++ return ret; \
++ if (apply_fn) \
++ ret = apply_fn(dev, new_value); \
++ else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \
++ ret = -EBUSY; \
++ if (ret < 0) \
++ return ret; \
++ dev->NAME = new_value; \
++ return count; \
++} \
++CONFIGFS_ATTR(nullb_device_, NAME);
++
++static int nullb_apply_submit_queues(struct nullb_device *dev,
++ unsigned int submit_queues)
++{
++ struct nullb *nullb = dev->nullb;
++ struct blk_mq_tag_set *set;
++
++ if (!nullb)
++ return 0;
++
++ /*
++ * Make sure that null_init_hctx() does not access nullb->queues[] past
++ * the end of that array.
++ */
++ if (submit_queues > nr_cpu_ids)
++ return -EINVAL;
++ set = nullb->tag_set;
++ blk_mq_update_nr_hw_queues(set, submit_queues);
++ return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM;
++}
++
++NULLB_DEVICE_ATTR(size, ulong, NULL);
++NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
++NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
++NULLB_DEVICE_ATTR(home_node, uint, NULL);
++NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
++NULLB_DEVICE_ATTR(blocksize, uint, NULL);
++NULLB_DEVICE_ATTR(irqmode, uint, NULL);
++NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
++NULLB_DEVICE_ATTR(index, uint, NULL);
++NULLB_DEVICE_ATTR(blocking, bool, NULL);
++NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
++NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
++NULLB_DEVICE_ATTR(discard, bool, NULL);
++NULLB_DEVICE_ATTR(mbps, uint, NULL);
++NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
++NULLB_DEVICE_ATTR(zoned, bool, NULL);
++NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
++NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
++NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
++NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
++NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
++
++static ssize_t nullb_device_power_show(struct config_item *item, char *page)
++{
++ return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
++}
++
++static ssize_t nullb_device_power_store(struct config_item *item,
++ const char *page, size_t count)
++{
++ struct nullb_device *dev = to_nullb_device(item);
++ bool newp = false;
++ ssize_t ret;
++
++ ret = nullb_device_bool_attr_store(&newp, page, count);
++ if (ret < 0)
++ return ret;
++
++ if (!dev->power && newp) {
++ if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
++ return count;
++ if (null_add_dev(dev)) {
++ clear_bit(NULLB_DEV_FL_UP, &dev->flags);
++ return -ENOMEM;
++ }
++
++ set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
++ dev->power = newp;
++ } else if (dev->power && !newp) {
++ if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
++ mutex_lock(&lock);
++ dev->power = newp;
++ null_del_dev(dev->nullb);
++ mutex_unlock(&lock);
++ }
++ clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
++ }
++
++ return count;
++}
++
++CONFIGFS_ATTR(nullb_device_, power);
++
++static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
++{
++ struct nullb_device *t_dev = to_nullb_device(item);
++
++ return badblocks_show(&t_dev->badblocks, page, 0);
++}
++
++static ssize_t nullb_device_badblocks_store(struct config_item *item,
++ const char *page, size_t count)
++{
++ struct nullb_device *t_dev = to_nullb_device(item);
++ char *orig, *buf, *tmp;
++ u64 start, end;
++ int ret;
++
++ orig = kstrndup(page, count, GFP_KERNEL);
++ if (!orig)
++ return -ENOMEM;
++
++ buf = strstrip(orig);
++
++ ret = -EINVAL;
++ if (buf[0] != '+' && buf[0] != '-')
++ goto out;
++ tmp = strchr(&buf[1], '-');
++ if (!tmp)
++ goto out;
++ *tmp = '\0';
++ ret = kstrtoull(buf + 1, 0, &start);
++ if (ret)
++ goto out;
++ ret = kstrtoull(tmp + 1, 0, &end);
++ if (ret)
++ goto out;
++ ret = -EINVAL;
++ if (start > end)
++ goto out;
++ /* enable badblocks */
++ cmpxchg(&t_dev->badblocks.shift, -1, 0);
++ if (buf[0] == '+')
++ ret = badblocks_set(&t_dev->badblocks, start,
++ end - start + 1, 1);
++ else
++ ret = badblocks_clear(&t_dev->badblocks, start,
++ end - start + 1);
++ if (ret == 0)
++ ret = count;
++out:
++ kfree(orig);
++ return ret;
++}
++CONFIGFS_ATTR(nullb_device_, badblocks);
++
++static struct configfs_attribute *nullb_device_attrs[] = {
++ &nullb_device_attr_size,
++ &nullb_device_attr_completion_nsec,
++ &nullb_device_attr_submit_queues,
++ &nullb_device_attr_home_node,
++ &nullb_device_attr_queue_mode,
++ &nullb_device_attr_blocksize,
++ &nullb_device_attr_irqmode,
++ &nullb_device_attr_hw_queue_depth,
++ &nullb_device_attr_index,
++ &nullb_device_attr_blocking,
++ &nullb_device_attr_use_per_node_hctx,
++ &nullb_device_attr_power,
++ &nullb_device_attr_memory_backed,
++ &nullb_device_attr_discard,
++ &nullb_device_attr_mbps,
++ &nullb_device_attr_cache_size,
++ &nullb_device_attr_badblocks,
++ &nullb_device_attr_zoned,
++ &nullb_device_attr_zone_size,
++ &nullb_device_attr_zone_capacity,
++ &nullb_device_attr_zone_nr_conv,
++ &nullb_device_attr_zone_max_open,
++ &nullb_device_attr_zone_max_active,
++ NULL,
++};
++
++static void nullb_device_release(struct config_item *item)
++{
++ struct nullb_device *dev = to_nullb_device(item);
++
++ null_free_device_storage(dev, false);
++ null_free_dev(dev);
++}
++
++static struct configfs_item_operations nullb_device_ops = {
++ .release = nullb_device_release,
++};
++
++static const struct config_item_type nullb_device_type = {
++ .ct_item_ops = &nullb_device_ops,
++ .ct_attrs = nullb_device_attrs,
++ .ct_owner = THIS_MODULE,
++};
++
++static struct
++config_item *nullb_group_make_item(struct config_group *group, const char *name)
++{
++ struct nullb_device *dev;
++
++ dev = null_alloc_dev();
++ if (!dev)
++ return ERR_PTR(-ENOMEM);
++
++ config_item_init_type_name(&dev->item, name, &nullb_device_type);
++
++ return &dev->item;
++}
++
++static void
++nullb_group_drop_item(struct config_group *group, struct config_item *item)
++{
++ struct nullb_device *dev = to_nullb_device(item);
++
++ if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
++ mutex_lock(&lock);
++ dev->power = false;
++ null_del_dev(dev->nullb);
++ mutex_unlock(&lock);
++ }
++
++ config_item_put(item);
++}
++
++static ssize_t memb_group_features_show(struct config_item *item, char *page)
++{
++ return snprintf(page, PAGE_SIZE,
++ "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n");
++}
++
++CONFIGFS_ATTR_RO(memb_group_, features);
++
++static struct configfs_attribute *nullb_group_attrs[] = {
++ &memb_group_attr_features,
++ NULL,
++};
++
++static struct configfs_group_operations nullb_group_ops = {
++ .make_item = nullb_group_make_item,
++ .drop_item = nullb_group_drop_item,
++};
++
++static const struct config_item_type nullb_group_type = {
++ .ct_group_ops = &nullb_group_ops,
++ .ct_attrs = nullb_group_attrs,
++ .ct_owner = THIS_MODULE,
++};
++
++static struct configfs_subsystem nullb_subsys = {
++ .su_group = {
++ .cg_item = {
++ .ci_namebuf = "nullb",
++ .ci_type = &nullb_group_type,
++ },
++ },
++};
++
++static inline int null_cache_active(struct nullb *nullb)
++{
++ return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
++}
++
++static struct nullb_device *null_alloc_dev(void)
++{
++ struct nullb_device *dev;
++
++ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
++ if (!dev)
++ return NULL;
++ INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
++ INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
++ if (badblocks_init(&dev->badblocks, 0)) {
++ kfree(dev);
++ return NULL;
++ }
++
++ dev->size = g_gb * 1024;
++ dev->completion_nsec = g_completion_nsec;
++ dev->submit_queues = g_submit_queues;
++ dev->home_node = g_home_node;
++ dev->queue_mode = g_queue_mode;
++ dev->blocksize = g_bs;
++ dev->irqmode = g_irqmode;
++ dev->hw_queue_depth = g_hw_queue_depth;
++ dev->blocking = g_blocking;
++ dev->use_per_node_hctx = g_use_per_node_hctx;
++ dev->zoned = g_zoned;
++ dev->zone_size = g_zone_size;
++ dev->zone_capacity = g_zone_capacity;
++ dev->zone_nr_conv = g_zone_nr_conv;
++ dev->zone_max_open = g_zone_max_open;
++ dev->zone_max_active = g_zone_max_active;
++ return dev;
++}
++
++static void null_free_dev(struct nullb_device *dev)
++{
++ if (!dev)
++ return;
++
++ null_free_zoned_dev(dev);
++ badblocks_exit(&dev->badblocks);
++ kfree(dev);
++}
++
++static void put_tag(struct nullb_queue *nq, unsigned int tag)
++{
++ clear_bit_unlock(tag, nq->tag_map);
++
++ if (waitqueue_active(&nq->wait))
++ wake_up(&nq->wait);
++}
++
++static unsigned int get_tag(struct nullb_queue *nq)
++{
++ unsigned int tag;
++
++ do {
++ tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
++ if (tag >= nq->queue_depth)
++ return -1U;
++ } while (test_and_set_bit_lock(tag, nq->tag_map));
++
++ return tag;
++}
++
++static void free_cmd(struct nullb_cmd *cmd)
++{
++ put_tag(cmd->nq, cmd->tag);
++}
++
++static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
++
++static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
++{
++ struct nullb_cmd *cmd;
++ unsigned int tag;
++
++ tag = get_tag(nq);
++ if (tag != -1U) {
++ cmd = &nq->cmds[tag];
++ cmd->tag = tag;
++ cmd->error = BLK_STS_OK;
++ cmd->nq = nq;
++ if (nq->dev->irqmode == NULL_IRQ_TIMER) {
++ hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
++ HRTIMER_MODE_REL);
++ cmd->timer.function = null_cmd_timer_expired;
++ }
++ return cmd;
++ }
++
++ return NULL;
++}
++
++static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
++{
++ struct nullb_cmd *cmd;
++ DEFINE_WAIT(wait);
++
++ cmd = __alloc_cmd(nq);
++ if (cmd || !can_wait)
++ return cmd;
++
++ do {
++ prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
++ cmd = __alloc_cmd(nq);
++ if (cmd)
++ break;
++
++ io_schedule();
++ } while (1);
++
++ finish_wait(&nq->wait, &wait);
++ return cmd;
++}
++
++static void end_cmd(struct nullb_cmd *cmd)
++{
++ int queue_mode = cmd->nq->dev->queue_mode;
++
++ switch (queue_mode) {
++ case NULL_Q_MQ:
++ blk_mq_end_request(cmd->rq, cmd->error);
++ return;
++ case NULL_Q_BIO:
++ cmd->bio->bi_status = cmd->error;
++ bio_endio(cmd->bio);
++ break;
++ }
++
++ free_cmd(cmd);
++}
++
++static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
++{
++ end_cmd(container_of(timer, struct nullb_cmd, timer));
++
++ return HRTIMER_NORESTART;
++}
++
++static void null_cmd_end_timer(struct nullb_cmd *cmd)
++{
++ ktime_t kt = cmd->nq->dev->completion_nsec;
++
++ hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
++}
++
++static void null_complete_rq(struct request *rq)
++{
++ end_cmd(blk_mq_rq_to_pdu(rq));
++}
++
++static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
++{
++ struct nullb_page *t_page;
++
++ t_page = kmalloc(sizeof(struct nullb_page), gfp_flags);
++ if (!t_page)
++ goto out;
++
++ t_page->page = alloc_pages(gfp_flags, 0);
++ if (!t_page->page)
++ goto out_freepage;
++
++ memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
++ return t_page;
++out_freepage:
++ kfree(t_page);
++out:
++ return NULL;
++}
++
++static void null_free_page(struct nullb_page *t_page)
++{
++ __set_bit(NULLB_PAGE_FREE, t_page->bitmap);
++ if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
++ return;
++ __free_page(t_page->page);
++ kfree(t_page);
++}
++
++static bool null_page_empty(struct nullb_page *page)
++{
++ int size = MAP_SZ - 2;
++
++ return find_first_bit(page->bitmap, size) == size;
++}
++
++static void null_free_sector(struct nullb *nullb, sector_t sector,
++ bool is_cache)
++{
++ unsigned int sector_bit;
++ u64 idx;
++ struct nullb_page *t_page, *ret;
++ struct radix_tree_root *root;
++
++ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
++ idx = sector >> PAGE_SECTORS_SHIFT;
++ sector_bit = (sector & SECTOR_MASK);
++
++ t_page = radix_tree_lookup(root, idx);
++ if (t_page) {
++ __clear_bit(sector_bit, t_page->bitmap);
++
++ if (null_page_empty(t_page)) {
++ ret = radix_tree_delete_item(root, idx, t_page);
++ WARN_ON(ret != t_page);
++ null_free_page(ret);
++ if (is_cache)
++ nullb->dev->curr_cache -= PAGE_SIZE;
++ }
++ }
++}
++
++static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
++ struct nullb_page *t_page, bool is_cache)
++{
++ struct radix_tree_root *root;
++
++ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
++
++ if (radix_tree_insert(root, idx, t_page)) {
++ null_free_page(t_page);
++ t_page = radix_tree_lookup(root, idx);
++ WARN_ON(!t_page || t_page->page->index != idx);
++ } else if (is_cache)
++ nullb->dev->curr_cache += PAGE_SIZE;
++
++ return t_page;
++}
++
++static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
++{
++ unsigned long pos = 0;
++ int nr_pages;
++ struct nullb_page *ret, *t_pages[FREE_BATCH];
++ struct radix_tree_root *root;
++
++ root = is_cache ? &dev->cache : &dev->data;
++
++ do {
++ int i;
++
++ nr_pages = radix_tree_gang_lookup(root,
++ (void **)t_pages, pos, FREE_BATCH);
++
++ for (i = 0; i < nr_pages; i++) {
++ pos = t_pages[i]->page->index;
++ ret = radix_tree_delete_item(root, pos, t_pages[i]);
++ WARN_ON(ret != t_pages[i]);
++ null_free_page(ret);
++ }
++
++ pos++;
++ } while (nr_pages == FREE_BATCH);
++
++ if (is_cache)
++ dev->curr_cache = 0;
++}
++
++static struct nullb_page *__null_lookup_page(struct nullb *nullb,
++ sector_t sector, bool for_write, bool is_cache)
++{
++ unsigned int sector_bit;
++ u64 idx;
++ struct nullb_page *t_page;
++ struct radix_tree_root *root;
++
++ idx = sector >> PAGE_SECTORS_SHIFT;
++ sector_bit = (sector & SECTOR_MASK);
++
++ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
++ t_page = radix_tree_lookup(root, idx);
++ WARN_ON(t_page && t_page->page->index != idx);
++
++ if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
++ return t_page;
++
++ return NULL;
++}
++
++static struct nullb_page *null_lookup_page(struct nullb *nullb,
++ sector_t sector, bool for_write, bool ignore_cache)
++{
++ struct nullb_page *page = NULL;
++
++ if (!ignore_cache)
++ page = __null_lookup_page(nullb, sector, for_write, true);
++ if (page)
++ return page;
++ return __null_lookup_page(nullb, sector, for_write, false);
++}
++
++static struct nullb_page *null_insert_page(struct nullb *nullb,
++ sector_t sector, bool ignore_cache)
++ __releases(&nullb->lock)
++ __acquires(&nullb->lock)
++{
++ u64 idx;
++ struct nullb_page *t_page;
++
++ t_page = null_lookup_page(nullb, sector, true, ignore_cache);
++ if (t_page)
++ return t_page;
++
++ spin_unlock_irq(&nullb->lock);
++
++ t_page = null_alloc_page(GFP_NOIO);
++ if (!t_page)
++ goto out_lock;
++
++ if (radix_tree_preload(GFP_NOIO))
++ goto out_freepage;
++
++ spin_lock_irq(&nullb->lock);
++ idx = sector >> PAGE_SECTORS_SHIFT;
++ t_page->page->index = idx;
++ t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
++ radix_tree_preload_end();
++
++ return t_page;
++out_freepage:
++ null_free_page(t_page);
++out_lock:
++ spin_lock_irq(&nullb->lock);
++ return null_lookup_page(nullb, sector, true, ignore_cache);
++}
++
++static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
++{
++ int i;
++ unsigned int offset;
++ u64 idx;
++ struct nullb_page *t_page, *ret;
++ void *dst, *src;
++
++ idx = c_page->page->index;
++
++ t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
++
++ __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
++ if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
++ null_free_page(c_page);
++ if (t_page && null_page_empty(t_page)) {
++ ret = radix_tree_delete_item(&nullb->dev->data,
++ idx, t_page);
++ null_free_page(t_page);
++ }
++ return 0;
++ }
++
++ if (!t_page)
++ return -ENOMEM;
++
++ src = kmap_atomic(c_page->page);
++ dst = kmap_atomic(t_page->page);
++
++ for (i = 0; i < PAGE_SECTORS;
++ i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
++ if (test_bit(i, c_page->bitmap)) {
++ offset = (i << SECTOR_SHIFT);
++ memcpy(dst + offset, src + offset,
++ nullb->dev->blocksize);
++ __set_bit(i, t_page->bitmap);
++ }
++ }
++
++ kunmap_atomic(dst);
++ kunmap_atomic(src);
++
++ ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
++ null_free_page(ret);
++ nullb->dev->curr_cache -= PAGE_SIZE;
++
++ return 0;
++}
++
++static int null_make_cache_space(struct nullb *nullb, unsigned long n)
++{
++ int i, err, nr_pages;
++ struct nullb_page *c_pages[FREE_BATCH];
++ unsigned long flushed = 0, one_round;
++
++again:
++ if ((nullb->dev->cache_size * 1024 * 1024) >
++ nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
++ return 0;
++
++ nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
++ (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
++ /*
++ * nullb_flush_cache_page could unlock before using the c_pages. To
++ * avoid race, we don't allow page free
++ */
++ for (i = 0; i < nr_pages; i++) {
++ nullb->cache_flush_pos = c_pages[i]->page->index;
++ /*
++ * We found the page which is being flushed to disk by other
++ * threads
++ */
++ if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
++ c_pages[i] = NULL;
++ else
++ __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
++ }
++
++ one_round = 0;
++ for (i = 0; i < nr_pages; i++) {
++ if (c_pages[i] == NULL)
++ continue;
++ err = null_flush_cache_page(nullb, c_pages[i]);
++ if (err)
++ return err;
++ one_round++;
++ }
++ flushed += one_round << PAGE_SHIFT;
++
++ if (n > flushed) {
++ if (nr_pages == 0)
++ nullb->cache_flush_pos = 0;
++ if (one_round == 0) {
++ /* give other threads a chance */
++ spin_unlock_irq(&nullb->lock);
++ spin_lock_irq(&nullb->lock);
++ }
++ goto again;
++ }
++ return 0;
++}
++
++static int copy_to_nullb(struct nullb *nullb, struct page *source,
++ unsigned int off, sector_t sector, size_t n, bool is_fua)
++{
++ size_t temp, count = 0;
++ unsigned int offset;
++ struct nullb_page *t_page;
++ void *dst, *src;
++
++ while (count < n) {
++ temp = min_t(size_t, nullb->dev->blocksize, n - count);
++
++ if (null_cache_active(nullb) && !is_fua)
++ null_make_cache_space(nullb, PAGE_SIZE);
++
++ offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
++ t_page = null_insert_page(nullb, sector,
++ !null_cache_active(nullb) || is_fua);
++ if (!t_page)
++ return -ENOSPC;
++
++ src = kmap_atomic(source);
++ dst = kmap_atomic(t_page->page);
++ memcpy(dst + offset, src + off + count, temp);
++ kunmap_atomic(dst);
++ kunmap_atomic(src);
++
++ __set_bit(sector & SECTOR_MASK, t_page->bitmap);
++
++ if (is_fua)
++ null_free_sector(nullb, sector, true);
++
++ count += temp;
++ sector += temp >> SECTOR_SHIFT;
++ }
++ return 0;
++}
++
++static int copy_from_nullb(struct nullb *nullb, struct page *dest,
++ unsigned int off, sector_t sector, size_t n)
++{
++ size_t temp, count = 0;
++ unsigned int offset;
++ struct nullb_page *t_page;
++ void *dst, *src;
++
++ while (count < n) {
++ temp = min_t(size_t, nullb->dev->blocksize, n - count);
++
++ offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
++ t_page = null_lookup_page(nullb, sector, false,
++ !null_cache_active(nullb));
++
++ dst = kmap_atomic(dest);
++ if (!t_page) {
++ memset(dst + off + count, 0, temp);
++ goto next;
++ }
++ src = kmap_atomic(t_page->page);
++ memcpy(dst + off + count, src + offset, temp);
++ kunmap_atomic(src);
++next:
++ kunmap_atomic(dst);
++
++ count += temp;
++ sector += temp >> SECTOR_SHIFT;
++ }
++ return 0;
++}
++
++static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
++ unsigned int len, unsigned int off)
++{
++ void *dst;
++
++ dst = kmap_atomic(page);
++ memset(dst + off, 0xFF, len);
++ kunmap_atomic(dst);
++}
++
++static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n)
++{
++ size_t temp;
++
++ spin_lock_irq(&nullb->lock);
++ while (n > 0) {
++ temp = min_t(size_t, n, nullb->dev->blocksize);
++ null_free_sector(nullb, sector, false);
++ if (null_cache_active(nullb))
++ null_free_sector(nullb, sector, true);
++ sector += temp >> SECTOR_SHIFT;
++ n -= temp;
++ }
++ spin_unlock_irq(&nullb->lock);
++}
++
++static int null_handle_flush(struct nullb *nullb)
++{
++ int err;
++
++ if (!null_cache_active(nullb))
++ return 0;
++
++ spin_lock_irq(&nullb->lock);
++ while (true) {
++ err = null_make_cache_space(nullb,
++ nullb->dev->cache_size * 1024 * 1024);
++ if (err || nullb->dev->curr_cache == 0)
++ break;
++ }
++
++ WARN_ON(!radix_tree_empty(&nullb->dev->cache));
++ spin_unlock_irq(&nullb->lock);
++ return err;
++}
++
++static int null_transfer(struct nullb *nullb, struct page *page,
++ unsigned int len, unsigned int off, bool is_write, sector_t sector,
++ bool is_fua)
++{
++ struct nullb_device *dev = nullb->dev;
++ unsigned int valid_len = len;
++ int err = 0;
++
++ if (!is_write) {
++ if (dev->zoned)
++ valid_len = null_zone_valid_read_len(nullb,
++ sector, len);
++
++ if (valid_len) {
++ err = copy_from_nullb(nullb, page, off,
++ sector, valid_len);
++ off += valid_len;
++ len -= valid_len;
++ }
++
++ if (len)
++ nullb_fill_pattern(nullb, page, len, off);
++ flush_dcache_page(page);
++ } else {
++ flush_dcache_page(page);
++ err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
++ }
++
++ return err;
++}
++
++static int null_handle_rq(struct nullb_cmd *cmd)
++{
++ struct request *rq = cmd->rq;
++ struct nullb *nullb = cmd->nq->dev->nullb;
++ int err;
++ unsigned int len;
++ sector_t sector;
++ struct req_iterator iter;
++ struct bio_vec bvec;
++
++ sector = blk_rq_pos(rq);
++
++ if (req_op(rq) == REQ_OP_DISCARD) {
++ null_handle_discard(nullb, sector, blk_rq_bytes(rq));
++ return 0;
++ }
++
++ spin_lock_irq(&nullb->lock);
++ rq_for_each_segment(bvec, rq, iter) {
++ len = bvec.bv_len;
++ err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
++ op_is_write(req_op(rq)), sector,
++ rq->cmd_flags & REQ_FUA);
++ if (err) {
++ spin_unlock_irq(&nullb->lock);
++ return err;
++ }
++ sector += len >> SECTOR_SHIFT;
++ }
++ spin_unlock_irq(&nullb->lock);
++
++ return 0;
++}
++
++static int null_handle_bio(struct nullb_cmd *cmd)
++{
++ struct bio *bio = cmd->bio;
++ struct nullb *nullb = cmd->nq->dev->nullb;
++ int err;
++ unsigned int len;
++ sector_t sector;
++ struct bio_vec bvec;
++ struct bvec_iter iter;
++
++ sector = bio->bi_iter.bi_sector;
++
++ if (bio_op(bio) == REQ_OP_DISCARD) {
++ null_handle_discard(nullb, sector,
++ bio_sectors(bio) << SECTOR_SHIFT);
++ return 0;
++ }
++
++ spin_lock_irq(&nullb->lock);
++ bio_for_each_segment(bvec, bio, iter) {
++ len = bvec.bv_len;
++ err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
++ op_is_write(bio_op(bio)), sector,
++ bio->bi_opf & REQ_FUA);
++ if (err) {
++ spin_unlock_irq(&nullb->lock);
++ return err;
++ }
++ sector += len >> SECTOR_SHIFT;
++ }
++ spin_unlock_irq(&nullb->lock);
++ return 0;
++}
++
++static void null_stop_queue(struct nullb *nullb)
++{
++ struct request_queue *q = nullb->q;
++
++ if (nullb->dev->queue_mode == NULL_Q_MQ)
++ blk_mq_stop_hw_queues(q);
++}
++
++static void null_restart_queue_async(struct nullb *nullb)
++{
++ struct request_queue *q = nullb->q;
++
++ if (nullb->dev->queue_mode == NULL_Q_MQ)
++ blk_mq_start_stopped_hw_queues(q, true);
++}
++
++static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
++{
++ struct nullb_device *dev = cmd->nq->dev;
++ struct nullb *nullb = dev->nullb;
++ blk_status_t sts = BLK_STS_OK;
++ struct request *rq = cmd->rq;
++
++ if (!hrtimer_active(&nullb->bw_timer))
++ hrtimer_restart(&nullb->bw_timer);
++
++ if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
++ null_stop_queue(nullb);
++ /* race with timer */
++ if (atomic_long_read(&nullb->cur_bytes) > 0)
++ null_restart_queue_async(nullb);
++ /* requeue request */
++ sts = BLK_STS_DEV_RESOURCE;
++ }
++ return sts;
++}
++
++static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
++ sector_t sector,
++ sector_t nr_sectors)
++{
++ struct badblocks *bb = &cmd->nq->dev->badblocks;
++ sector_t first_bad;
++ int bad_sectors;
++
++ if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
++ return BLK_STS_IOERR;
++
++ return BLK_STS_OK;
++}
++
++static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
++ enum req_opf op)
++{
++ struct nullb_device *dev = cmd->nq->dev;
++ int err;
++
++ if (dev->queue_mode == NULL_Q_BIO)
++ err = null_handle_bio(cmd);
++ else
++ err = null_handle_rq(cmd);
++
++ return errno_to_blk_status(err);
++}
++
++static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
++{
++ struct nullb_device *dev = cmd->nq->dev;
++ struct bio *bio;
++
++ if (dev->memory_backed)
++ return;
++
++ if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
++ zero_fill_bio(cmd->bio);
++ } else if (req_op(cmd->rq) == REQ_OP_READ) {
++ __rq_for_each_bio(bio, cmd->rq)
++ zero_fill_bio(bio);
++ }
++}
++
++static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
++{
++ /*
++ * Since root privileges are required to configure the null_blk
++ * driver, it is fine that this driver does not initialize the
++ * data buffers of read commands. Zero-initialize these buffers
++ * anyway if KMSAN is enabled to prevent that KMSAN complains
++ * about null_blk not initializing read data buffers.
++ */
++ if (IS_ENABLED(CONFIG_KMSAN))
++ nullb_zero_read_cmd_buffer(cmd);
++
++ /* Complete IO by inline, softirq or timer */
++ switch (cmd->nq->dev->irqmode) {
++ case NULL_IRQ_SOFTIRQ:
++ switch (cmd->nq->dev->queue_mode) {
++ case NULL_Q_MQ:
++ blk_mq_complete_request(cmd->rq);
++ break;
++ case NULL_Q_BIO:
++ /*
++ * XXX: no proper submitting cpu information available.
++ */
++ end_cmd(cmd);
++ break;
++ }
++ break;
++ case NULL_IRQ_NONE:
++ end_cmd(cmd);
++ break;
++ case NULL_IRQ_TIMER:
++ null_cmd_end_timer(cmd);
++ break;
++ }
++}
++
++blk_status_t null_process_cmd(struct nullb_cmd *cmd,
++ enum req_opf op, sector_t sector,
++ unsigned int nr_sectors)
++{
++ struct nullb_device *dev = cmd->nq->dev;
++ blk_status_t ret;
++
++ if (dev->badblocks.shift != -1) {
++ ret = null_handle_badblocks(cmd, sector, nr_sectors);
++ if (ret != BLK_STS_OK)
++ return ret;
++ }
++
++ if (dev->memory_backed)
++ return null_handle_memory_backed(cmd, op);
++
++ return BLK_STS_OK;
++}
++
++static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
++ sector_t nr_sectors, enum req_opf op)
++{
++ struct nullb_device *dev = cmd->nq->dev;
++ struct nullb *nullb = dev->nullb;
++ blk_status_t sts;
++
++ if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
++ sts = null_handle_throttled(cmd);
++ if (sts != BLK_STS_OK)
++ return sts;
++ }
++
++ if (op == REQ_OP_FLUSH) {
++ cmd->error = errno_to_blk_status(null_handle_flush(nullb));
++ goto out;
++ }
++
++ if (dev->zoned)
++ sts = null_process_zoned_cmd(cmd, op, sector, nr_sectors);
++ else
++ sts = null_process_cmd(cmd, op, sector, nr_sectors);
++
++ /* Do not overwrite errors (e.g. timeout errors) */
++ if (cmd->error == BLK_STS_OK)
++ cmd->error = sts;
++
++out:
++ nullb_complete_cmd(cmd);
++ return BLK_STS_OK;
++}
++
++static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
++{
++ struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
++ ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
++ unsigned int mbps = nullb->dev->mbps;
++
++ if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
++ return HRTIMER_NORESTART;
++
++ atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
++ null_restart_queue_async(nullb);
++
++ hrtimer_forward_now(&nullb->bw_timer, timer_interval);
++
++ return HRTIMER_RESTART;
++}
++
++static void nullb_setup_bwtimer(struct nullb *nullb)
++{
++ ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
++
++ hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
++ nullb->bw_timer.function = nullb_bwtimer_fn;
++ atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
++ hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
++}
++
++static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
++{
++ int index = 0;
++
++ if (nullb->nr_queues != 1)
++ index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
++
++ return &nullb->queues[index];
++}
++
++static blk_qc_t null_submit_bio(struct bio *bio)
++{
++ sector_t sector = bio->bi_iter.bi_sector;
++ sector_t nr_sectors = bio_sectors(bio);
++ struct nullb *nullb = bio->bi_disk->private_data;
++ struct nullb_queue *nq = nullb_to_queue(nullb);
++ struct nullb_cmd *cmd;
++
++ cmd = alloc_cmd(nq, 1);
++ cmd->bio = bio;
++
++ null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio));
++ return BLK_QC_T_NONE;
++}
++
++static bool should_timeout_request(struct request *rq)
++{
++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
++ if (g_timeout_str[0])
++ return should_fail(&null_timeout_attr, 1);
++#endif
++ return false;
++}
++
++static bool should_requeue_request(struct request *rq)
++{
++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
++ if (g_requeue_str[0])
++ return should_fail(&null_requeue_attr, 1);
++#endif
++ return false;
++}
++
++static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
++{
++ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
++
++ pr_info("rq %p timed out\n", rq);
++
++ /*
++ * If the device is marked as blocking (i.e. memory backed or zoned
++ * device), the submission path may be blocked waiting for resources
++ * and cause real timeouts. For these real timeouts, the submission
++ * path will complete the request using blk_mq_complete_request().
++ * Only fake timeouts need to execute blk_mq_complete_request() here.
++ */
++ cmd->error = BLK_STS_TIMEOUT;
++ if (cmd->fake_timeout)
++ blk_mq_complete_request(rq);
++ return BLK_EH_DONE;
++}
++
++static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
++ const struct blk_mq_queue_data *bd)
++{
++ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
++ struct nullb_queue *nq = hctx->driver_data;
++ sector_t nr_sectors = blk_rq_sectors(bd->rq);
++ sector_t sector = blk_rq_pos(bd->rq);
++
++ might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
++
++ if (nq->dev->irqmode == NULL_IRQ_TIMER) {
++ hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
++ cmd->timer.function = null_cmd_timer_expired;
++ }
++ cmd->rq = bd->rq;
++ cmd->error = BLK_STS_OK;
++ cmd->nq = nq;
++ cmd->fake_timeout = should_timeout_request(bd->rq) ||
++ blk_should_fake_timeout(bd->rq->q);
++
++ blk_mq_start_request(bd->rq);
++
++ if (should_requeue_request(bd->rq)) {
++ /*
++ * Alternate between hitting the core BUSY path, and the
++ * driver driven requeue path
++ */
++ nq->requeue_selection++;
++ if (nq->requeue_selection & 1)
++ return BLK_STS_RESOURCE;
++ else {
++ blk_mq_requeue_request(bd->rq, true);
++ return BLK_STS_OK;
++ }
++ }
++ if (cmd->fake_timeout)
++ return BLK_STS_OK;
++
++ return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
++}
++
++static void cleanup_queue(struct nullb_queue *nq)
++{
++ kfree(nq->tag_map);
++ kfree(nq->cmds);
++}
++
++static void cleanup_queues(struct nullb *nullb)
++{
++ int i;
++
++ for (i = 0; i < nullb->nr_queues; i++)
++ cleanup_queue(&nullb->queues[i]);
++
++ kfree(nullb->queues);
++}
++
++static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
++{
++ struct nullb_queue *nq = hctx->driver_data;
++ struct nullb *nullb = nq->dev->nullb;
++
++ nullb->nr_queues--;
++}
++
++static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
++{
++ init_waitqueue_head(&nq->wait);
++ nq->queue_depth = nullb->queue_depth;
++ nq->dev = nullb->dev;
++}
++
++static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
++ unsigned int hctx_idx)
++{
++ struct nullb *nullb = hctx->queue->queuedata;
++ struct nullb_queue *nq;
++
++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
++ if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1))
++ return -EFAULT;
++#endif
++
++ nq = &nullb->queues[hctx_idx];
++ hctx->driver_data = nq;
++ null_init_queue(nullb, nq);
++ nullb->nr_queues++;
++
++ return 0;
++}
++
++static const struct blk_mq_ops null_mq_ops = {
++ .queue_rq = null_queue_rq,
++ .complete = null_complete_rq,
++ .timeout = null_timeout_rq,
++ .init_hctx = null_init_hctx,
++ .exit_hctx = null_exit_hctx,
++};
++
++static void null_del_dev(struct nullb *nullb)
++{
++ struct nullb_device *dev;
++
++ if (!nullb)
++ return;
++
++ dev = nullb->dev;
++
++ ida_simple_remove(&nullb_indexes, nullb->index);
++
++ list_del_init(&nullb->list);
++
++ del_gendisk(nullb->disk);
++
++ if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
++ hrtimer_cancel(&nullb->bw_timer);
++ atomic_long_set(&nullb->cur_bytes, LONG_MAX);
++ null_restart_queue_async(nullb);
++ }
++
++ blk_cleanup_queue(nullb->q);
++ if (dev->queue_mode == NULL_Q_MQ &&
++ nullb->tag_set == &nullb->__tag_set)
++ blk_mq_free_tag_set(nullb->tag_set);
++ put_disk(nullb->disk);
++ cleanup_queues(nullb);
++ if (null_cache_active(nullb))
++ null_free_device_storage(nullb->dev, true);
++ kfree(nullb);
++ dev->nullb = NULL;
++}
++
++static void null_config_discard(struct nullb *nullb)
++{
++ if (nullb->dev->discard == false)
++ return;
++
++ if (nullb->dev->zoned) {
++ nullb->dev->discard = false;
++ pr_info("discard option is ignored in zoned mode\n");
++ return;
++ }
++
++ nullb->q->limits.discard_granularity = nullb->dev->blocksize;
++ nullb->q->limits.discard_alignment = nullb->dev->blocksize;
++ blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
++ blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
++}
++
++static const struct block_device_operations null_bio_ops = {
++ .owner = THIS_MODULE,
++ .submit_bio = null_submit_bio,
++ .report_zones = null_report_zones,
++};
++
++static const struct block_device_operations null_rq_ops = {
++ .owner = THIS_MODULE,
++ .report_zones = null_report_zones,
++};
++
++static int setup_commands(struct nullb_queue *nq)
++{
++ struct nullb_cmd *cmd;
++ int i, tag_size;
++
++ nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
++ if (!nq->cmds)
++ return -ENOMEM;
++
++ tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
++ nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL);
++ if (!nq->tag_map) {
++ kfree(nq->cmds);
++ return -ENOMEM;
++ }
++
++ for (i = 0; i < nq->queue_depth; i++) {
++ cmd = &nq->cmds[i];
++ cmd->tag = -1U;
++ }
++
++ return 0;
++}
++
++static int setup_queues(struct nullb *nullb)
++{
++ nullb->queues = kcalloc(nr_cpu_ids, sizeof(struct nullb_queue),
++ GFP_KERNEL);
++ if (!nullb->queues)
++ return -ENOMEM;
++
++ nullb->queue_depth = nullb->dev->hw_queue_depth;
++
++ return 0;
++}
++
++static int init_driver_queues(struct nullb *nullb)
++{
++ struct nullb_queue *nq;
++ int i, ret = 0;
++
++ for (i = 0; i < nullb->dev->submit_queues; i++) {
++ nq = &nullb->queues[i];
++
++ null_init_queue(nullb, nq);
++
++ ret = setup_commands(nq);
++ if (ret)
++ return ret;
++ nullb->nr_queues++;
++ }
++ return 0;
++}
++
++static int null_gendisk_register(struct nullb *nullb)
++{
++ sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
++ struct gendisk *disk;
++
++ disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
++ if (!disk)
++ return -ENOMEM;
++ set_capacity(disk, size);
++
++ disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
++ disk->major = null_major;
++ disk->first_minor = nullb->index;
++ if (queue_is_mq(nullb->q))
++ disk->fops = &null_rq_ops;
++ else
++ disk->fops = &null_bio_ops;
++ disk->private_data = nullb;
++ disk->queue = nullb->q;
++ strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
++
++ if (nullb->dev->zoned) {
++ int ret = null_register_zoned_dev(nullb);
++
++ if (ret)
++ return ret;
++ }
++
++ add_disk(disk);
++ return 0;
++}
++
++static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
++{
++ set->ops = &null_mq_ops;
++ set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
++ g_submit_queues;
++ set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
++ g_hw_queue_depth;
++ set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
++ set->cmd_size = sizeof(struct nullb_cmd);
++ set->flags = BLK_MQ_F_SHOULD_MERGE;
++ if (g_no_sched)
++ set->flags |= BLK_MQ_F_NO_SCHED;
++ if (g_shared_tag_bitmap)
++ set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
++ set->driver_data = NULL;
++
++ if ((nullb && nullb->dev->blocking) || g_blocking)
++ set->flags |= BLK_MQ_F_BLOCKING;
++
++ return blk_mq_alloc_tag_set(set);
++}
++
++static int null_validate_conf(struct nullb_device *dev)
++{
++ dev->blocksize = round_down(dev->blocksize, 512);
++ dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
++
++ if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
++ if (dev->submit_queues != nr_online_nodes)
++ dev->submit_queues = nr_online_nodes;
++ } else if (dev->submit_queues > nr_cpu_ids)
++ dev->submit_queues = nr_cpu_ids;
++ else if (dev->submit_queues == 0)
++ dev->submit_queues = 1;
++
++ dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
++ dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
++
++ /* Do memory allocation, so set blocking */
++ if (dev->memory_backed)
++ dev->blocking = true;
++ else /* cache is meaningless */
++ dev->cache_size = 0;
++ dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
++ dev->cache_size);
++ dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
++ /* can not stop a queue */
++ if (dev->queue_mode == NULL_Q_BIO)
++ dev->mbps = 0;
++
++ if (dev->zoned &&
++ (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
++ pr_err("zone_size must be power-of-two\n");
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
++static bool __null_setup_fault(struct fault_attr *attr, char *str)
++{
++ if (!str[0])
++ return true;
++
++ if (!setup_fault_attr(attr, str))
++ return false;
++
++ attr->verbose = 0;
++ return true;
++}
++#endif
++
++static bool null_setup_fault(void)
++{
++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
++ if (!__null_setup_fault(&null_timeout_attr, g_timeout_str))
++ return false;
++ if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
++ return false;
++ if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str))
++ return false;
++#endif
++ return true;
++}
++
++static int null_add_dev(struct nullb_device *dev)
++{
++ struct nullb *nullb;
++ int rv;
++
++ rv = null_validate_conf(dev);
++ if (rv)
++ return rv;
++
++ nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
++ if (!nullb) {
++ rv = -ENOMEM;
++ goto out;
++ }
++ nullb->dev = dev;
++ dev->nullb = nullb;
++
++ spin_lock_init(&nullb->lock);
++
++ rv = setup_queues(nullb);
++ if (rv)
++ goto out_free_nullb;
++
++ if (dev->queue_mode == NULL_Q_MQ) {
++ if (shared_tags) {
++ nullb->tag_set = &tag_set;
++ rv = 0;
++ } else {
++ nullb->tag_set = &nullb->__tag_set;
++ rv = null_init_tag_set(nullb, nullb->tag_set);
++ }
++
++ if (rv)
++ goto out_cleanup_queues;
++
++ if (!null_setup_fault())
++ goto out_cleanup_queues;
++
++ nullb->tag_set->timeout = 5 * HZ;
++ nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb);
++ if (IS_ERR(nullb->q)) {
++ rv = -ENOMEM;
++ goto out_cleanup_tags;
++ }
++ } else if (dev->queue_mode == NULL_Q_BIO) {
++ nullb->q = blk_alloc_queue(dev->home_node);
++ if (!nullb->q) {
++ rv = -ENOMEM;
++ goto out_cleanup_queues;
++ }
++ rv = init_driver_queues(nullb);
++ if (rv)
++ goto out_cleanup_blk_queue;
++ }
++
++ if (dev->mbps) {
++ set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
++ nullb_setup_bwtimer(nullb);
++ }
++
++ if (dev->cache_size > 0) {
++ set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
++ blk_queue_write_cache(nullb->q, true, true);
++ }
++
++ if (dev->zoned) {
++ rv = null_init_zoned_dev(dev, nullb->q);
++ if (rv)
++ goto out_cleanup_blk_queue;
++ }
++
++ nullb->q->queuedata = nullb;
++ blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
++ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
++
++ mutex_lock(&lock);
++ rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
++ if (rv < 0) {
++ mutex_unlock(&lock);
++ goto out_cleanup_zone;
++ }
++ nullb->index = rv;
++ dev->index = rv;
++ mutex_unlock(&lock);
++
++ blk_queue_logical_block_size(nullb->q, dev->blocksize);
++ blk_queue_physical_block_size(nullb->q, dev->blocksize);
++
++ null_config_discard(nullb);
++
++ sprintf(nullb->disk_name, "nullb%d", nullb->index);
++
++ rv = null_gendisk_register(nullb);
++ if (rv)
++ goto out_ida_free;
++
++ mutex_lock(&lock);
++ list_add_tail(&nullb->list, &nullb_list);
++ mutex_unlock(&lock);
++
++ return 0;
++
++out_ida_free:
++ ida_free(&nullb_indexes, nullb->index);
++out_cleanup_zone:
++ null_free_zoned_dev(dev);
++out_cleanup_blk_queue:
++ blk_cleanup_queue(nullb->q);
++out_cleanup_tags:
++ if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
++ blk_mq_free_tag_set(nullb->tag_set);
++out_cleanup_queues:
++ cleanup_queues(nullb);
++out_free_nullb:
++ kfree(nullb);
++ dev->nullb = NULL;
++out:
++ return rv;
++}
++
++static int __init null_init(void)
++{
++ int ret = 0;
++ unsigned int i;
++ struct nullb *nullb;
++ struct nullb_device *dev;
++
++ if (g_bs > PAGE_SIZE) {
++ pr_warn("invalid block size\n");
++ pr_warn("defaults block size to %lu\n", PAGE_SIZE);
++ g_bs = PAGE_SIZE;
++ }
++
++ if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
++ pr_err("invalid home_node value\n");
++ g_home_node = NUMA_NO_NODE;
++ }
++
++ if (g_queue_mode == NULL_Q_RQ) {
++ pr_err("legacy IO path no longer available\n");
++ return -EINVAL;
++ }
++ if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
++ if (g_submit_queues != nr_online_nodes) {
++ pr_warn("submit_queues param is set to %u.\n",
++ nr_online_nodes);
++ g_submit_queues = nr_online_nodes;
++ }
++ } else if (g_submit_queues > nr_cpu_ids)
++ g_submit_queues = nr_cpu_ids;
++ else if (g_submit_queues <= 0)
++ g_submit_queues = 1;
++
++ if (g_queue_mode == NULL_Q_MQ && shared_tags) {
++ ret = null_init_tag_set(NULL, &tag_set);
++ if (ret)
++ return ret;
++ }
++
++ config_group_init(&nullb_subsys.su_group);
++ mutex_init(&nullb_subsys.su_mutex);
++
++ ret = configfs_register_subsystem(&nullb_subsys);
++ if (ret)
++ goto err_tagset;
++
++ mutex_init(&lock);
++
++ null_major = register_blkdev(0, "nullb");
++ if (null_major < 0) {
++ ret = null_major;
++ goto err_conf;
++ }
++
++ for (i = 0; i < nr_devices; i++) {
++ dev = null_alloc_dev();
++ if (!dev) {
++ ret = -ENOMEM;
++ goto err_dev;
++ }
++ ret = null_add_dev(dev);
++ if (ret) {
++ null_free_dev(dev);
++ goto err_dev;
++ }
++ }
++
++ pr_info("module loaded\n");
++ return 0;
++
++err_dev:
++ while (!list_empty(&nullb_list)) {
++ nullb = list_entry(nullb_list.next, struct nullb, list);
++ dev = nullb->dev;
++ null_del_dev(nullb);
++ null_free_dev(dev);
++ }
++ unregister_blkdev(null_major, "nullb");
++err_conf:
++ configfs_unregister_subsystem(&nullb_subsys);
++err_tagset:
++ if (g_queue_mode == NULL_Q_MQ && shared_tags)
++ blk_mq_free_tag_set(&tag_set);
++ return ret;
++}
++
++static void __exit null_exit(void)
++{
++ struct nullb *nullb;
++
++ configfs_unregister_subsystem(&nullb_subsys);
++
++ unregister_blkdev(null_major, "nullb");
++
++ mutex_lock(&lock);
++ while (!list_empty(&nullb_list)) {
++ struct nullb_device *dev;
++
++ nullb = list_entry(nullb_list.next, struct nullb, list);
++ dev = nullb->dev;
++ null_del_dev(nullb);
++ null_free_dev(dev);
++ }
++ mutex_unlock(&lock);
++
++ if (g_queue_mode == NULL_Q_MQ && shared_tags)
++ blk_mq_free_tag_set(&tag_set);
++}
++
++module_init(null_init);
++module_exit(null_exit);
++
++MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
++MODULE_LICENSE("GPL");
+diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
+new file mode 100644
+index 0000000000000..7de703f28617b
+--- /dev/null
++++ b/drivers/block/null_blk/null_blk.h
+@@ -0,0 +1,137 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __BLK_NULL_BLK_H
++#define __BLK_NULL_BLK_H
++
++#undef pr_fmt
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/blkdev.h>
++#include <linux/slab.h>
++#include <linux/blk-mq.h>
++#include <linux/hrtimer.h>
++#include <linux/configfs.h>
++#include <linux/badblocks.h>
++#include <linux/fault-inject.h>
++
++struct nullb_cmd {
++ struct request *rq;
++ struct bio *bio;
++ unsigned int tag;
++ blk_status_t error;
++ struct nullb_queue *nq;
++ struct hrtimer timer;
++ bool fake_timeout;
++};
++
++struct nullb_queue {
++ unsigned long *tag_map;
++ wait_queue_head_t wait;
++ unsigned int queue_depth;
++ struct nullb_device *dev;
++ unsigned int requeue_selection;
++
++ struct nullb_cmd *cmds;
++};
++
++struct nullb_device {
++ struct nullb *nullb;
++ struct config_item item;
++ struct radix_tree_root data; /* data stored in the disk */
++ struct radix_tree_root cache; /* disk cache data */
++ unsigned long flags; /* device flags */
++ unsigned int curr_cache;
++ struct badblocks badblocks;
++
++ unsigned int nr_zones;
++ unsigned int nr_zones_imp_open;
++ unsigned int nr_zones_exp_open;
++ unsigned int nr_zones_closed;
++ struct blk_zone *zones;
++ sector_t zone_size_sects;
++ spinlock_t zone_lock;
++ unsigned long *zone_locks;
++
++ unsigned long size; /* device size in MB */
++ unsigned long completion_nsec; /* time in ns to complete a request */
++ unsigned long cache_size; /* disk cache size in MB */
++ unsigned long zone_size; /* zone size in MB if device is zoned */
++ unsigned long zone_capacity; /* zone capacity in MB if device is zoned */
++ unsigned int zone_nr_conv; /* number of conventional zones */
++ unsigned int zone_max_open; /* max number of open zones */
++ unsigned int zone_max_active; /* max number of active zones */
++ unsigned int submit_queues; /* number of submission queues */
++ unsigned int home_node; /* home node for the device */
++ unsigned int queue_mode; /* block interface */
++ unsigned int blocksize; /* block size */
++ unsigned int irqmode; /* IRQ completion handler */
++ unsigned int hw_queue_depth; /* queue depth */
++ unsigned int index; /* index of the disk, only valid with a disk */
++ unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */
++ bool blocking; /* blocking blk-mq device */
++ bool use_per_node_hctx; /* use per-node allocation for hardware context */
++ bool power; /* power on/off the device */
++ bool memory_backed; /* if data is stored in memory */
++ bool discard; /* if support discard */
++ bool zoned; /* if device is zoned */
++};
++
++struct nullb {
++ struct nullb_device *dev;
++ struct list_head list;
++ unsigned int index;
++ struct request_queue *q;
++ struct gendisk *disk;
++ struct blk_mq_tag_set *tag_set;
++ struct blk_mq_tag_set __tag_set;
++ unsigned int queue_depth;
++ atomic_long_t cur_bytes;
++ struct hrtimer bw_timer;
++ unsigned long cache_flush_pos;
++ spinlock_t lock;
++
++ struct nullb_queue *queues;
++ unsigned int nr_queues;
++ char disk_name[DISK_NAME_LEN];
++};
++
++blk_status_t null_process_cmd(struct nullb_cmd *cmd,
++ enum req_opf op, sector_t sector,
++ unsigned int nr_sectors);
++
++#ifdef CONFIG_BLK_DEV_ZONED
++int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q);
++int null_register_zoned_dev(struct nullb *nullb);
++void null_free_zoned_dev(struct nullb_device *dev);
++int null_report_zones(struct gendisk *disk, sector_t sector,
++ unsigned int nr_zones, report_zones_cb cb, void *data);
++blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
++ enum req_opf op, sector_t sector,
++ sector_t nr_sectors);
++size_t null_zone_valid_read_len(struct nullb *nullb,
++ sector_t sector, unsigned int len);
++#else
++static inline int null_init_zoned_dev(struct nullb_device *dev,
++ struct request_queue *q)
++{
++ pr_err("CONFIG_BLK_DEV_ZONED not enabled\n");
++ return -EINVAL;
++}
++static inline int null_register_zoned_dev(struct nullb *nullb)
++{
++ return -ENODEV;
++}
++static inline void null_free_zoned_dev(struct nullb_device *dev) {}
++static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
++ enum req_opf op, sector_t sector, sector_t nr_sectors)
++{
++ return BLK_STS_NOTSUPP;
++}
++static inline size_t null_zone_valid_read_len(struct nullb *nullb,
++ sector_t sector,
++ unsigned int len)
++{
++ return len;
++}
++#define null_report_zones NULL
++#endif /* CONFIG_BLK_DEV_ZONED */
++#endif /* __NULL_BLK_H */
+diff --git a/drivers/block/null_blk/trace.c b/drivers/block/null_blk/trace.c
+new file mode 100644
+index 0000000000000..3711cba160715
+--- /dev/null
++++ b/drivers/block/null_blk/trace.c
+@@ -0,0 +1,21 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * null_blk trace related helpers.
++ *
++ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
++ */
++#include "trace.h"
++
++/*
++ * Helper to use for all null_blk traces to extract disk name.
++ */
++const char *nullb_trace_disk_name(struct trace_seq *p, char *name)
++{
++ const char *ret = trace_seq_buffer_ptr(p);
++
++ if (name && *name)
++ trace_seq_printf(p, "disk=%s, ", name);
++ trace_seq_putc(p, 0);
++
++ return ret;
++}
+diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h
+new file mode 100644
+index 0000000000000..ce3b430e88c57
+--- /dev/null
++++ b/drivers/block/null_blk/trace.h
+@@ -0,0 +1,79 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * null_blk device driver tracepoints.
++ *
++ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
++ */
++
++#undef TRACE_SYSTEM
++#define TRACE_SYSTEM nullb
++
++#if !defined(_TRACE_NULLB_H) || defined(TRACE_HEADER_MULTI_READ)
++#define _TRACE_NULLB_H
++
++#include <linux/tracepoint.h>
++#include <linux/trace_seq.h>
++
++#include "null_blk.h"
++
++const char *nullb_trace_disk_name(struct trace_seq *p, char *name);
++
++#define __print_disk_name(name) nullb_trace_disk_name(p, name)
++
++#ifndef TRACE_HEADER_MULTI_READ
++static inline void __assign_disk_name(char *name, struct gendisk *disk)
++{
++ if (disk)
++ memcpy(name, disk->disk_name, DISK_NAME_LEN);
++ else
++ memset(name, 0, DISK_NAME_LEN);
++}
++#endif
++
++TRACE_EVENT(nullb_zone_op,
++ TP_PROTO(struct nullb_cmd *cmd, unsigned int zone_no,
++ unsigned int zone_cond),
++ TP_ARGS(cmd, zone_no, zone_cond),
++ TP_STRUCT__entry(
++ __array(char, disk, DISK_NAME_LEN)
++ __field(enum req_opf, op)
++ __field(unsigned int, zone_no)
++ __field(unsigned int, zone_cond)
++ ),
++ TP_fast_assign(
++ __entry->op = req_op(cmd->rq);
++ __entry->zone_no = zone_no;
++ __entry->zone_cond = zone_cond;
++ __assign_disk_name(__entry->disk, cmd->rq->rq_disk);
++ ),
++ TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s",
++ __print_disk_name(__entry->disk),
++ blk_op_str(__entry->op),
++ __entry->zone_no,
++ blk_zone_cond_str(__entry->zone_cond))
++);
++
++TRACE_EVENT(nullb_report_zones,
++ TP_PROTO(struct nullb *nullb, unsigned int nr_zones),
++ TP_ARGS(nullb, nr_zones),
++ TP_STRUCT__entry(
++ __array(char, disk, DISK_NAME_LEN)
++ __field(unsigned int, nr_zones)
++ ),
++ TP_fast_assign(
++ __entry->nr_zones = nr_zones;
++ __assign_disk_name(__entry->disk, nullb->disk);
++ ),
++ TP_printk("%s nr_zones=%u",
++ __print_disk_name(__entry->disk), __entry->nr_zones)
++);
++
++#endif /* _TRACE_NULLB_H */
++
++#undef TRACE_INCLUDE_PATH
++#define TRACE_INCLUDE_PATH .
++#undef TRACE_INCLUDE_FILE
++#define TRACE_INCLUDE_FILE trace
++
++/* This part must be outside protection */
++#include <trace/define_trace.h>
+diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
+new file mode 100644
+index 0000000000000..41220ce59659b
+--- /dev/null
++++ b/drivers/block/null_blk/zoned.c
+@@ -0,0 +1,617 @@
++// SPDX-License-Identifier: GPL-2.0
++#include <linux/vmalloc.h>
++#include <linux/bitmap.h>
++#include "null_blk.h"
++
++#define CREATE_TRACE_POINTS
++#include "trace.h"
++
++#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT)
++
++static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
++{
++ return sect >> ilog2(dev->zone_size_sects);
++}
++
++int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
++{
++ sector_t dev_capacity_sects, zone_capacity_sects;
++ sector_t sector = 0;
++ unsigned int i;
++
++ if (!is_power_of_2(dev->zone_size)) {
++ pr_err("zone_size must be power-of-two\n");
++ return -EINVAL;
++ }
++ if (dev->zone_size > dev->size) {
++ pr_err("Zone size larger than device capacity\n");
++ return -EINVAL;
++ }
++
++ if (!dev->zone_capacity)
++ dev->zone_capacity = dev->zone_size;
++
++ if (dev->zone_capacity > dev->zone_size) {
++ pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
++ dev->zone_capacity, dev->zone_size);
++ return -EINVAL;
++ }
++
++ zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity);
++ dev_capacity_sects = MB_TO_SECTS(dev->size);
++ dev->zone_size_sects = MB_TO_SECTS(dev->zone_size);
++ dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects);
++ if (dev_capacity_sects & (dev->zone_size_sects - 1))
++ dev->nr_zones++;
++
++ dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone),
++ GFP_KERNEL | __GFP_ZERO);
++ if (!dev->zones)
++ return -ENOMEM;
++
++ /*
++ * With memory backing, the zone_lock spinlock needs to be temporarily
++ * released to avoid scheduling in atomic context. To guarantee zone
++ * information protection, use a bitmap to lock zones with
++ * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing
++ * implies that the queue is marked with BLK_MQ_F_BLOCKING.
++ */
++ spin_lock_init(&dev->zone_lock);
++ if (dev->memory_backed) {
++ dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
++ if (!dev->zone_locks) {
++ kvfree(dev->zones);
++ return -ENOMEM;
++ }
++ }
++
++ if (dev->zone_nr_conv >= dev->nr_zones) {
++ dev->zone_nr_conv = dev->nr_zones - 1;
++ pr_info("changed the number of conventional zones to %u",
++ dev->zone_nr_conv);
++ }
++
++ /* Max active zones has to be < nbr of seq zones in order to be enforceable */
++ if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) {
++ dev->zone_max_active = 0;
++ pr_info("zone_max_active limit disabled, limit >= zone count\n");
++ }
++
++ /* Max open zones has to be <= max active zones */
++ if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) {
++ dev->zone_max_open = dev->zone_max_active;
++ pr_info("changed the maximum number of open zones to %u\n",
++ dev->nr_zones);
++ } else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) {
++ dev->zone_max_open = 0;
++ pr_info("zone_max_open limit disabled, limit >= zone count\n");
++ }
++
++ for (i = 0; i < dev->zone_nr_conv; i++) {
++ struct blk_zone *zone = &dev->zones[i];
++
++ zone->start = sector;
++ zone->len = dev->zone_size_sects;
++ zone->capacity = zone->len;
++ zone->wp = zone->start + zone->len;
++ zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
++ zone->cond = BLK_ZONE_COND_NOT_WP;
++
++ sector += dev->zone_size_sects;
++ }
++
++ for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
++ struct blk_zone *zone = &dev->zones[i];
++
++ zone->start = zone->wp = sector;
++ if (zone->start + dev->zone_size_sects > dev_capacity_sects)
++ zone->len = dev_capacity_sects - zone->start;
++ else
++ zone->len = dev->zone_size_sects;
++ zone->capacity =
++ min_t(sector_t, zone->len, zone_capacity_sects);
++ zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
++ zone->cond = BLK_ZONE_COND_EMPTY;
++
++ sector += dev->zone_size_sects;
++ }
++
++ q->limits.zoned = BLK_ZONED_HM;
++ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
++ blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
++
++ return 0;
++}
++
++int null_register_zoned_dev(struct nullb *nullb)
++{
++ struct nullb_device *dev = nullb->dev;
++ struct request_queue *q = nullb->q;
++
++ if (queue_is_mq(q)) {
++ int ret = blk_revalidate_disk_zones(nullb->disk, NULL);
++
++ if (ret)
++ return ret;
++ } else {
++ blk_queue_chunk_sectors(q, dev->zone_size_sects);
++ q->nr_zones = blkdev_nr_zones(nullb->disk);
++ }
++
++ blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
++ blk_queue_max_open_zones(q, dev->zone_max_open);
++ blk_queue_max_active_zones(q, dev->zone_max_active);
++
++ return 0;
++}
++
++void null_free_zoned_dev(struct nullb_device *dev)
++{
++ bitmap_free(dev->zone_locks);
++ kvfree(dev->zones);
++ dev->zones = NULL;
++}
++
++static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
++{
++ if (dev->memory_backed)
++ wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
++ spin_lock_irq(&dev->zone_lock);
++}
++
++static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno)
++{
++ spin_unlock_irq(&dev->zone_lock);
++
++ if (dev->memory_backed)
++ clear_and_wake_up_bit(zno, dev->zone_locks);
++}
++
++int null_report_zones(struct gendisk *disk, sector_t sector,
++ unsigned int nr_zones, report_zones_cb cb, void *data)
++{
++ struct nullb *nullb = disk->private_data;
++ struct nullb_device *dev = nullb->dev;
++ unsigned int first_zone, i, zno;
++ struct blk_zone zone;
++ int error;
++
++ first_zone = null_zone_no(dev, sector);
++ if (first_zone >= dev->nr_zones)
++ return 0;
++
++ nr_zones = min(nr_zones, dev->nr_zones - first_zone);
++ trace_nullb_report_zones(nullb, nr_zones);
++
++ zno = first_zone;
++ for (i = 0; i < nr_zones; i++, zno++) {
++ /*
++ * Stacked DM target drivers will remap the zone information by
++ * modifying the zone information passed to the report callback.
++ * So use a local copy to avoid corruption of the device zone
++ * array.
++ */
++ null_lock_zone(dev, zno);
++ memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone));
++ null_unlock_zone(dev, zno);
++
++ error = cb(&zone, i, data);
++ if (error)
++ return error;
++ }
++
++ return nr_zones;
++}
++
++/*
++ * This is called in the case of memory backing from null_process_cmd()
++ * with the target zone already locked.
++ */
++size_t null_zone_valid_read_len(struct nullb *nullb,
++ sector_t sector, unsigned int len)
++{
++ struct nullb_device *dev = nullb->dev;
++ struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
++ unsigned int nr_sectors = len >> SECTOR_SHIFT;
++
++ /* Read must be below the write pointer position */
++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
++ sector + nr_sectors <= zone->wp)
++ return len;
++
++ if (sector > zone->wp)
++ return 0;
++
++ return (zone->wp - sector) << SECTOR_SHIFT;
++}
++
++static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *zone)
++{
++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
++ return BLK_STS_IOERR;
++
++ switch (zone->cond) {
++ case BLK_ZONE_COND_CLOSED:
++ /* close operation on closed is not an error */
++ return BLK_STS_OK;
++ case BLK_ZONE_COND_IMP_OPEN:
++ dev->nr_zones_imp_open--;
++ break;
++ case BLK_ZONE_COND_EXP_OPEN:
++ dev->nr_zones_exp_open--;
++ break;
++ case BLK_ZONE_COND_EMPTY:
++ case BLK_ZONE_COND_FULL:
++ default:
++ return BLK_STS_IOERR;
++ }
++
++ if (zone->wp == zone->start) {
++ zone->cond = BLK_ZONE_COND_EMPTY;
++ } else {
++ zone->cond = BLK_ZONE_COND_CLOSED;
++ dev->nr_zones_closed++;
++ }
++
++ return BLK_STS_OK;
++}
++
++static void null_close_first_imp_zone(struct nullb_device *dev)
++{
++ unsigned int i;
++
++ for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
++ if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) {
++ null_close_zone(dev, &dev->zones[i]);
++ return;
++ }
++ }
++}
++
++static blk_status_t null_check_active(struct nullb_device *dev)
++{
++ if (!dev->zone_max_active)
++ return BLK_STS_OK;
++
++ if (dev->nr_zones_exp_open + dev->nr_zones_imp_open +
++ dev->nr_zones_closed < dev->zone_max_active)
++ return BLK_STS_OK;
++
++ return BLK_STS_ZONE_ACTIVE_RESOURCE;
++}
++
++static blk_status_t null_check_open(struct nullb_device *dev)
++{
++ if (!dev->zone_max_open)
++ return BLK_STS_OK;
++
++ if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open)
++ return BLK_STS_OK;
++
++ if (dev->nr_zones_imp_open) {
++ if (null_check_active(dev) == BLK_STS_OK) {
++ null_close_first_imp_zone(dev);
++ return BLK_STS_OK;
++ }
++ }
++
++ return BLK_STS_ZONE_OPEN_RESOURCE;
++}
++
++/*
++ * This function matches the manage open zone resources function in the ZBC standard,
++ * with the addition of max active zones support (added in the ZNS standard).
++ *
++ * The function determines if a zone can transition to implicit open or explicit open,
++ * while maintaining the max open zone (and max active zone) limit(s). It may close an
++ * implicit open zone in order to make additional zone resources available.
++ *
++ * ZBC states that an implicit open zone shall be closed only if there is not
++ * room within the open limit. However, with the addition of an active limit,
++ * it is not certain that closing an implicit open zone will allow a new zone
++ * to be opened, since we might already be at the active limit capacity.
++ */
++static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
++{
++ blk_status_t ret;
++
++ switch (zone->cond) {
++ case BLK_ZONE_COND_EMPTY:
++ ret = null_check_active(dev);
++ if (ret != BLK_STS_OK)
++ return ret;
++ fallthrough;
++ case BLK_ZONE_COND_CLOSED:
++ return null_check_open(dev);
++ default:
++ /* Should never be called for other states */
++ WARN_ON(1);
++ return BLK_STS_IOERR;
++ }
++}
++
++static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
++ unsigned int nr_sectors, bool append)
++{
++ struct nullb_device *dev = cmd->nq->dev;
++ unsigned int zno = null_zone_no(dev, sector);
++ struct blk_zone *zone = &dev->zones[zno];
++ blk_status_t ret;
++
++ trace_nullb_zone_op(cmd, zno, zone->cond);
++
++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
++ if (append)
++ return BLK_STS_IOERR;
++ return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
++ }
++
++ null_lock_zone(dev, zno);
++
++ switch (zone->cond) {
++ case BLK_ZONE_COND_FULL:
++ /* Cannot write to a full zone */
++ ret = BLK_STS_IOERR;
++ goto unlock;
++ case BLK_ZONE_COND_EMPTY:
++ case BLK_ZONE_COND_CLOSED:
++ ret = null_check_zone_resources(dev, zone);
++ if (ret != BLK_STS_OK)
++ goto unlock;
++ break;
++ case BLK_ZONE_COND_IMP_OPEN:
++ case BLK_ZONE_COND_EXP_OPEN:
++ break;
++ default:
++ /* Invalid zone condition */
++ ret = BLK_STS_IOERR;
++ goto unlock;
++ }
++
++ /*
++ * Regular writes must be at the write pointer position.
++ * Zone append writes are automatically issued at the write
++ * pointer and the position returned using the request or BIO
++ * sector.
++ */
++ if (append) {
++ sector = zone->wp;
++ if (cmd->bio)
++ cmd->bio->bi_iter.bi_sector = sector;
++ else
++ cmd->rq->__sector = sector;
++ } else if (sector != zone->wp) {
++ ret = BLK_STS_IOERR;
++ goto unlock;
++ }
++
++ if (zone->wp + nr_sectors > zone->start + zone->capacity) {
++ ret = BLK_STS_IOERR;
++ goto unlock;
++ }
++
++ if (zone->cond == BLK_ZONE_COND_CLOSED) {
++ dev->nr_zones_closed--;
++ dev->nr_zones_imp_open++;
++ } else if (zone->cond == BLK_ZONE_COND_EMPTY) {
++ dev->nr_zones_imp_open++;
++ }
++ if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
++ zone->cond = BLK_ZONE_COND_IMP_OPEN;
++
++ /*
++ * Memory backing allocation may sleep: release the zone_lock spinlock
++ * to avoid scheduling in atomic context. Zone operation atomicity is
++ * still guaranteed through the zone_locks bitmap.
++ */
++ if (dev->memory_backed)
++ spin_unlock_irq(&dev->zone_lock);
++ ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
++ if (dev->memory_backed)
++ spin_lock_irq(&dev->zone_lock);
++
++ if (ret != BLK_STS_OK)
++ goto unlock;
++
++ zone->wp += nr_sectors;
++ if (zone->wp == zone->start + zone->capacity) {
++ if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
++ dev->nr_zones_exp_open--;
++ else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
++ dev->nr_zones_imp_open--;
++ zone->cond = BLK_ZONE_COND_FULL;
++ }
++ ret = BLK_STS_OK;
++
++unlock:
++ null_unlock_zone(dev, zno);
++
++ return ret;
++}
++
++static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone)
++{
++ blk_status_t ret;
++
++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
++ return BLK_STS_IOERR;
++
++ switch (zone->cond) {
++ case BLK_ZONE_COND_EXP_OPEN:
++ /* open operation on exp open is not an error */
++ return BLK_STS_OK;
++ case BLK_ZONE_COND_EMPTY:
++ ret = null_check_zone_resources(dev, zone);
++ if (ret != BLK_STS_OK)
++ return ret;
++ break;
++ case BLK_ZONE_COND_IMP_OPEN:
++ dev->nr_zones_imp_open--;
++ break;
++ case BLK_ZONE_COND_CLOSED:
++ ret = null_check_zone_resources(dev, zone);
++ if (ret != BLK_STS_OK)
++ return ret;
++ dev->nr_zones_closed--;
++ break;
++ case BLK_ZONE_COND_FULL:
++ default:
++ return BLK_STS_IOERR;
++ }
++
++ zone->cond = BLK_ZONE_COND_EXP_OPEN;
++ dev->nr_zones_exp_open++;
++
++ return BLK_STS_OK;
++}
++
++static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone)
++{
++ blk_status_t ret;
++
++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
++ return BLK_STS_IOERR;
++
++ switch (zone->cond) {
++ case BLK_ZONE_COND_FULL:
++ /* finish operation on full is not an error */
++ return BLK_STS_OK;
++ case BLK_ZONE_COND_EMPTY:
++ ret = null_check_zone_resources(dev, zone);
++ if (ret != BLK_STS_OK)
++ return ret;
++ break;
++ case BLK_ZONE_COND_IMP_OPEN:
++ dev->nr_zones_imp_open--;
++ break;
++ case BLK_ZONE_COND_EXP_OPEN:
++ dev->nr_zones_exp_open--;
++ break;
++ case BLK_ZONE_COND_CLOSED:
++ ret = null_check_zone_resources(dev, zone);
++ if (ret != BLK_STS_OK)
++ return ret;
++ dev->nr_zones_closed--;
++ break;
++ default:
++ return BLK_STS_IOERR;
++ }
++
++ zone->cond = BLK_ZONE_COND_FULL;
++ zone->wp = zone->start + zone->len;
++
++ return BLK_STS_OK;
++}
++
++static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *zone)
++{
++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
++ return BLK_STS_IOERR;
++
++ switch (zone->cond) {
++ case BLK_ZONE_COND_EMPTY:
++ /* reset operation on empty is not an error */
++ return BLK_STS_OK;
++ case BLK_ZONE_COND_IMP_OPEN:
++ dev->nr_zones_imp_open--;
++ break;
++ case BLK_ZONE_COND_EXP_OPEN:
++ dev->nr_zones_exp_open--;
++ break;
++ case BLK_ZONE_COND_CLOSED:
++ dev->nr_zones_closed--;
++ break;
++ case BLK_ZONE_COND_FULL:
++ break;
++ default:
++ return BLK_STS_IOERR;
++ }
++
++ zone->cond = BLK_ZONE_COND_EMPTY;
++ zone->wp = zone->start;
++
++ return BLK_STS_OK;
++}
++
++static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
++ sector_t sector)
++{
++ struct nullb_device *dev = cmd->nq->dev;
++ unsigned int zone_no;
++ struct blk_zone *zone;
++ blk_status_t ret;
++ size_t i;
++
++ if (op == REQ_OP_ZONE_RESET_ALL) {
++ for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
++ null_lock_zone(dev, i);
++ zone = &dev->zones[i];
++ if (zone->cond != BLK_ZONE_COND_EMPTY) {
++ null_reset_zone(dev, zone);
++ trace_nullb_zone_op(cmd, i, zone->cond);
++ }
++ null_unlock_zone(dev, i);
++ }
++ return BLK_STS_OK;
++ }
++
++ zone_no = null_zone_no(dev, sector);
++ zone = &dev->zones[zone_no];
++
++ null_lock_zone(dev, zone_no);
++
++ switch (op) {
++ case REQ_OP_ZONE_RESET:
++ ret = null_reset_zone(dev, zone);
++ break;
++ case REQ_OP_ZONE_OPEN:
++ ret = null_open_zone(dev, zone);
++ break;
++ case REQ_OP_ZONE_CLOSE:
++ ret = null_close_zone(dev, zone);
++ break;
++ case REQ_OP_ZONE_FINISH:
++ ret = null_finish_zone(dev, zone);
++ break;
++ default:
++ ret = BLK_STS_NOTSUPP;
++ break;
++ }
++
++ if (ret == BLK_STS_OK)
++ trace_nullb_zone_op(cmd, zone_no, zone->cond);
++
++ null_unlock_zone(dev, zone_no);
++
++ return ret;
++}
++
++blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
++ sector_t sector, sector_t nr_sectors)
++{
++ struct nullb_device *dev = cmd->nq->dev;
++ unsigned int zno = null_zone_no(dev, sector);
++ blk_status_t sts;
++
++ switch (op) {
++ case REQ_OP_WRITE:
++ sts = null_zone_write(cmd, sector, nr_sectors, false);
++ break;
++ case REQ_OP_ZONE_APPEND:
++ sts = null_zone_write(cmd, sector, nr_sectors, true);
++ break;
++ case REQ_OP_ZONE_RESET:
++ case REQ_OP_ZONE_RESET_ALL:
++ case REQ_OP_ZONE_OPEN:
++ case REQ_OP_ZONE_CLOSE:
++ case REQ_OP_ZONE_FINISH:
++ sts = null_zone_mgmt(cmd, op, sector);
++ break;
++ default:
++ null_lock_zone(dev, zno);
++ sts = null_process_cmd(cmd, op, sector, nr_sectors);
++ null_unlock_zone(dev, zno);
++ }
++
++ return sts;
++}
+diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
+deleted file mode 100644
+index c6ba8f9f3f311..0000000000000
+--- a/drivers/block/null_blk_main.c
++++ /dev/null
+@@ -1,2036 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/*
+- * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
+- * Shaohua Li <shli@fb.com>
+- */
+-#include <linux/module.h>
+-
+-#include <linux/moduleparam.h>
+-#include <linux/sched.h>
+-#include <linux/fs.h>
+-#include <linux/init.h>
+-#include "null_blk.h"
+-
+-#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+-#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
+-#define SECTOR_MASK (PAGE_SECTORS - 1)
+-
+-#define FREE_BATCH 16
+-
+-#define TICKS_PER_SEC 50ULL
+-#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC)
+-
+-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+-static DECLARE_FAULT_ATTR(null_timeout_attr);
+-static DECLARE_FAULT_ATTR(null_requeue_attr);
+-static DECLARE_FAULT_ATTR(null_init_hctx_attr);
+-#endif
+-
+-static inline u64 mb_per_tick(int mbps)
+-{
+- return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
+-}
+-
+-/*
+- * Status flags for nullb_device.
+- *
+- * CONFIGURED: Device has been configured and turned on. Cannot reconfigure.
+- * UP: Device is currently on and visible in userspace.
+- * THROTTLED: Device is being throttled.
+- * CACHE: Device is using a write-back cache.
+- */
+-enum nullb_device_flags {
+- NULLB_DEV_FL_CONFIGURED = 0,
+- NULLB_DEV_FL_UP = 1,
+- NULLB_DEV_FL_THROTTLED = 2,
+- NULLB_DEV_FL_CACHE = 3,
+-};
+-
+-#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2)
+-/*
+- * nullb_page is a page in memory for nullb devices.
+- *
+- * @page: The page holding the data.
+- * @bitmap: The bitmap represents which sector in the page has data.
+- * Each bit represents one block size. For example, sector 8
+- * will use the 7th bit
+- * The highest 2 bits of bitmap are for special purpose. LOCK means the cache
+- * page is being flushing to storage. FREE means the cache page is freed and
+- * should be skipped from flushing to storage. Please see
+- * null_make_cache_space
+- */
+-struct nullb_page {
+- struct page *page;
+- DECLARE_BITMAP(bitmap, MAP_SZ);
+-};
+-#define NULLB_PAGE_LOCK (MAP_SZ - 1)
+-#define NULLB_PAGE_FREE (MAP_SZ - 2)
+-
+-static LIST_HEAD(nullb_list);
+-static struct mutex lock;
+-static int null_major;
+-static DEFINE_IDA(nullb_indexes);
+-static struct blk_mq_tag_set tag_set;
+-
+-enum {
+- NULL_IRQ_NONE = 0,
+- NULL_IRQ_SOFTIRQ = 1,
+- NULL_IRQ_TIMER = 2,
+-};
+-
+-enum {
+- NULL_Q_BIO = 0,
+- NULL_Q_RQ = 1,
+- NULL_Q_MQ = 2,
+-};
+-
+-static int g_no_sched;
+-module_param_named(no_sched, g_no_sched, int, 0444);
+-MODULE_PARM_DESC(no_sched, "No io scheduler");
+-
+-static int g_submit_queues = 1;
+-module_param_named(submit_queues, g_submit_queues, int, 0444);
+-MODULE_PARM_DESC(submit_queues, "Number of submission queues");
+-
+-static int g_home_node = NUMA_NO_NODE;
+-module_param_named(home_node, g_home_node, int, 0444);
+-MODULE_PARM_DESC(home_node, "Home node for the device");
+-
+-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+-/*
+- * For more details about fault injection, please refer to
+- * Documentation/fault-injection/fault-injection.rst.
+- */
+-static char g_timeout_str[80];
+-module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
+-MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>");
+-
+-static char g_requeue_str[80];
+-module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
+-MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>");
+-
+-static char g_init_hctx_str[80];
+-module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
+-MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
+-#endif
+-
+-static int g_queue_mode = NULL_Q_MQ;
+-
+-static int null_param_store_val(const char *str, int *val, int min, int max)
+-{
+- int ret, new_val;
+-
+- ret = kstrtoint(str, 10, &new_val);
+- if (ret)
+- return -EINVAL;
+-
+- if (new_val < min || new_val > max)
+- return -EINVAL;
+-
+- *val = new_val;
+- return 0;
+-}
+-
+-static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
+-{
+- return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
+-}
+-
+-static const struct kernel_param_ops null_queue_mode_param_ops = {
+- .set = null_set_queue_mode,
+- .get = param_get_int,
+-};
+-
+-device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
+-MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
+-
+-static int g_gb = 250;
+-module_param_named(gb, g_gb, int, 0444);
+-MODULE_PARM_DESC(gb, "Size in GB");
+-
+-static int g_bs = 512;
+-module_param_named(bs, g_bs, int, 0444);
+-MODULE_PARM_DESC(bs, "Block size (in bytes)");
+-
+-static unsigned int nr_devices = 1;
+-module_param(nr_devices, uint, 0444);
+-MODULE_PARM_DESC(nr_devices, "Number of devices to register");
+-
+-static bool g_blocking;
+-module_param_named(blocking, g_blocking, bool, 0444);
+-MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
+-
+-static bool shared_tags;
+-module_param(shared_tags, bool, 0444);
+-MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
+-
+-static bool g_shared_tag_bitmap;
+-module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444);
+-MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq");
+-
+-static int g_irqmode = NULL_IRQ_SOFTIRQ;
+-
+-static int null_set_irqmode(const char *str, const struct kernel_param *kp)
+-{
+- return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
+- NULL_IRQ_TIMER);
+-}
+-
+-static const struct kernel_param_ops null_irqmode_param_ops = {
+- .set = null_set_irqmode,
+- .get = param_get_int,
+-};
+-
+-device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
+-MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
+-
+-static unsigned long g_completion_nsec = 10000;
+-module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
+-MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
+-
+-static int g_hw_queue_depth = 64;
+-module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
+-MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
+-
+-static bool g_use_per_node_hctx;
+-module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
+-MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
+-
+-static bool g_zoned;
+-module_param_named(zoned, g_zoned, bool, S_IRUGO);
+-MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false");
+-
+-static unsigned long g_zone_size = 256;
+-module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
+-MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
+-
+-static unsigned long g_zone_capacity;
+-module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
+-MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");
+-
+-static unsigned int g_zone_nr_conv;
+-module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
+-MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
+-
+-static unsigned int g_zone_max_open;
+-module_param_named(zone_max_open, g_zone_max_open, uint, 0444);
+-MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)");
+-
+-static unsigned int g_zone_max_active;
+-module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
+-MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
+-
+-static struct nullb_device *null_alloc_dev(void);
+-static void null_free_dev(struct nullb_device *dev);
+-static void null_del_dev(struct nullb *nullb);
+-static int null_add_dev(struct nullb_device *dev);
+-static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
+-
+-static inline struct nullb_device *to_nullb_device(struct config_item *item)
+-{
+- return item ? container_of(item, struct nullb_device, item) : NULL;
+-}
+-
+-static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
+-{
+- return snprintf(page, PAGE_SIZE, "%u\n", val);
+-}
+-
+-static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
+- char *page)
+-{
+- return snprintf(page, PAGE_SIZE, "%lu\n", val);
+-}
+-
+-static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
+-{
+- return snprintf(page, PAGE_SIZE, "%u\n", val);
+-}
+-
+-static ssize_t nullb_device_uint_attr_store(unsigned int *val,
+- const char *page, size_t count)
+-{
+- unsigned int tmp;
+- int result;
+-
+- result = kstrtouint(page, 0, &tmp);
+- if (result < 0)
+- return result;
+-
+- *val = tmp;
+- return count;
+-}
+-
+-static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
+- const char *page, size_t count)
+-{
+- int result;
+- unsigned long tmp;
+-
+- result = kstrtoul(page, 0, &tmp);
+- if (result < 0)
+- return result;
+-
+- *val = tmp;
+- return count;
+-}
+-
+-static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
+- size_t count)
+-{
+- bool tmp;
+- int result;
+-
+- result = kstrtobool(page, &tmp);
+- if (result < 0)
+- return result;
+-
+- *val = tmp;
+- return count;
+-}
+-
+-/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
+-#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \
+-static ssize_t \
+-nullb_device_##NAME##_show(struct config_item *item, char *page) \
+-{ \
+- return nullb_device_##TYPE##_attr_show( \
+- to_nullb_device(item)->NAME, page); \
+-} \
+-static ssize_t \
+-nullb_device_##NAME##_store(struct config_item *item, const char *page, \
+- size_t count) \
+-{ \
+- int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
+- struct nullb_device *dev = to_nullb_device(item); \
+- TYPE new_value = 0; \
+- int ret; \
+- \
+- ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
+- if (ret < 0) \
+- return ret; \
+- if (apply_fn) \
+- ret = apply_fn(dev, new_value); \
+- else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \
+- ret = -EBUSY; \
+- if (ret < 0) \
+- return ret; \
+- dev->NAME = new_value; \
+- return count; \
+-} \
+-CONFIGFS_ATTR(nullb_device_, NAME);
+-
+-static int nullb_apply_submit_queues(struct nullb_device *dev,
+- unsigned int submit_queues)
+-{
+- struct nullb *nullb = dev->nullb;
+- struct blk_mq_tag_set *set;
+-
+- if (!nullb)
+- return 0;
+-
+- /*
+- * Make sure that null_init_hctx() does not access nullb->queues[] past
+- * the end of that array.
+- */
+- if (submit_queues > nr_cpu_ids)
+- return -EINVAL;
+- set = nullb->tag_set;
+- blk_mq_update_nr_hw_queues(set, submit_queues);
+- return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM;
+-}
+-
+-NULLB_DEVICE_ATTR(size, ulong, NULL);
+-NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
+-NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
+-NULLB_DEVICE_ATTR(home_node, uint, NULL);
+-NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
+-NULLB_DEVICE_ATTR(blocksize, uint, NULL);
+-NULLB_DEVICE_ATTR(irqmode, uint, NULL);
+-NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
+-NULLB_DEVICE_ATTR(index, uint, NULL);
+-NULLB_DEVICE_ATTR(blocking, bool, NULL);
+-NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
+-NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
+-NULLB_DEVICE_ATTR(discard, bool, NULL);
+-NULLB_DEVICE_ATTR(mbps, uint, NULL);
+-NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
+-NULLB_DEVICE_ATTR(zoned, bool, NULL);
+-NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
+-NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
+-NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
+-NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
+-NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
+-
+-static ssize_t nullb_device_power_show(struct config_item *item, char *page)
+-{
+- return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
+-}
+-
+-static ssize_t nullb_device_power_store(struct config_item *item,
+- const char *page, size_t count)
+-{
+- struct nullb_device *dev = to_nullb_device(item);
+- bool newp = false;
+- ssize_t ret;
+-
+- ret = nullb_device_bool_attr_store(&newp, page, count);
+- if (ret < 0)
+- return ret;
+-
+- if (!dev->power && newp) {
+- if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
+- return count;
+- if (null_add_dev(dev)) {
+- clear_bit(NULLB_DEV_FL_UP, &dev->flags);
+- return -ENOMEM;
+- }
+-
+- set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
+- dev->power = newp;
+- } else if (dev->power && !newp) {
+- if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
+- mutex_lock(&lock);
+- dev->power = newp;
+- null_del_dev(dev->nullb);
+- mutex_unlock(&lock);
+- }
+- clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
+- }
+-
+- return count;
+-}
+-
+-CONFIGFS_ATTR(nullb_device_, power);
+-
+-static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
+-{
+- struct nullb_device *t_dev = to_nullb_device(item);
+-
+- return badblocks_show(&t_dev->badblocks, page, 0);
+-}
+-
+-static ssize_t nullb_device_badblocks_store(struct config_item *item,
+- const char *page, size_t count)
+-{
+- struct nullb_device *t_dev = to_nullb_device(item);
+- char *orig, *buf, *tmp;
+- u64 start, end;
+- int ret;
+-
+- orig = kstrndup(page, count, GFP_KERNEL);
+- if (!orig)
+- return -ENOMEM;
+-
+- buf = strstrip(orig);
+-
+- ret = -EINVAL;
+- if (buf[0] != '+' && buf[0] != '-')
+- goto out;
+- tmp = strchr(&buf[1], '-');
+- if (!tmp)
+- goto out;
+- *tmp = '\0';
+- ret = kstrtoull(buf + 1, 0, &start);
+- if (ret)
+- goto out;
+- ret = kstrtoull(tmp + 1, 0, &end);
+- if (ret)
+- goto out;
+- ret = -EINVAL;
+- if (start > end)
+- goto out;
+- /* enable badblocks */
+- cmpxchg(&t_dev->badblocks.shift, -1, 0);
+- if (buf[0] == '+')
+- ret = badblocks_set(&t_dev->badblocks, start,
+- end - start + 1, 1);
+- else
+- ret = badblocks_clear(&t_dev->badblocks, start,
+- end - start + 1);
+- if (ret == 0)
+- ret = count;
+-out:
+- kfree(orig);
+- return ret;
+-}
+-CONFIGFS_ATTR(nullb_device_, badblocks);
+-
+-static struct configfs_attribute *nullb_device_attrs[] = {
+- &nullb_device_attr_size,
+- &nullb_device_attr_completion_nsec,
+- &nullb_device_attr_submit_queues,
+- &nullb_device_attr_home_node,
+- &nullb_device_attr_queue_mode,
+- &nullb_device_attr_blocksize,
+- &nullb_device_attr_irqmode,
+- &nullb_device_attr_hw_queue_depth,
+- &nullb_device_attr_index,
+- &nullb_device_attr_blocking,
+- &nullb_device_attr_use_per_node_hctx,
+- &nullb_device_attr_power,
+- &nullb_device_attr_memory_backed,
+- &nullb_device_attr_discard,
+- &nullb_device_attr_mbps,
+- &nullb_device_attr_cache_size,
+- &nullb_device_attr_badblocks,
+- &nullb_device_attr_zoned,
+- &nullb_device_attr_zone_size,
+- &nullb_device_attr_zone_capacity,
+- &nullb_device_attr_zone_nr_conv,
+- &nullb_device_attr_zone_max_open,
+- &nullb_device_attr_zone_max_active,
+- NULL,
+-};
+-
+-static void nullb_device_release(struct config_item *item)
+-{
+- struct nullb_device *dev = to_nullb_device(item);
+-
+- null_free_device_storage(dev, false);
+- null_free_dev(dev);
+-}
+-
+-static struct configfs_item_operations nullb_device_ops = {
+- .release = nullb_device_release,
+-};
+-
+-static const struct config_item_type nullb_device_type = {
+- .ct_item_ops = &nullb_device_ops,
+- .ct_attrs = nullb_device_attrs,
+- .ct_owner = THIS_MODULE,
+-};
+-
+-static struct
+-config_item *nullb_group_make_item(struct config_group *group, const char *name)
+-{
+- struct nullb_device *dev;
+-
+- dev = null_alloc_dev();
+- if (!dev)
+- return ERR_PTR(-ENOMEM);
+-
+- config_item_init_type_name(&dev->item, name, &nullb_device_type);
+-
+- return &dev->item;
+-}
+-
+-static void
+-nullb_group_drop_item(struct config_group *group, struct config_item *item)
+-{
+- struct nullb_device *dev = to_nullb_device(item);
+-
+- if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
+- mutex_lock(&lock);
+- dev->power = false;
+- null_del_dev(dev->nullb);
+- mutex_unlock(&lock);
+- }
+-
+- config_item_put(item);
+-}
+-
+-static ssize_t memb_group_features_show(struct config_item *item, char *page)
+-{
+- return snprintf(page, PAGE_SIZE,
+- "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n");
+-}
+-
+-CONFIGFS_ATTR_RO(memb_group_, features);
+-
+-static struct configfs_attribute *nullb_group_attrs[] = {
+- &memb_group_attr_features,
+- NULL,
+-};
+-
+-static struct configfs_group_operations nullb_group_ops = {
+- .make_item = nullb_group_make_item,
+- .drop_item = nullb_group_drop_item,
+-};
+-
+-static const struct config_item_type nullb_group_type = {
+- .ct_group_ops = &nullb_group_ops,
+- .ct_attrs = nullb_group_attrs,
+- .ct_owner = THIS_MODULE,
+-};
+-
+-static struct configfs_subsystem nullb_subsys = {
+- .su_group = {
+- .cg_item = {
+- .ci_namebuf = "nullb",
+- .ci_type = &nullb_group_type,
+- },
+- },
+-};
+-
+-static inline int null_cache_active(struct nullb *nullb)
+-{
+- return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+-}
+-
+-static struct nullb_device *null_alloc_dev(void)
+-{
+- struct nullb_device *dev;
+-
+- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+- if (!dev)
+- return NULL;
+- INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
+- INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
+- if (badblocks_init(&dev->badblocks, 0)) {
+- kfree(dev);
+- return NULL;
+- }
+-
+- dev->size = g_gb * 1024;
+- dev->completion_nsec = g_completion_nsec;
+- dev->submit_queues = g_submit_queues;
+- dev->home_node = g_home_node;
+- dev->queue_mode = g_queue_mode;
+- dev->blocksize = g_bs;
+- dev->irqmode = g_irqmode;
+- dev->hw_queue_depth = g_hw_queue_depth;
+- dev->blocking = g_blocking;
+- dev->use_per_node_hctx = g_use_per_node_hctx;
+- dev->zoned = g_zoned;
+- dev->zone_size = g_zone_size;
+- dev->zone_capacity = g_zone_capacity;
+- dev->zone_nr_conv = g_zone_nr_conv;
+- dev->zone_max_open = g_zone_max_open;
+- dev->zone_max_active = g_zone_max_active;
+- return dev;
+-}
+-
+-static void null_free_dev(struct nullb_device *dev)
+-{
+- if (!dev)
+- return;
+-
+- null_free_zoned_dev(dev);
+- badblocks_exit(&dev->badblocks);
+- kfree(dev);
+-}
+-
+-static void put_tag(struct nullb_queue *nq, unsigned int tag)
+-{
+- clear_bit_unlock(tag, nq->tag_map);
+-
+- if (waitqueue_active(&nq->wait))
+- wake_up(&nq->wait);
+-}
+-
+-static unsigned int get_tag(struct nullb_queue *nq)
+-{
+- unsigned int tag;
+-
+- do {
+- tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
+- if (tag >= nq->queue_depth)
+- return -1U;
+- } while (test_and_set_bit_lock(tag, nq->tag_map));
+-
+- return tag;
+-}
+-
+-static void free_cmd(struct nullb_cmd *cmd)
+-{
+- put_tag(cmd->nq, cmd->tag);
+-}
+-
+-static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
+-
+-static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
+-{
+- struct nullb_cmd *cmd;
+- unsigned int tag;
+-
+- tag = get_tag(nq);
+- if (tag != -1U) {
+- cmd = &nq->cmds[tag];
+- cmd->tag = tag;
+- cmd->error = BLK_STS_OK;
+- cmd->nq = nq;
+- if (nq->dev->irqmode == NULL_IRQ_TIMER) {
+- hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
+- HRTIMER_MODE_REL);
+- cmd->timer.function = null_cmd_timer_expired;
+- }
+- return cmd;
+- }
+-
+- return NULL;
+-}
+-
+-static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
+-{
+- struct nullb_cmd *cmd;
+- DEFINE_WAIT(wait);
+-
+- cmd = __alloc_cmd(nq);
+- if (cmd || !can_wait)
+- return cmd;
+-
+- do {
+- prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
+- cmd = __alloc_cmd(nq);
+- if (cmd)
+- break;
+-
+- io_schedule();
+- } while (1);
+-
+- finish_wait(&nq->wait, &wait);
+- return cmd;
+-}
+-
+-static void end_cmd(struct nullb_cmd *cmd)
+-{
+- int queue_mode = cmd->nq->dev->queue_mode;
+-
+- switch (queue_mode) {
+- case NULL_Q_MQ:
+- blk_mq_end_request(cmd->rq, cmd->error);
+- return;
+- case NULL_Q_BIO:
+- cmd->bio->bi_status = cmd->error;
+- bio_endio(cmd->bio);
+- break;
+- }
+-
+- free_cmd(cmd);
+-}
+-
+-static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
+-{
+- end_cmd(container_of(timer, struct nullb_cmd, timer));
+-
+- return HRTIMER_NORESTART;
+-}
+-
+-static void null_cmd_end_timer(struct nullb_cmd *cmd)
+-{
+- ktime_t kt = cmd->nq->dev->completion_nsec;
+-
+- hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
+-}
+-
+-static void null_complete_rq(struct request *rq)
+-{
+- end_cmd(blk_mq_rq_to_pdu(rq));
+-}
+-
+-static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
+-{
+- struct nullb_page *t_page;
+-
+- t_page = kmalloc(sizeof(struct nullb_page), gfp_flags);
+- if (!t_page)
+- goto out;
+-
+- t_page->page = alloc_pages(gfp_flags, 0);
+- if (!t_page->page)
+- goto out_freepage;
+-
+- memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
+- return t_page;
+-out_freepage:
+- kfree(t_page);
+-out:
+- return NULL;
+-}
+-
+-static void null_free_page(struct nullb_page *t_page)
+-{
+- __set_bit(NULLB_PAGE_FREE, t_page->bitmap);
+- if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
+- return;
+- __free_page(t_page->page);
+- kfree(t_page);
+-}
+-
+-static bool null_page_empty(struct nullb_page *page)
+-{
+- int size = MAP_SZ - 2;
+-
+- return find_first_bit(page->bitmap, size) == size;
+-}
+-
+-static void null_free_sector(struct nullb *nullb, sector_t sector,
+- bool is_cache)
+-{
+- unsigned int sector_bit;
+- u64 idx;
+- struct nullb_page *t_page, *ret;
+- struct radix_tree_root *root;
+-
+- root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+- idx = sector >> PAGE_SECTORS_SHIFT;
+- sector_bit = (sector & SECTOR_MASK);
+-
+- t_page = radix_tree_lookup(root, idx);
+- if (t_page) {
+- __clear_bit(sector_bit, t_page->bitmap);
+-
+- if (null_page_empty(t_page)) {
+- ret = radix_tree_delete_item(root, idx, t_page);
+- WARN_ON(ret != t_page);
+- null_free_page(ret);
+- if (is_cache)
+- nullb->dev->curr_cache -= PAGE_SIZE;
+- }
+- }
+-}
+-
+-static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
+- struct nullb_page *t_page, bool is_cache)
+-{
+- struct radix_tree_root *root;
+-
+- root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+-
+- if (radix_tree_insert(root, idx, t_page)) {
+- null_free_page(t_page);
+- t_page = radix_tree_lookup(root, idx);
+- WARN_ON(!t_page || t_page->page->index != idx);
+- } else if (is_cache)
+- nullb->dev->curr_cache += PAGE_SIZE;
+-
+- return t_page;
+-}
+-
+-static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
+-{
+- unsigned long pos = 0;
+- int nr_pages;
+- struct nullb_page *ret, *t_pages[FREE_BATCH];
+- struct radix_tree_root *root;
+-
+- root = is_cache ? &dev->cache : &dev->data;
+-
+- do {
+- int i;
+-
+- nr_pages = radix_tree_gang_lookup(root,
+- (void **)t_pages, pos, FREE_BATCH);
+-
+- for (i = 0; i < nr_pages; i++) {
+- pos = t_pages[i]->page->index;
+- ret = radix_tree_delete_item(root, pos, t_pages[i]);
+- WARN_ON(ret != t_pages[i]);
+- null_free_page(ret);
+- }
+-
+- pos++;
+- } while (nr_pages == FREE_BATCH);
+-
+- if (is_cache)
+- dev->curr_cache = 0;
+-}
+-
+-static struct nullb_page *__null_lookup_page(struct nullb *nullb,
+- sector_t sector, bool for_write, bool is_cache)
+-{
+- unsigned int sector_bit;
+- u64 idx;
+- struct nullb_page *t_page;
+- struct radix_tree_root *root;
+-
+- idx = sector >> PAGE_SECTORS_SHIFT;
+- sector_bit = (sector & SECTOR_MASK);
+-
+- root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+- t_page = radix_tree_lookup(root, idx);
+- WARN_ON(t_page && t_page->page->index != idx);
+-
+- if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
+- return t_page;
+-
+- return NULL;
+-}
+-
+-static struct nullb_page *null_lookup_page(struct nullb *nullb,
+- sector_t sector, bool for_write, bool ignore_cache)
+-{
+- struct nullb_page *page = NULL;
+-
+- if (!ignore_cache)
+- page = __null_lookup_page(nullb, sector, for_write, true);
+- if (page)
+- return page;
+- return __null_lookup_page(nullb, sector, for_write, false);
+-}
+-
+-static struct nullb_page *null_insert_page(struct nullb *nullb,
+- sector_t sector, bool ignore_cache)
+- __releases(&nullb->lock)
+- __acquires(&nullb->lock)
+-{
+- u64 idx;
+- struct nullb_page *t_page;
+-
+- t_page = null_lookup_page(nullb, sector, true, ignore_cache);
+- if (t_page)
+- return t_page;
+-
+- spin_unlock_irq(&nullb->lock);
+-
+- t_page = null_alloc_page(GFP_NOIO);
+- if (!t_page)
+- goto out_lock;
+-
+- if (radix_tree_preload(GFP_NOIO))
+- goto out_freepage;
+-
+- spin_lock_irq(&nullb->lock);
+- idx = sector >> PAGE_SECTORS_SHIFT;
+- t_page->page->index = idx;
+- t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
+- radix_tree_preload_end();
+-
+- return t_page;
+-out_freepage:
+- null_free_page(t_page);
+-out_lock:
+- spin_lock_irq(&nullb->lock);
+- return null_lookup_page(nullb, sector, true, ignore_cache);
+-}
+-
+-static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
+-{
+- int i;
+- unsigned int offset;
+- u64 idx;
+- struct nullb_page *t_page, *ret;
+- void *dst, *src;
+-
+- idx = c_page->page->index;
+-
+- t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
+-
+- __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
+- if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
+- null_free_page(c_page);
+- if (t_page && null_page_empty(t_page)) {
+- ret = radix_tree_delete_item(&nullb->dev->data,
+- idx, t_page);
+- null_free_page(t_page);
+- }
+- return 0;
+- }
+-
+- if (!t_page)
+- return -ENOMEM;
+-
+- src = kmap_atomic(c_page->page);
+- dst = kmap_atomic(t_page->page);
+-
+- for (i = 0; i < PAGE_SECTORS;
+- i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
+- if (test_bit(i, c_page->bitmap)) {
+- offset = (i << SECTOR_SHIFT);
+- memcpy(dst + offset, src + offset,
+- nullb->dev->blocksize);
+- __set_bit(i, t_page->bitmap);
+- }
+- }
+-
+- kunmap_atomic(dst);
+- kunmap_atomic(src);
+-
+- ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
+- null_free_page(ret);
+- nullb->dev->curr_cache -= PAGE_SIZE;
+-
+- return 0;
+-}
+-
+-static int null_make_cache_space(struct nullb *nullb, unsigned long n)
+-{
+- int i, err, nr_pages;
+- struct nullb_page *c_pages[FREE_BATCH];
+- unsigned long flushed = 0, one_round;
+-
+-again:
+- if ((nullb->dev->cache_size * 1024 * 1024) >
+- nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
+- return 0;
+-
+- nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
+- (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
+- /*
+- * nullb_flush_cache_page could unlock before using the c_pages. To
+- * avoid race, we don't allow page free
+- */
+- for (i = 0; i < nr_pages; i++) {
+- nullb->cache_flush_pos = c_pages[i]->page->index;
+- /*
+- * We found the page which is being flushed to disk by other
+- * threads
+- */
+- if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
+- c_pages[i] = NULL;
+- else
+- __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
+- }
+-
+- one_round = 0;
+- for (i = 0; i < nr_pages; i++) {
+- if (c_pages[i] == NULL)
+- continue;
+- err = null_flush_cache_page(nullb, c_pages[i]);
+- if (err)
+- return err;
+- one_round++;
+- }
+- flushed += one_round << PAGE_SHIFT;
+-
+- if (n > flushed) {
+- if (nr_pages == 0)
+- nullb->cache_flush_pos = 0;
+- if (one_round == 0) {
+- /* give other threads a chance */
+- spin_unlock_irq(&nullb->lock);
+- spin_lock_irq(&nullb->lock);
+- }
+- goto again;
+- }
+- return 0;
+-}
+-
+-static int copy_to_nullb(struct nullb *nullb, struct page *source,
+- unsigned int off, sector_t sector, size_t n, bool is_fua)
+-{
+- size_t temp, count = 0;
+- unsigned int offset;
+- struct nullb_page *t_page;
+- void *dst, *src;
+-
+- while (count < n) {
+- temp = min_t(size_t, nullb->dev->blocksize, n - count);
+-
+- if (null_cache_active(nullb) && !is_fua)
+- null_make_cache_space(nullb, PAGE_SIZE);
+-
+- offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
+- t_page = null_insert_page(nullb, sector,
+- !null_cache_active(nullb) || is_fua);
+- if (!t_page)
+- return -ENOSPC;
+-
+- src = kmap_atomic(source);
+- dst = kmap_atomic(t_page->page);
+- memcpy(dst + offset, src + off + count, temp);
+- kunmap_atomic(dst);
+- kunmap_atomic(src);
+-
+- __set_bit(sector & SECTOR_MASK, t_page->bitmap);
+-
+- if (is_fua)
+- null_free_sector(nullb, sector, true);
+-
+- count += temp;
+- sector += temp >> SECTOR_SHIFT;
+- }
+- return 0;
+-}
+-
+-static int copy_from_nullb(struct nullb *nullb, struct page *dest,
+- unsigned int off, sector_t sector, size_t n)
+-{
+- size_t temp, count = 0;
+- unsigned int offset;
+- struct nullb_page *t_page;
+- void *dst, *src;
+-
+- while (count < n) {
+- temp = min_t(size_t, nullb->dev->blocksize, n - count);
+-
+- offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
+- t_page = null_lookup_page(nullb, sector, false,
+- !null_cache_active(nullb));
+-
+- dst = kmap_atomic(dest);
+- if (!t_page) {
+- memset(dst + off + count, 0, temp);
+- goto next;
+- }
+- src = kmap_atomic(t_page->page);
+- memcpy(dst + off + count, src + offset, temp);
+- kunmap_atomic(src);
+-next:
+- kunmap_atomic(dst);
+-
+- count += temp;
+- sector += temp >> SECTOR_SHIFT;
+- }
+- return 0;
+-}
+-
+-static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
+- unsigned int len, unsigned int off)
+-{
+- void *dst;
+-
+- dst = kmap_atomic(page);
+- memset(dst + off, 0xFF, len);
+- kunmap_atomic(dst);
+-}
+-
+-static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n)
+-{
+- size_t temp;
+-
+- spin_lock_irq(&nullb->lock);
+- while (n > 0) {
+- temp = min_t(size_t, n, nullb->dev->blocksize);
+- null_free_sector(nullb, sector, false);
+- if (null_cache_active(nullb))
+- null_free_sector(nullb, sector, true);
+- sector += temp >> SECTOR_SHIFT;
+- n -= temp;
+- }
+- spin_unlock_irq(&nullb->lock);
+-}
+-
+-static int null_handle_flush(struct nullb *nullb)
+-{
+- int err;
+-
+- if (!null_cache_active(nullb))
+- return 0;
+-
+- spin_lock_irq(&nullb->lock);
+- while (true) {
+- err = null_make_cache_space(nullb,
+- nullb->dev->cache_size * 1024 * 1024);
+- if (err || nullb->dev->curr_cache == 0)
+- break;
+- }
+-
+- WARN_ON(!radix_tree_empty(&nullb->dev->cache));
+- spin_unlock_irq(&nullb->lock);
+- return err;
+-}
+-
+-static int null_transfer(struct nullb *nullb, struct page *page,
+- unsigned int len, unsigned int off, bool is_write, sector_t sector,
+- bool is_fua)
+-{
+- struct nullb_device *dev = nullb->dev;
+- unsigned int valid_len = len;
+- int err = 0;
+-
+- if (!is_write) {
+- if (dev->zoned)
+- valid_len = null_zone_valid_read_len(nullb,
+- sector, len);
+-
+- if (valid_len) {
+- err = copy_from_nullb(nullb, page, off,
+- sector, valid_len);
+- off += valid_len;
+- len -= valid_len;
+- }
+-
+- if (len)
+- nullb_fill_pattern(nullb, page, len, off);
+- flush_dcache_page(page);
+- } else {
+- flush_dcache_page(page);
+- err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
+- }
+-
+- return err;
+-}
+-
+-static int null_handle_rq(struct nullb_cmd *cmd)
+-{
+- struct request *rq = cmd->rq;
+- struct nullb *nullb = cmd->nq->dev->nullb;
+- int err;
+- unsigned int len;
+- sector_t sector;
+- struct req_iterator iter;
+- struct bio_vec bvec;
+-
+- sector = blk_rq_pos(rq);
+-
+- if (req_op(rq) == REQ_OP_DISCARD) {
+- null_handle_discard(nullb, sector, blk_rq_bytes(rq));
+- return 0;
+- }
+-
+- spin_lock_irq(&nullb->lock);
+- rq_for_each_segment(bvec, rq, iter) {
+- len = bvec.bv_len;
+- err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
+- op_is_write(req_op(rq)), sector,
+- rq->cmd_flags & REQ_FUA);
+- if (err) {
+- spin_unlock_irq(&nullb->lock);
+- return err;
+- }
+- sector += len >> SECTOR_SHIFT;
+- }
+- spin_unlock_irq(&nullb->lock);
+-
+- return 0;
+-}
+-
+-static int null_handle_bio(struct nullb_cmd *cmd)
+-{
+- struct bio *bio = cmd->bio;
+- struct nullb *nullb = cmd->nq->dev->nullb;
+- int err;
+- unsigned int len;
+- sector_t sector;
+- struct bio_vec bvec;
+- struct bvec_iter iter;
+-
+- sector = bio->bi_iter.bi_sector;
+-
+- if (bio_op(bio) == REQ_OP_DISCARD) {
+- null_handle_discard(nullb, sector,
+- bio_sectors(bio) << SECTOR_SHIFT);
+- return 0;
+- }
+-
+- spin_lock_irq(&nullb->lock);
+- bio_for_each_segment(bvec, bio, iter) {
+- len = bvec.bv_len;
+- err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
+- op_is_write(bio_op(bio)), sector,
+- bio->bi_opf & REQ_FUA);
+- if (err) {
+- spin_unlock_irq(&nullb->lock);
+- return err;
+- }
+- sector += len >> SECTOR_SHIFT;
+- }
+- spin_unlock_irq(&nullb->lock);
+- return 0;
+-}
+-
+-static void null_stop_queue(struct nullb *nullb)
+-{
+- struct request_queue *q = nullb->q;
+-
+- if (nullb->dev->queue_mode == NULL_Q_MQ)
+- blk_mq_stop_hw_queues(q);
+-}
+-
+-static void null_restart_queue_async(struct nullb *nullb)
+-{
+- struct request_queue *q = nullb->q;
+-
+- if (nullb->dev->queue_mode == NULL_Q_MQ)
+- blk_mq_start_stopped_hw_queues(q, true);
+-}
+-
+-static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
+-{
+- struct nullb_device *dev = cmd->nq->dev;
+- struct nullb *nullb = dev->nullb;
+- blk_status_t sts = BLK_STS_OK;
+- struct request *rq = cmd->rq;
+-
+- if (!hrtimer_active(&nullb->bw_timer))
+- hrtimer_restart(&nullb->bw_timer);
+-
+- if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
+- null_stop_queue(nullb);
+- /* race with timer */
+- if (atomic_long_read(&nullb->cur_bytes) > 0)
+- null_restart_queue_async(nullb);
+- /* requeue request */
+- sts = BLK_STS_DEV_RESOURCE;
+- }
+- return sts;
+-}
+-
+-static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
+- sector_t sector,
+- sector_t nr_sectors)
+-{
+- struct badblocks *bb = &cmd->nq->dev->badblocks;
+- sector_t first_bad;
+- int bad_sectors;
+-
+- if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
+- return BLK_STS_IOERR;
+-
+- return BLK_STS_OK;
+-}
+-
+-static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
+- enum req_opf op)
+-{
+- struct nullb_device *dev = cmd->nq->dev;
+- int err;
+-
+- if (dev->queue_mode == NULL_Q_BIO)
+- err = null_handle_bio(cmd);
+- else
+- err = null_handle_rq(cmd);
+-
+- return errno_to_blk_status(err);
+-}
+-
+-static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
+-{
+- struct nullb_device *dev = cmd->nq->dev;
+- struct bio *bio;
+-
+- if (dev->memory_backed)
+- return;
+-
+- if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
+- zero_fill_bio(cmd->bio);
+- } else if (req_op(cmd->rq) == REQ_OP_READ) {
+- __rq_for_each_bio(bio, cmd->rq)
+- zero_fill_bio(bio);
+- }
+-}
+-
+-static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
+-{
+- /*
+- * Since root privileges are required to configure the null_blk
+- * driver, it is fine that this driver does not initialize the
+- * data buffers of read commands. Zero-initialize these buffers
+- * anyway if KMSAN is enabled to prevent that KMSAN complains
+- * about null_blk not initializing read data buffers.
+- */
+- if (IS_ENABLED(CONFIG_KMSAN))
+- nullb_zero_read_cmd_buffer(cmd);
+-
+- /* Complete IO by inline, softirq or timer */
+- switch (cmd->nq->dev->irqmode) {
+- case NULL_IRQ_SOFTIRQ:
+- switch (cmd->nq->dev->queue_mode) {
+- case NULL_Q_MQ:
+- if (likely(!blk_should_fake_timeout(cmd->rq->q)))
+- blk_mq_complete_request(cmd->rq);
+- break;
+- case NULL_Q_BIO:
+- /*
+- * XXX: no proper submitting cpu information available.
+- */
+- end_cmd(cmd);
+- break;
+- }
+- break;
+- case NULL_IRQ_NONE:
+- end_cmd(cmd);
+- break;
+- case NULL_IRQ_TIMER:
+- null_cmd_end_timer(cmd);
+- break;
+- }
+-}
+-
+-blk_status_t null_process_cmd(struct nullb_cmd *cmd,
+- enum req_opf op, sector_t sector,
+- unsigned int nr_sectors)
+-{
+- struct nullb_device *dev = cmd->nq->dev;
+- blk_status_t ret;
+-
+- if (dev->badblocks.shift != -1) {
+- ret = null_handle_badblocks(cmd, sector, nr_sectors);
+- if (ret != BLK_STS_OK)
+- return ret;
+- }
+-
+- if (dev->memory_backed)
+- return null_handle_memory_backed(cmd, op);
+-
+- return BLK_STS_OK;
+-}
+-
+-static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
+- sector_t nr_sectors, enum req_opf op)
+-{
+- struct nullb_device *dev = cmd->nq->dev;
+- struct nullb *nullb = dev->nullb;
+- blk_status_t sts;
+-
+- if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
+- sts = null_handle_throttled(cmd);
+- if (sts != BLK_STS_OK)
+- return sts;
+- }
+-
+- if (op == REQ_OP_FLUSH) {
+- cmd->error = errno_to_blk_status(null_handle_flush(nullb));
+- goto out;
+- }
+-
+- if (dev->zoned)
+- sts = null_process_zoned_cmd(cmd, op, sector, nr_sectors);
+- else
+- sts = null_process_cmd(cmd, op, sector, nr_sectors);
+-
+- /* Do not overwrite errors (e.g. timeout errors) */
+- if (cmd->error == BLK_STS_OK)
+- cmd->error = sts;
+-
+-out:
+- nullb_complete_cmd(cmd);
+- return BLK_STS_OK;
+-}
+-
+-static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
+-{
+- struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
+- ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
+- unsigned int mbps = nullb->dev->mbps;
+-
+- if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
+- return HRTIMER_NORESTART;
+-
+- atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
+- null_restart_queue_async(nullb);
+-
+- hrtimer_forward_now(&nullb->bw_timer, timer_interval);
+-
+- return HRTIMER_RESTART;
+-}
+-
+-static void nullb_setup_bwtimer(struct nullb *nullb)
+-{
+- ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
+-
+- hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+- nullb->bw_timer.function = nullb_bwtimer_fn;
+- atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
+- hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
+-}
+-
+-static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
+-{
+- int index = 0;
+-
+- if (nullb->nr_queues != 1)
+- index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
+-
+- return &nullb->queues[index];
+-}
+-
+-static blk_qc_t null_submit_bio(struct bio *bio)
+-{
+- sector_t sector = bio->bi_iter.bi_sector;
+- sector_t nr_sectors = bio_sectors(bio);
+- struct nullb *nullb = bio->bi_disk->private_data;
+- struct nullb_queue *nq = nullb_to_queue(nullb);
+- struct nullb_cmd *cmd;
+-
+- cmd = alloc_cmd(nq, 1);
+- cmd->bio = bio;
+-
+- null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio));
+- return BLK_QC_T_NONE;
+-}
+-
+-static bool should_timeout_request(struct request *rq)
+-{
+-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+- if (g_timeout_str[0])
+- return should_fail(&null_timeout_attr, 1);
+-#endif
+- return false;
+-}
+-
+-static bool should_requeue_request(struct request *rq)
+-{
+-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+- if (g_requeue_str[0])
+- return should_fail(&null_requeue_attr, 1);
+-#endif
+- return false;
+-}
+-
+-static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
+-{
+- struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+-
+- pr_info("rq %p timed out\n", rq);
+-
+- /*
+- * If the device is marked as blocking (i.e. memory backed or zoned
+- * device), the submission path may be blocked waiting for resources
+- * and cause real timeouts. For these real timeouts, the submission
+- * path will complete the request using blk_mq_complete_request().
+- * Only fake timeouts need to execute blk_mq_complete_request() here.
+- */
+- cmd->error = BLK_STS_TIMEOUT;
+- if (cmd->fake_timeout)
+- blk_mq_complete_request(rq);
+- return BLK_EH_DONE;
+-}
+-
+-static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
+- const struct blk_mq_queue_data *bd)
+-{
+- struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+- struct nullb_queue *nq = hctx->driver_data;
+- sector_t nr_sectors = blk_rq_sectors(bd->rq);
+- sector_t sector = blk_rq_pos(bd->rq);
+-
+- might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
+-
+- if (nq->dev->irqmode == NULL_IRQ_TIMER) {
+- hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+- cmd->timer.function = null_cmd_timer_expired;
+- }
+- cmd->rq = bd->rq;
+- cmd->error = BLK_STS_OK;
+- cmd->nq = nq;
+- cmd->fake_timeout = should_timeout_request(bd->rq);
+-
+- blk_mq_start_request(bd->rq);
+-
+- if (should_requeue_request(bd->rq)) {
+- /*
+- * Alternate between hitting the core BUSY path, and the
+- * driver driven requeue path
+- */
+- nq->requeue_selection++;
+- if (nq->requeue_selection & 1)
+- return BLK_STS_RESOURCE;
+- else {
+- blk_mq_requeue_request(bd->rq, true);
+- return BLK_STS_OK;
+- }
+- }
+- if (cmd->fake_timeout)
+- return BLK_STS_OK;
+-
+- return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
+-}
+-
+-static void cleanup_queue(struct nullb_queue *nq)
+-{
+- kfree(nq->tag_map);
+- kfree(nq->cmds);
+-}
+-
+-static void cleanup_queues(struct nullb *nullb)
+-{
+- int i;
+-
+- for (i = 0; i < nullb->nr_queues; i++)
+- cleanup_queue(&nullb->queues[i]);
+-
+- kfree(nullb->queues);
+-}
+-
+-static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+-{
+- struct nullb_queue *nq = hctx->driver_data;
+- struct nullb *nullb = nq->dev->nullb;
+-
+- nullb->nr_queues--;
+-}
+-
+-static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+-{
+- init_waitqueue_head(&nq->wait);
+- nq->queue_depth = nullb->queue_depth;
+- nq->dev = nullb->dev;
+-}
+-
+-static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
+- unsigned int hctx_idx)
+-{
+- struct nullb *nullb = hctx->queue->queuedata;
+- struct nullb_queue *nq;
+-
+-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+- if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1))
+- return -EFAULT;
+-#endif
+-
+- nq = &nullb->queues[hctx_idx];
+- hctx->driver_data = nq;
+- null_init_queue(nullb, nq);
+- nullb->nr_queues++;
+-
+- return 0;
+-}
+-
+-static const struct blk_mq_ops null_mq_ops = {
+- .queue_rq = null_queue_rq,
+- .complete = null_complete_rq,
+- .timeout = null_timeout_rq,
+- .init_hctx = null_init_hctx,
+- .exit_hctx = null_exit_hctx,
+-};
+-
+-static void null_del_dev(struct nullb *nullb)
+-{
+- struct nullb_device *dev;
+-
+- if (!nullb)
+- return;
+-
+- dev = nullb->dev;
+-
+- ida_simple_remove(&nullb_indexes, nullb->index);
+-
+- list_del_init(&nullb->list);
+-
+- del_gendisk(nullb->disk);
+-
+- if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
+- hrtimer_cancel(&nullb->bw_timer);
+- atomic_long_set(&nullb->cur_bytes, LONG_MAX);
+- null_restart_queue_async(nullb);
+- }
+-
+- blk_cleanup_queue(nullb->q);
+- if (dev->queue_mode == NULL_Q_MQ &&
+- nullb->tag_set == &nullb->__tag_set)
+- blk_mq_free_tag_set(nullb->tag_set);
+- put_disk(nullb->disk);
+- cleanup_queues(nullb);
+- if (null_cache_active(nullb))
+- null_free_device_storage(nullb->dev, true);
+- kfree(nullb);
+- dev->nullb = NULL;
+-}
+-
+-static void null_config_discard(struct nullb *nullb)
+-{
+- if (nullb->dev->discard == false)
+- return;
+-
+- if (nullb->dev->zoned) {
+- nullb->dev->discard = false;
+- pr_info("discard option is ignored in zoned mode\n");
+- return;
+- }
+-
+- nullb->q->limits.discard_granularity = nullb->dev->blocksize;
+- nullb->q->limits.discard_alignment = nullb->dev->blocksize;
+- blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
+- blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
+-}
+-
+-static const struct block_device_operations null_bio_ops = {
+- .owner = THIS_MODULE,
+- .submit_bio = null_submit_bio,
+- .report_zones = null_report_zones,
+-};
+-
+-static const struct block_device_operations null_rq_ops = {
+- .owner = THIS_MODULE,
+- .report_zones = null_report_zones,
+-};
+-
+-static int setup_commands(struct nullb_queue *nq)
+-{
+- struct nullb_cmd *cmd;
+- int i, tag_size;
+-
+- nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
+- if (!nq->cmds)
+- return -ENOMEM;
+-
+- tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
+- nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL);
+- if (!nq->tag_map) {
+- kfree(nq->cmds);
+- return -ENOMEM;
+- }
+-
+- for (i = 0; i < nq->queue_depth; i++) {
+- cmd = &nq->cmds[i];
+- cmd->tag = -1U;
+- }
+-
+- return 0;
+-}
+-
+-static int setup_queues(struct nullb *nullb)
+-{
+- nullb->queues = kcalloc(nr_cpu_ids, sizeof(struct nullb_queue),
+- GFP_KERNEL);
+- if (!nullb->queues)
+- return -ENOMEM;
+-
+- nullb->queue_depth = nullb->dev->hw_queue_depth;
+-
+- return 0;
+-}
+-
+-static int init_driver_queues(struct nullb *nullb)
+-{
+- struct nullb_queue *nq;
+- int i, ret = 0;
+-
+- for (i = 0; i < nullb->dev->submit_queues; i++) {
+- nq = &nullb->queues[i];
+-
+- null_init_queue(nullb, nq);
+-
+- ret = setup_commands(nq);
+- if (ret)
+- return ret;
+- nullb->nr_queues++;
+- }
+- return 0;
+-}
+-
+-static int null_gendisk_register(struct nullb *nullb)
+-{
+- sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
+- struct gendisk *disk;
+-
+- disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
+- if (!disk)
+- return -ENOMEM;
+- set_capacity(disk, size);
+-
+- disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
+- disk->major = null_major;
+- disk->first_minor = nullb->index;
+- if (queue_is_mq(nullb->q))
+- disk->fops = &null_rq_ops;
+- else
+- disk->fops = &null_bio_ops;
+- disk->private_data = nullb;
+- disk->queue = nullb->q;
+- strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+-
+- if (nullb->dev->zoned) {
+- int ret = null_register_zoned_dev(nullb);
+-
+- if (ret)
+- return ret;
+- }
+-
+- add_disk(disk);
+- return 0;
+-}
+-
+-static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
+-{
+- set->ops = &null_mq_ops;
+- set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
+- g_submit_queues;
+- set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
+- g_hw_queue_depth;
+- set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
+- set->cmd_size = sizeof(struct nullb_cmd);
+- set->flags = BLK_MQ_F_SHOULD_MERGE;
+- if (g_no_sched)
+- set->flags |= BLK_MQ_F_NO_SCHED;
+- if (g_shared_tag_bitmap)
+- set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+- set->driver_data = NULL;
+-
+- if ((nullb && nullb->dev->blocking) || g_blocking)
+- set->flags |= BLK_MQ_F_BLOCKING;
+-
+- return blk_mq_alloc_tag_set(set);
+-}
+-
+-static int null_validate_conf(struct nullb_device *dev)
+-{
+- dev->blocksize = round_down(dev->blocksize, 512);
+- dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
+-
+- if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
+- if (dev->submit_queues != nr_online_nodes)
+- dev->submit_queues = nr_online_nodes;
+- } else if (dev->submit_queues > nr_cpu_ids)
+- dev->submit_queues = nr_cpu_ids;
+- else if (dev->submit_queues == 0)
+- dev->submit_queues = 1;
+-
+- dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
+- dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
+-
+- /* Do memory allocation, so set blocking */
+- if (dev->memory_backed)
+- dev->blocking = true;
+- else /* cache is meaningless */
+- dev->cache_size = 0;
+- dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
+- dev->cache_size);
+- dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
+- /* can not stop a queue */
+- if (dev->queue_mode == NULL_Q_BIO)
+- dev->mbps = 0;
+-
+- if (dev->zoned &&
+- (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
+- pr_err("zone_size must be power-of-two\n");
+- return -EINVAL;
+- }
+-
+- return 0;
+-}
+-
+-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+-static bool __null_setup_fault(struct fault_attr *attr, char *str)
+-{
+- if (!str[0])
+- return true;
+-
+- if (!setup_fault_attr(attr, str))
+- return false;
+-
+- attr->verbose = 0;
+- return true;
+-}
+-#endif
+-
+-static bool null_setup_fault(void)
+-{
+-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+- if (!__null_setup_fault(&null_timeout_attr, g_timeout_str))
+- return false;
+- if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
+- return false;
+- if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str))
+- return false;
+-#endif
+- return true;
+-}
+-
+-static int null_add_dev(struct nullb_device *dev)
+-{
+- struct nullb *nullb;
+- int rv;
+-
+- rv = null_validate_conf(dev);
+- if (rv)
+- return rv;
+-
+- nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
+- if (!nullb) {
+- rv = -ENOMEM;
+- goto out;
+- }
+- nullb->dev = dev;
+- dev->nullb = nullb;
+-
+- spin_lock_init(&nullb->lock);
+-
+- rv = setup_queues(nullb);
+- if (rv)
+- goto out_free_nullb;
+-
+- if (dev->queue_mode == NULL_Q_MQ) {
+- if (shared_tags) {
+- nullb->tag_set = &tag_set;
+- rv = 0;
+- } else {
+- nullb->tag_set = &nullb->__tag_set;
+- rv = null_init_tag_set(nullb, nullb->tag_set);
+- }
+-
+- if (rv)
+- goto out_cleanup_queues;
+-
+- if (!null_setup_fault())
+- goto out_cleanup_queues;
+-
+- nullb->tag_set->timeout = 5 * HZ;
+- nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb);
+- if (IS_ERR(nullb->q)) {
+- rv = -ENOMEM;
+- goto out_cleanup_tags;
+- }
+- } else if (dev->queue_mode == NULL_Q_BIO) {
+- nullb->q = blk_alloc_queue(dev->home_node);
+- if (!nullb->q) {
+- rv = -ENOMEM;
+- goto out_cleanup_queues;
+- }
+- rv = init_driver_queues(nullb);
+- if (rv)
+- goto out_cleanup_blk_queue;
+- }
+-
+- if (dev->mbps) {
+- set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
+- nullb_setup_bwtimer(nullb);
+- }
+-
+- if (dev->cache_size > 0) {
+- set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+- blk_queue_write_cache(nullb->q, true, true);
+- }
+-
+- if (dev->zoned) {
+- rv = null_init_zoned_dev(dev, nullb->q);
+- if (rv)
+- goto out_cleanup_blk_queue;
+- }
+-
+- nullb->q->queuedata = nullb;
+- blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
+- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
+-
+- mutex_lock(&lock);
+- rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
+- if (rv < 0) {
+- mutex_unlock(&lock);
+- goto out_cleanup_zone;
+- }
+- nullb->index = rv;
+- dev->index = rv;
+- mutex_unlock(&lock);
+-
+- blk_queue_logical_block_size(nullb->q, dev->blocksize);
+- blk_queue_physical_block_size(nullb->q, dev->blocksize);
+-
+- null_config_discard(nullb);
+-
+- sprintf(nullb->disk_name, "nullb%d", nullb->index);
+-
+- rv = null_gendisk_register(nullb);
+- if (rv)
+- goto out_ida_free;
+-
+- mutex_lock(&lock);
+- list_add_tail(&nullb->list, &nullb_list);
+- mutex_unlock(&lock);
+-
+- return 0;
+-
+-out_ida_free:
+- ida_free(&nullb_indexes, nullb->index);
+-out_cleanup_zone:
+- null_free_zoned_dev(dev);
+-out_cleanup_blk_queue:
+- blk_cleanup_queue(nullb->q);
+-out_cleanup_tags:
+- if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+- blk_mq_free_tag_set(nullb->tag_set);
+-out_cleanup_queues:
+- cleanup_queues(nullb);
+-out_free_nullb:
+- kfree(nullb);
+- dev->nullb = NULL;
+-out:
+- return rv;
+-}
+-
+-static int __init null_init(void)
+-{
+- int ret = 0;
+- unsigned int i;
+- struct nullb *nullb;
+- struct nullb_device *dev;
+-
+- if (g_bs > PAGE_SIZE) {
+- pr_warn("invalid block size\n");
+- pr_warn("defaults block size to %lu\n", PAGE_SIZE);
+- g_bs = PAGE_SIZE;
+- }
+-
+- if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
+- pr_err("invalid home_node value\n");
+- g_home_node = NUMA_NO_NODE;
+- }
+-
+- if (g_queue_mode == NULL_Q_RQ) {
+- pr_err("legacy IO path no longer available\n");
+- return -EINVAL;
+- }
+- if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+- if (g_submit_queues != nr_online_nodes) {
+- pr_warn("submit_queues param is set to %u.\n",
+- nr_online_nodes);
+- g_submit_queues = nr_online_nodes;
+- }
+- } else if (g_submit_queues > nr_cpu_ids)
+- g_submit_queues = nr_cpu_ids;
+- else if (g_submit_queues <= 0)
+- g_submit_queues = 1;
+-
+- if (g_queue_mode == NULL_Q_MQ && shared_tags) {
+- ret = null_init_tag_set(NULL, &tag_set);
+- if (ret)
+- return ret;
+- }
+-
+- config_group_init(&nullb_subsys.su_group);
+- mutex_init(&nullb_subsys.su_mutex);
+-
+- ret = configfs_register_subsystem(&nullb_subsys);
+- if (ret)
+- goto err_tagset;
+-
+- mutex_init(&lock);
+-
+- null_major = register_blkdev(0, "nullb");
+- if (null_major < 0) {
+- ret = null_major;
+- goto err_conf;
+- }
+-
+- for (i = 0; i < nr_devices; i++) {
+- dev = null_alloc_dev();
+- if (!dev) {
+- ret = -ENOMEM;
+- goto err_dev;
+- }
+- ret = null_add_dev(dev);
+- if (ret) {
+- null_free_dev(dev);
+- goto err_dev;
+- }
+- }
+-
+- pr_info("module loaded\n");
+- return 0;
+-
+-err_dev:
+- while (!list_empty(&nullb_list)) {
+- nullb = list_entry(nullb_list.next, struct nullb, list);
+- dev = nullb->dev;
+- null_del_dev(nullb);
+- null_free_dev(dev);
+- }
+- unregister_blkdev(null_major, "nullb");
+-err_conf:
+- configfs_unregister_subsystem(&nullb_subsys);
+-err_tagset:
+- if (g_queue_mode == NULL_Q_MQ && shared_tags)
+- blk_mq_free_tag_set(&tag_set);
+- return ret;
+-}
+-
+-static void __exit null_exit(void)
+-{
+- struct nullb *nullb;
+-
+- configfs_unregister_subsystem(&nullb_subsys);
+-
+- unregister_blkdev(null_major, "nullb");
+-
+- mutex_lock(&lock);
+- while (!list_empty(&nullb_list)) {
+- struct nullb_device *dev;
+-
+- nullb = list_entry(nullb_list.next, struct nullb, list);
+- dev = nullb->dev;
+- null_del_dev(nullb);
+- null_free_dev(dev);
+- }
+- mutex_unlock(&lock);
+-
+- if (g_queue_mode == NULL_Q_MQ && shared_tags)
+- blk_mq_free_tag_set(&tag_set);
+-}
+-
+-module_init(null_init);
+-module_exit(null_exit);
+-
+-MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
+-MODULE_LICENSE("GPL");
+diff --git a/drivers/block/null_blk_trace.c b/drivers/block/null_blk_trace.c
+deleted file mode 100644
+index f246e7bff6982..0000000000000
+--- a/drivers/block/null_blk_trace.c
++++ /dev/null
+@@ -1,21 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-/*
+- * null_blk trace related helpers.
+- *
+- * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+- */
+-#include "null_blk_trace.h"
+-
+-/*
+- * Helper to use for all null_blk traces to extract disk name.
+- */
+-const char *nullb_trace_disk_name(struct trace_seq *p, char *name)
+-{
+- const char *ret = trace_seq_buffer_ptr(p);
+-
+- if (name && *name)
+- trace_seq_printf(p, "disk=%s, ", name);
+- trace_seq_putc(p, 0);
+-
+- return ret;
+-}
+diff --git a/drivers/block/null_blk_trace.h b/drivers/block/null_blk_trace.h
+deleted file mode 100644
+index 4f83032eb5441..0000000000000
+--- a/drivers/block/null_blk_trace.h
++++ /dev/null
+@@ -1,79 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * null_blk device driver tracepoints.
+- *
+- * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+- */
+-
+-#undef TRACE_SYSTEM
+-#define TRACE_SYSTEM nullb
+-
+-#if !defined(_TRACE_NULLB_H) || defined(TRACE_HEADER_MULTI_READ)
+-#define _TRACE_NULLB_H
+-
+-#include <linux/tracepoint.h>
+-#include <linux/trace_seq.h>
+-
+-#include "null_blk.h"
+-
+-const char *nullb_trace_disk_name(struct trace_seq *p, char *name);
+-
+-#define __print_disk_name(name) nullb_trace_disk_name(p, name)
+-
+-#ifndef TRACE_HEADER_MULTI_READ
+-static inline void __assign_disk_name(char *name, struct gendisk *disk)
+-{
+- if (disk)
+- memcpy(name, disk->disk_name, DISK_NAME_LEN);
+- else
+- memset(name, 0, DISK_NAME_LEN);
+-}
+-#endif
+-
+-TRACE_EVENT(nullb_zone_op,
+- TP_PROTO(struct nullb_cmd *cmd, unsigned int zone_no,
+- unsigned int zone_cond),
+- TP_ARGS(cmd, zone_no, zone_cond),
+- TP_STRUCT__entry(
+- __array(char, disk, DISK_NAME_LEN)
+- __field(enum req_opf, op)
+- __field(unsigned int, zone_no)
+- __field(unsigned int, zone_cond)
+- ),
+- TP_fast_assign(
+- __entry->op = req_op(cmd->rq);
+- __entry->zone_no = zone_no;
+- __entry->zone_cond = zone_cond;
+- __assign_disk_name(__entry->disk, cmd->rq->rq_disk);
+- ),
+- TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s",
+- __print_disk_name(__entry->disk),
+- blk_op_str(__entry->op),
+- __entry->zone_no,
+- blk_zone_cond_str(__entry->zone_cond))
+-);
+-
+-TRACE_EVENT(nullb_report_zones,
+- TP_PROTO(struct nullb *nullb, unsigned int nr_zones),
+- TP_ARGS(nullb, nr_zones),
+- TP_STRUCT__entry(
+- __array(char, disk, DISK_NAME_LEN)
+- __field(unsigned int, nr_zones)
+- ),
+- TP_fast_assign(
+- __entry->nr_zones = nr_zones;
+- __assign_disk_name(__entry->disk, nullb->disk);
+- ),
+- TP_printk("%s nr_zones=%u",
+- __print_disk_name(__entry->disk), __entry->nr_zones)
+-);
+-
+-#endif /* _TRACE_NULLB_H */
+-
+-#undef TRACE_INCLUDE_PATH
+-#define TRACE_INCLUDE_PATH .
+-#undef TRACE_INCLUDE_FILE
+-#define TRACE_INCLUDE_FILE null_blk_trace
+-
+-/* This part must be outside protection */
+-#include <trace/define_trace.h>
+diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
+deleted file mode 100644
+index f5df82c26c16f..0000000000000
+--- a/drivers/block/null_blk_zoned.c
++++ /dev/null
+@@ -1,617 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0
+-#include <linux/vmalloc.h>
+-#include <linux/bitmap.h>
+-#include "null_blk.h"
+-
+-#define CREATE_TRACE_POINTS
+-#include "null_blk_trace.h"
+-
+-#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT)
+-
+-static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
+-{
+- return sect >> ilog2(dev->zone_size_sects);
+-}
+-
+-int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
+-{
+- sector_t dev_capacity_sects, zone_capacity_sects;
+- sector_t sector = 0;
+- unsigned int i;
+-
+- if (!is_power_of_2(dev->zone_size)) {
+- pr_err("zone_size must be power-of-two\n");
+- return -EINVAL;
+- }
+- if (dev->zone_size > dev->size) {
+- pr_err("Zone size larger than device capacity\n");
+- return -EINVAL;
+- }
+-
+- if (!dev->zone_capacity)
+- dev->zone_capacity = dev->zone_size;
+-
+- if (dev->zone_capacity > dev->zone_size) {
+- pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
+- dev->zone_capacity, dev->zone_size);
+- return -EINVAL;
+- }
+-
+- zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity);
+- dev_capacity_sects = MB_TO_SECTS(dev->size);
+- dev->zone_size_sects = MB_TO_SECTS(dev->zone_size);
+- dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects);
+- if (dev_capacity_sects & (dev->zone_size_sects - 1))
+- dev->nr_zones++;
+-
+- dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone),
+- GFP_KERNEL | __GFP_ZERO);
+- if (!dev->zones)
+- return -ENOMEM;
+-
+- /*
+- * With memory backing, the zone_lock spinlock needs to be temporarily
+- * released to avoid scheduling in atomic context. To guarantee zone
+- * information protection, use a bitmap to lock zones with
+- * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing
+- * implies that the queue is marked with BLK_MQ_F_BLOCKING.
+- */
+- spin_lock_init(&dev->zone_lock);
+- if (dev->memory_backed) {
+- dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
+- if (!dev->zone_locks) {
+- kvfree(dev->zones);
+- return -ENOMEM;
+- }
+- }
+-
+- if (dev->zone_nr_conv >= dev->nr_zones) {
+- dev->zone_nr_conv = dev->nr_zones - 1;
+- pr_info("changed the number of conventional zones to %u",
+- dev->zone_nr_conv);
+- }
+-
+- /* Max active zones has to be < nbr of seq zones in order to be enforceable */
+- if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) {
+- dev->zone_max_active = 0;
+- pr_info("zone_max_active limit disabled, limit >= zone count\n");
+- }
+-
+- /* Max open zones has to be <= max active zones */
+- if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) {
+- dev->zone_max_open = dev->zone_max_active;
+- pr_info("changed the maximum number of open zones to %u\n",
+- dev->nr_zones);
+- } else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) {
+- dev->zone_max_open = 0;
+- pr_info("zone_max_open limit disabled, limit >= zone count\n");
+- }
+-
+- for (i = 0; i < dev->zone_nr_conv; i++) {
+- struct blk_zone *zone = &dev->zones[i];
+-
+- zone->start = sector;
+- zone->len = dev->zone_size_sects;
+- zone->capacity = zone->len;
+- zone->wp = zone->start + zone->len;
+- zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
+- zone->cond = BLK_ZONE_COND_NOT_WP;
+-
+- sector += dev->zone_size_sects;
+- }
+-
+- for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
+- struct blk_zone *zone = &dev->zones[i];
+-
+- zone->start = zone->wp = sector;
+- if (zone->start + dev->zone_size_sects > dev_capacity_sects)
+- zone->len = dev_capacity_sects - zone->start;
+- else
+- zone->len = dev->zone_size_sects;
+- zone->capacity =
+- min_t(sector_t, zone->len, zone_capacity_sects);
+- zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
+- zone->cond = BLK_ZONE_COND_EMPTY;
+-
+- sector += dev->zone_size_sects;
+- }
+-
+- q->limits.zoned = BLK_ZONED_HM;
+- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
+- blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
+-
+- return 0;
+-}
+-
+-int null_register_zoned_dev(struct nullb *nullb)
+-{
+- struct nullb_device *dev = nullb->dev;
+- struct request_queue *q = nullb->q;
+-
+- if (queue_is_mq(q)) {
+- int ret = blk_revalidate_disk_zones(nullb->disk, NULL);
+-
+- if (ret)
+- return ret;
+- } else {
+- blk_queue_chunk_sectors(q, dev->zone_size_sects);
+- q->nr_zones = blkdev_nr_zones(nullb->disk);
+- }
+-
+- blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
+- blk_queue_max_open_zones(q, dev->zone_max_open);
+- blk_queue_max_active_zones(q, dev->zone_max_active);
+-
+- return 0;
+-}
+-
+-void null_free_zoned_dev(struct nullb_device *dev)
+-{
+- bitmap_free(dev->zone_locks);
+- kvfree(dev->zones);
+- dev->zones = NULL;
+-}
+-
+-static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
+-{
+- if (dev->memory_backed)
+- wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
+- spin_lock_irq(&dev->zone_lock);
+-}
+-
+-static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno)
+-{
+- spin_unlock_irq(&dev->zone_lock);
+-
+- if (dev->memory_backed)
+- clear_and_wake_up_bit(zno, dev->zone_locks);
+-}
+-
+-int null_report_zones(struct gendisk *disk, sector_t sector,
+- unsigned int nr_zones, report_zones_cb cb, void *data)
+-{
+- struct nullb *nullb = disk->private_data;
+- struct nullb_device *dev = nullb->dev;
+- unsigned int first_zone, i, zno;
+- struct blk_zone zone;
+- int error;
+-
+- first_zone = null_zone_no(dev, sector);
+- if (first_zone >= dev->nr_zones)
+- return 0;
+-
+- nr_zones = min(nr_zones, dev->nr_zones - first_zone);
+- trace_nullb_report_zones(nullb, nr_zones);
+-
+- zno = first_zone;
+- for (i = 0; i < nr_zones; i++, zno++) {
+- /*
+- * Stacked DM target drivers will remap the zone information by
+- * modifying the zone information passed to the report callback.
+- * So use a local copy to avoid corruption of the device zone
+- * array.
+- */
+- null_lock_zone(dev, zno);
+- memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone));
+- null_unlock_zone(dev, zno);
+-
+- error = cb(&zone, i, data);
+- if (error)
+- return error;
+- }
+-
+- return nr_zones;
+-}
+-
+-/*
+- * This is called in the case of memory backing from null_process_cmd()
+- * with the target zone already locked.
+- */
+-size_t null_zone_valid_read_len(struct nullb *nullb,
+- sector_t sector, unsigned int len)
+-{
+- struct nullb_device *dev = nullb->dev;
+- struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
+- unsigned int nr_sectors = len >> SECTOR_SHIFT;
+-
+- /* Read must be below the write pointer position */
+- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
+- sector + nr_sectors <= zone->wp)
+- return len;
+-
+- if (sector > zone->wp)
+- return 0;
+-
+- return (zone->wp - sector) << SECTOR_SHIFT;
+-}
+-
+-static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *zone)
+-{
+- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+- return BLK_STS_IOERR;
+-
+- switch (zone->cond) {
+- case BLK_ZONE_COND_CLOSED:
+- /* close operation on closed is not an error */
+- return BLK_STS_OK;
+- case BLK_ZONE_COND_IMP_OPEN:
+- dev->nr_zones_imp_open--;
+- break;
+- case BLK_ZONE_COND_EXP_OPEN:
+- dev->nr_zones_exp_open--;
+- break;
+- case BLK_ZONE_COND_EMPTY:
+- case BLK_ZONE_COND_FULL:
+- default:
+- return BLK_STS_IOERR;
+- }
+-
+- if (zone->wp == zone->start) {
+- zone->cond = BLK_ZONE_COND_EMPTY;
+- } else {
+- zone->cond = BLK_ZONE_COND_CLOSED;
+- dev->nr_zones_closed++;
+- }
+-
+- return BLK_STS_OK;
+-}
+-
+-static void null_close_first_imp_zone(struct nullb_device *dev)
+-{
+- unsigned int i;
+-
+- for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
+- if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) {
+- null_close_zone(dev, &dev->zones[i]);
+- return;
+- }
+- }
+-}
+-
+-static blk_status_t null_check_active(struct nullb_device *dev)
+-{
+- if (!dev->zone_max_active)
+- return BLK_STS_OK;
+-
+- if (dev->nr_zones_exp_open + dev->nr_zones_imp_open +
+- dev->nr_zones_closed < dev->zone_max_active)
+- return BLK_STS_OK;
+-
+- return BLK_STS_ZONE_ACTIVE_RESOURCE;
+-}
+-
+-static blk_status_t null_check_open(struct nullb_device *dev)
+-{
+- if (!dev->zone_max_open)
+- return BLK_STS_OK;
+-
+- if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open)
+- return BLK_STS_OK;
+-
+- if (dev->nr_zones_imp_open) {
+- if (null_check_active(dev) == BLK_STS_OK) {
+- null_close_first_imp_zone(dev);
+- return BLK_STS_OK;
+- }
+- }
+-
+- return BLK_STS_ZONE_OPEN_RESOURCE;
+-}
+-
+-/*
+- * This function matches the manage open zone resources function in the ZBC standard,
+- * with the addition of max active zones support (added in the ZNS standard).
+- *
+- * The function determines if a zone can transition to implicit open or explicit open,
+- * while maintaining the max open zone (and max active zone) limit(s). It may close an
+- * implicit open zone in order to make additional zone resources available.
+- *
+- * ZBC states that an implicit open zone shall be closed only if there is not
+- * room within the open limit. However, with the addition of an active limit,
+- * it is not certain that closing an implicit open zone will allow a new zone
+- * to be opened, since we might already be at the active limit capacity.
+- */
+-static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
+-{
+- blk_status_t ret;
+-
+- switch (zone->cond) {
+- case BLK_ZONE_COND_EMPTY:
+- ret = null_check_active(dev);
+- if (ret != BLK_STS_OK)
+- return ret;
+- fallthrough;
+- case BLK_ZONE_COND_CLOSED:
+- return null_check_open(dev);
+- default:
+- /* Should never be called for other states */
+- WARN_ON(1);
+- return BLK_STS_IOERR;
+- }
+-}
+-
+-static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
+- unsigned int nr_sectors, bool append)
+-{
+- struct nullb_device *dev = cmd->nq->dev;
+- unsigned int zno = null_zone_no(dev, sector);
+- struct blk_zone *zone = &dev->zones[zno];
+- blk_status_t ret;
+-
+- trace_nullb_zone_op(cmd, zno, zone->cond);
+-
+- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
+- if (append)
+- return BLK_STS_IOERR;
+- return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
+- }
+-
+- null_lock_zone(dev, zno);
+-
+- switch (zone->cond) {
+- case BLK_ZONE_COND_FULL:
+- /* Cannot write to a full zone */
+- ret = BLK_STS_IOERR;
+- goto unlock;
+- case BLK_ZONE_COND_EMPTY:
+- case BLK_ZONE_COND_CLOSED:
+- ret = null_check_zone_resources(dev, zone);
+- if (ret != BLK_STS_OK)
+- goto unlock;
+- break;
+- case BLK_ZONE_COND_IMP_OPEN:
+- case BLK_ZONE_COND_EXP_OPEN:
+- break;
+- default:
+- /* Invalid zone condition */
+- ret = BLK_STS_IOERR;
+- goto unlock;
+- }
+-
+- /*
+- * Regular writes must be at the write pointer position.
+- * Zone append writes are automatically issued at the write
+- * pointer and the position returned using the request or BIO
+- * sector.
+- */
+- if (append) {
+- sector = zone->wp;
+- if (cmd->bio)
+- cmd->bio->bi_iter.bi_sector = sector;
+- else
+- cmd->rq->__sector = sector;
+- } else if (sector != zone->wp) {
+- ret = BLK_STS_IOERR;
+- goto unlock;
+- }
+-
+- if (zone->wp + nr_sectors > zone->start + zone->capacity) {
+- ret = BLK_STS_IOERR;
+- goto unlock;
+- }
+-
+- if (zone->cond == BLK_ZONE_COND_CLOSED) {
+- dev->nr_zones_closed--;
+- dev->nr_zones_imp_open++;
+- } else if (zone->cond == BLK_ZONE_COND_EMPTY) {
+- dev->nr_zones_imp_open++;
+- }
+- if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
+- zone->cond = BLK_ZONE_COND_IMP_OPEN;
+-
+- /*
+- * Memory backing allocation may sleep: release the zone_lock spinlock
+- * to avoid scheduling in atomic context. Zone operation atomicity is
+- * still guaranteed through the zone_locks bitmap.
+- */
+- if (dev->memory_backed)
+- spin_unlock_irq(&dev->zone_lock);
+- ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
+- if (dev->memory_backed)
+- spin_lock_irq(&dev->zone_lock);
+-
+- if (ret != BLK_STS_OK)
+- goto unlock;
+-
+- zone->wp += nr_sectors;
+- if (zone->wp == zone->start + zone->capacity) {
+- if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
+- dev->nr_zones_exp_open--;
+- else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
+- dev->nr_zones_imp_open--;
+- zone->cond = BLK_ZONE_COND_FULL;
+- }
+- ret = BLK_STS_OK;
+-
+-unlock:
+- null_unlock_zone(dev, zno);
+-
+- return ret;
+-}
+-
+-static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone)
+-{
+- blk_status_t ret;
+-
+- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+- return BLK_STS_IOERR;
+-
+- switch (zone->cond) {
+- case BLK_ZONE_COND_EXP_OPEN:
+- /* open operation on exp open is not an error */
+- return BLK_STS_OK;
+- case BLK_ZONE_COND_EMPTY:
+- ret = null_check_zone_resources(dev, zone);
+- if (ret != BLK_STS_OK)
+- return ret;
+- break;
+- case BLK_ZONE_COND_IMP_OPEN:
+- dev->nr_zones_imp_open--;
+- break;
+- case BLK_ZONE_COND_CLOSED:
+- ret = null_check_zone_resources(dev, zone);
+- if (ret != BLK_STS_OK)
+- return ret;
+- dev->nr_zones_closed--;
+- break;
+- case BLK_ZONE_COND_FULL:
+- default:
+- return BLK_STS_IOERR;
+- }
+-
+- zone->cond = BLK_ZONE_COND_EXP_OPEN;
+- dev->nr_zones_exp_open++;
+-
+- return BLK_STS_OK;
+-}
+-
+-static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone)
+-{
+- blk_status_t ret;
+-
+- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+- return BLK_STS_IOERR;
+-
+- switch (zone->cond) {
+- case BLK_ZONE_COND_FULL:
+- /* finish operation on full is not an error */
+- return BLK_STS_OK;
+- case BLK_ZONE_COND_EMPTY:
+- ret = null_check_zone_resources(dev, zone);
+- if (ret != BLK_STS_OK)
+- return ret;
+- break;
+- case BLK_ZONE_COND_IMP_OPEN:
+- dev->nr_zones_imp_open--;
+- break;
+- case BLK_ZONE_COND_EXP_OPEN:
+- dev->nr_zones_exp_open--;
+- break;
+- case BLK_ZONE_COND_CLOSED:
+- ret = null_check_zone_resources(dev, zone);
+- if (ret != BLK_STS_OK)
+- return ret;
+- dev->nr_zones_closed--;
+- break;
+- default:
+- return BLK_STS_IOERR;
+- }
+-
+- zone->cond = BLK_ZONE_COND_FULL;
+- zone->wp = zone->start + zone->len;
+-
+- return BLK_STS_OK;
+-}
+-
+-static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *zone)
+-{
+- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+- return BLK_STS_IOERR;
+-
+- switch (zone->cond) {
+- case BLK_ZONE_COND_EMPTY:
+- /* reset operation on empty is not an error */
+- return BLK_STS_OK;
+- case BLK_ZONE_COND_IMP_OPEN:
+- dev->nr_zones_imp_open--;
+- break;
+- case BLK_ZONE_COND_EXP_OPEN:
+- dev->nr_zones_exp_open--;
+- break;
+- case BLK_ZONE_COND_CLOSED:
+- dev->nr_zones_closed--;
+- break;
+- case BLK_ZONE_COND_FULL:
+- break;
+- default:
+- return BLK_STS_IOERR;
+- }
+-
+- zone->cond = BLK_ZONE_COND_EMPTY;
+- zone->wp = zone->start;
+-
+- return BLK_STS_OK;
+-}
+-
+-static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
+- sector_t sector)
+-{
+- struct nullb_device *dev = cmd->nq->dev;
+- unsigned int zone_no;
+- struct blk_zone *zone;
+- blk_status_t ret;
+- size_t i;
+-
+- if (op == REQ_OP_ZONE_RESET_ALL) {
+- for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
+- null_lock_zone(dev, i);
+- zone = &dev->zones[i];
+- if (zone->cond != BLK_ZONE_COND_EMPTY) {
+- null_reset_zone(dev, zone);
+- trace_nullb_zone_op(cmd, i, zone->cond);
+- }
+- null_unlock_zone(dev, i);
+- }
+- return BLK_STS_OK;
+- }
+-
+- zone_no = null_zone_no(dev, sector);
+- zone = &dev->zones[zone_no];
+-
+- null_lock_zone(dev, zone_no);
+-
+- switch (op) {
+- case REQ_OP_ZONE_RESET:
+- ret = null_reset_zone(dev, zone);
+- break;
+- case REQ_OP_ZONE_OPEN:
+- ret = null_open_zone(dev, zone);
+- break;
+- case REQ_OP_ZONE_CLOSE:
+- ret = null_close_zone(dev, zone);
+- break;
+- case REQ_OP_ZONE_FINISH:
+- ret = null_finish_zone(dev, zone);
+- break;
+- default:
+- ret = BLK_STS_NOTSUPP;
+- break;
+- }
+-
+- if (ret == BLK_STS_OK)
+- trace_nullb_zone_op(cmd, zone_no, zone->cond);
+-
+- null_unlock_zone(dev, zone_no);
+-
+- return ret;
+-}
+-
+-blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
+- sector_t sector, sector_t nr_sectors)
+-{
+- struct nullb_device *dev = cmd->nq->dev;
+- unsigned int zno = null_zone_no(dev, sector);
+- blk_status_t sts;
+-
+- switch (op) {
+- case REQ_OP_WRITE:
+- sts = null_zone_write(cmd, sector, nr_sectors, false);
+- break;
+- case REQ_OP_ZONE_APPEND:
+- sts = null_zone_write(cmd, sector, nr_sectors, true);
+- break;
+- case REQ_OP_ZONE_RESET:
+- case REQ_OP_ZONE_RESET_ALL:
+- case REQ_OP_ZONE_OPEN:
+- case REQ_OP_ZONE_CLOSE:
+- case REQ_OP_ZONE_FINISH:
+- sts = null_zone_mgmt(cmd, op, sector);
+- break;
+- default:
+- null_lock_zone(dev, zno);
+- sts = null_process_cmd(cmd, op, sector, nr_sectors);
+- null_unlock_zone(dev, zno);
+- }
+-
+- return sts;
+-}
+diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
+index 39aeebc6837da..d9e41d3bbe717 100644
+--- a/drivers/block/sunvdc.c
++++ b/drivers/block/sunvdc.c
+@@ -984,6 +984,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
+ print_version();
+
+ hp = mdesc_grab();
++ if (!hp)
++ return -ENODEV;
+
+ err = -ENODEV;
+ if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
+diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
+index c715d4681a0b8..4ae49eae45869 100644
+--- a/drivers/clk/Kconfig
++++ b/drivers/clk/Kconfig
+@@ -79,7 +79,7 @@ config COMMON_CLK_RK808
+ config COMMON_CLK_HI655X
+ tristate "Clock driver for Hi655x" if EXPERT
+ depends on (MFD_HI655X_PMIC || COMPILE_TEST)
+- depends on REGMAP
++ select REGMAP
+ default MFD_HI655X_PMIC
+ help
+ This driver supports the hi655x PMIC clock. This
+diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
+index 4a031c62f92a1..5098639d41f12 100644
+--- a/drivers/cpuidle/cpuidle-psci-domain.c
++++ b/drivers/cpuidle/cpuidle-psci-domain.c
+@@ -182,7 +182,8 @@ static void psci_pd_remove(void)
+ struct psci_pd_provider *pd_provider, *it;
+ struct generic_pm_domain *genpd;
+
+- list_for_each_entry_safe(pd_provider, it, &psci_pd_providers, link) {
++ list_for_each_entry_safe_reverse(pd_provider, it,
++ &psci_pd_providers, link) {
+ of_genpd_del_provider(pd_provider->node);
+
+ genpd = of_genpd_remove_last(pd_provider->node);
+diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
+index 9e6504592646e..300ba2991936b 100644
+--- a/drivers/firmware/xilinx/zynqmp.c
++++ b/drivers/firmware/xilinx/zynqmp.c
+@@ -171,7 +171,7 @@ static int zynqmp_pm_feature(u32 api_id)
+ }
+
+ /* Add new entry if not present */
+- feature_data = kmalloc(sizeof(*feature_data), GFP_KERNEL);
++ feature_data = kmalloc(sizeof(*feature_data), GFP_ATOMIC);
+ if (!feature_data)
+ return -ENOMEM;
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+index 159be13ef20bb..2c19b3775179b 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+@@ -528,16 +528,13 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
+ struct kfd_event_waiter *event_waiters;
+ uint32_t i;
+
+- event_waiters = kmalloc_array(num_events,
+- sizeof(struct kfd_event_waiter),
+- GFP_KERNEL);
++ event_waiters = kcalloc(num_events, sizeof(struct kfd_event_waiter),
++ GFP_KERNEL);
+ if (!event_waiters)
+ return NULL;
+
+- for (i = 0; (event_waiters) && (i < num_events) ; i++) {
++ for (i = 0; i < num_events; i++)
+ init_wait(&event_waiters[i].wait);
+- event_waiters[i].activated = false;
+- }
+
+ return event_waiters;
+ }
+diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+index e427f4ffa0807..e5b1002d7f3f0 100644
+--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+@@ -1868,7 +1868,10 @@ static unsigned int CalculateVMAndRowBytes(
+ }
+
+ if (SurfaceTiling == dm_sw_linear) {
+- *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
++ if (PTEBufferSizeInRequests == 0)
++ *dpte_row_height = 1;
++ else
++ *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else if (ScanDirection != dm_vert) {
+diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
+index c56656a95cf99..b7bb5610dfe21 100644
+--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
++++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
+@@ -614,11 +614,14 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+ int ret;
+
+ if (obj->import_attach) {
+- /* Drop the reference drm_gem_mmap_obj() acquired.*/
+- drm_gem_object_put(obj);
+ vma->vm_private_data = NULL;
++ ret = dma_buf_mmap(obj->dma_buf, vma, 0);
++
++ /* Drop the reference drm_gem_mmap_obj() acquired.*/
++ if (!ret)
++ drm_gem_object_put(obj);
+
+- return dma_buf_mmap(obj->dma_buf, vma, 0);
++ return ret;
+ }
+
+ shmem = to_drm_gem_shmem_obj(obj);
+diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
+index 69b2e5509d678..de67b2745258f 100644
+--- a/drivers/gpu/drm/i915/gt/intel_ring.c
++++ b/drivers/gpu/drm/i915/gt/intel_ring.c
+@@ -108,7 +108,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+ struct i915_vma *vma;
+
+ obj = ERR_PTR(-ENODEV);
+- if (i915_ggtt_has_aperture(ggtt))
++ if (i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915))
+ obj = i915_gem_object_create_stolen(i915, size);
+ if (IS_ERR(obj))
+ obj = i915_gem_object_create_internal(i915, size);
+diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
+index c4c2d24dc5094..0532a5069c04b 100644
+--- a/drivers/gpu/drm/i915/i915_active.c
++++ b/drivers/gpu/drm/i915/i915_active.c
+@@ -432,8 +432,7 @@ replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
+ * we can use it to substitute for the pending idle-barrer
+ * request that we want to emit on the kernel_context.
+ */
+- __active_del_barrier(ref, node_from_active(active));
+- return true;
++ return __active_del_barrier(ref, node_from_active(active));
+ }
+
+ int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
+@@ -446,16 +445,19 @@ int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
+ if (err)
+ return err;
+
+- active = active_instance(ref, idx);
+- if (!active) {
+- err = -ENOMEM;
+- goto out;
+- }
++ do {
++ active = active_instance(ref, idx);
++ if (!active) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ if (replace_barrier(ref, active)) {
++ RCU_INIT_POINTER(active->fence, NULL);
++ atomic_dec(&ref->count);
++ }
++ } while (unlikely(is_barrier(active)));
+
+- if (replace_barrier(ref, active)) {
+- RCU_INIT_POINTER(active->fence, NULL);
+- atomic_dec(&ref->count);
+- }
+ if (!__i915_active_fence_set(active, fence))
+ __i915_active_acquire(ref);
+
+diff --git a/drivers/gpu/drm/meson/meson_vpp.c b/drivers/gpu/drm/meson/meson_vpp.c
+index 154837688ab0d..5df1957c8e41f 100644
+--- a/drivers/gpu/drm/meson/meson_vpp.c
++++ b/drivers/gpu/drm/meson/meson_vpp.c
+@@ -100,6 +100,8 @@ void meson_vpp_init(struct meson_drm *priv)
+ priv->io_base + _REG(VPP_DOLBY_CTRL));
+ writel_relaxed(0x1020080,
+ priv->io_base + _REG(VPP_DUMMY_DATA1));
++ writel_relaxed(0x42020,
++ priv->io_base + _REG(VPP_DUMMY_DATA));
+ } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
+ writel_relaxed(0xf, priv->io_base + _REG(DOLBY_PATH_CTRL));
+
+diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
+index 13596961ae17f..5ff856ef7d88c 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
++++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
+@@ -236,7 +236,7 @@ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
+ if (pm_runtime_active(pfdev->dev))
+ mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT);
+
+- pm_runtime_put_sync_autosuspend(pfdev->dev);
++ pm_runtime_put_autosuspend(pfdev->dev);
+ }
+
+ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
+diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
+index 5f9ec1d1464a2..524d6d712e724 100644
+--- a/drivers/hid/hid-core.c
++++ b/drivers/hid/hid-core.c
+@@ -258,6 +258,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
+ {
+ struct hid_report *report;
+ struct hid_field *field;
++ unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE;
+ unsigned int usages;
+ unsigned int offset;
+ unsigned int i;
+@@ -288,8 +289,11 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
+ offset = report->size;
+ report->size += parser->global.report_size * parser->global.report_count;
+
++ if (parser->device->ll_driver->max_buffer_size)
++ max_buffer_size = parser->device->ll_driver->max_buffer_size;
++
+ /* Total size check: Allow for possible report index byte */
+- if (report->size > (HID_MAX_BUFFER_SIZE - 1) << 3) {
++ if (report->size > (max_buffer_size - 1) << 3) {
+ hid_err(parser->device, "report is too long\n");
+ return -1;
+ }
+@@ -1752,6 +1756,7 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
+ struct hid_report_enum *report_enum = hid->report_enum + type;
+ struct hid_report *report;
+ struct hid_driver *hdrv;
++ int max_buffer_size = HID_MAX_BUFFER_SIZE;
+ unsigned int a;
+ u32 rsize, csize = size;
+ u8 *cdata = data;
+@@ -1768,10 +1773,13 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
+
+ rsize = hid_compute_report_size(report);
+
+- if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE)
+- rsize = HID_MAX_BUFFER_SIZE - 1;
+- else if (rsize > HID_MAX_BUFFER_SIZE)
+- rsize = HID_MAX_BUFFER_SIZE;
++ if (hid->ll_driver->max_buffer_size)
++ max_buffer_size = hid->ll_driver->max_buffer_size;
++
++ if (report_enum->numbered && rsize >= max_buffer_size)
++ rsize = max_buffer_size - 1;
++ else if (rsize > max_buffer_size)
++ rsize = max_buffer_size;
+
+ if (csize < rsize) {
+ dbg_hid("report %d is too short, (%d < %d)\n", report->id,
+diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
+index fc06d8bb42e0f..ba0ca652b9dab 100644
+--- a/drivers/hid/uhid.c
++++ b/drivers/hid/uhid.c
+@@ -395,6 +395,7 @@ struct hid_ll_driver uhid_hid_driver = {
+ .parse = uhid_hid_parse,
+ .raw_request = uhid_hid_raw_request,
+ .output_report = uhid_hid_output_report,
++ .max_buffer_size = UHID_DATA_MAX,
+ };
+ EXPORT_SYMBOL_GPL(uhid_hid_driver);
+
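Editorial note, not part of the patch: the two HID hunks above let a transport driver cap report buffers below the global limit — hid-core clamps computed report sizes to ll_driver->max_buffer_size when set, and uhid advertises UHID_DATA_MAX. A minimal userspace C sketch of the clamping rule follows; the 16 KiB global fallback and the 4096-byte UHID value are assumptions used only for illustration. Numbered reports lose one byte to the report ID, hence the max - 1 case.

#include <stdio.h>

#define HID_MAX_BUFFER_SIZE 16384   /* assumed global fallback, bytes */

/* Clamp a computed report size against a transport-specific limit,
 * mirroring the logic added to hid_report_raw_event(): numbered
 * reports reserve one byte for the report ID. */
static unsigned int clamp_report_size(unsigned int rsize, int numbered,
                                      unsigned int ll_max /* 0 = none */)
{
    unsigned int max = ll_max ? ll_max : HID_MAX_BUFFER_SIZE;

    if (numbered && rsize >= max)
        return max - 1;
    if (rsize > max)
        return max;
    return rsize;
}

int main(void)
{
    /* 4096 stands in for UHID_DATA_MAX; treat it as an example value. */
    printf("%u\n", clamp_report_size(8192, 1, 4096)); /* -> 4095 */
    printf("%u\n", clamp_report_size(8192, 0, 0));    /* -> 8192 */
    return 0;
}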
+diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
+index 9d5b019651f2d..6b84822e7d93b 100644
+--- a/drivers/hwmon/adt7475.c
++++ b/drivers/hwmon/adt7475.c
+@@ -486,10 +486,10 @@ static ssize_t temp_store(struct device *dev, struct device_attribute *attr,
+ val = (temp - val) / 1000;
+
+ if (sattr->index != 1) {
+- data->temp[HYSTERSIS][sattr->index] &= 0xF0;
++ data->temp[HYSTERSIS][sattr->index] &= 0x0F;
+ data->temp[HYSTERSIS][sattr->index] |= (val & 0xF) << 4;
+ } else {
+- data->temp[HYSTERSIS][sattr->index] &= 0x0F;
++ data->temp[HYSTERSIS][sattr->index] &= 0xF0;
+ data->temp[HYSTERSIS][sattr->index] |= (val & 0xF);
+ }
+
+@@ -554,11 +554,11 @@ static ssize_t temp_st_show(struct device *dev, struct device_attribute *attr,
+ val = data->enh_acoustics[0] & 0xf;
+ break;
+ case 1:
+- val = (data->enh_acoustics[1] >> 4) & 0xf;
++ val = data->enh_acoustics[1] & 0xf;
+ break;
+ case 2:
+ default:
+- val = data->enh_acoustics[1] & 0xf;
++ val = (data->enh_acoustics[1] >> 4) & 0xf;
+ break;
+ }
+
+diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c
+index d3c98115042b5..836e7579e166a 100644
+--- a/drivers/hwmon/ina3221.c
++++ b/drivers/hwmon/ina3221.c
+@@ -772,7 +772,7 @@ static int ina3221_probe_child_from_dt(struct device *dev,
+ return ret;
+ } else if (val > INA3221_CHANNEL3) {
+ dev_err(dev, "invalid reg %d of %pOFn\n", val, child);
+- return ret;
++ return -EINVAL;
+ }
+
+ input = &ina->inputs[val];
+diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c
+index c7b373ba92f21..d1b2e936546fd 100644
+--- a/drivers/hwmon/pmbus/adm1266.c
++++ b/drivers/hwmon/pmbus/adm1266.c
+@@ -301,6 +301,7 @@ static int adm1266_config_gpio(struct adm1266_data *data)
+ data->gc.label = name;
+ data->gc.parent = &data->client->dev;
+ data->gc.owner = THIS_MODULE;
++ data->gc.can_sleep = true;
+ data->gc.base = -1;
+ data->gc.names = data->gpio_names;
+ data->gc.ngpio = ARRAY_SIZE(data->gpio_names);
+diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
+index f8017993e2b4d..9e26cc084a176 100644
+--- a/drivers/hwmon/pmbus/ucd9000.c
++++ b/drivers/hwmon/pmbus/ucd9000.c
+@@ -7,6 +7,7 @@
+ */
+
+ #include <linux/debugfs.h>
++#include <linux/delay.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/of_device.h>
+@@ -16,6 +17,7 @@
+ #include <linux/i2c.h>
+ #include <linux/pmbus.h>
+ #include <linux/gpio/driver.h>
++#include <linux/timekeeping.h>
+ #include "pmbus.h"
+
+ enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd90320, ucd9090,
+@@ -65,6 +67,7 @@ struct ucd9000_data {
+ struct gpio_chip gpio;
+ #endif
+ struct dentry *debugfs;
++ ktime_t write_time;
+ };
+ #define to_ucd9000_data(_info) container_of(_info, struct ucd9000_data, info)
+
+@@ -73,6 +76,73 @@ struct ucd9000_debugfs_entry {
+ u8 index;
+ };
+
++/*
++ * It has been observed that the UCD90320 randomly fails register access when
++ * doing another access right on the back of a register write. To mitigate this,
++ * make sure that there is a minimum delay between a write access and the
++ * following access. The 250us is based on experimental data. At a delay of
++ * 200us the issue seems to go away. Add a bit of extra margin to allow for
++ * system-to-system differences.
++ */
++#define UCD90320_WAIT_DELAY_US 250
++
++static inline void ucd90320_wait(const struct ucd9000_data *data)
++{
++ s64 delta = ktime_us_delta(ktime_get(), data->write_time);
++
++ if (delta < UCD90320_WAIT_DELAY_US)
++ udelay(UCD90320_WAIT_DELAY_US - delta);
++}
++
++static int ucd90320_read_word_data(struct i2c_client *client, int page,
++ int phase, int reg)
++{
++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
++ struct ucd9000_data *data = to_ucd9000_data(info);
++
++ if (reg >= PMBUS_VIRT_BASE)
++ return -ENXIO;
++
++ ucd90320_wait(data);
++ return pmbus_read_word_data(client, page, phase, reg);
++}
++
++static int ucd90320_read_byte_data(struct i2c_client *client, int page, int reg)
++{
++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
++ struct ucd9000_data *data = to_ucd9000_data(info);
++
++ ucd90320_wait(data);
++ return pmbus_read_byte_data(client, page, reg);
++}
++
++static int ucd90320_write_word_data(struct i2c_client *client, int page,
++ int reg, u16 word)
++{
++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
++ struct ucd9000_data *data = to_ucd9000_data(info);
++ int ret;
++
++ ucd90320_wait(data);
++ ret = pmbus_write_word_data(client, page, reg, word);
++ data->write_time = ktime_get();
++
++ return ret;
++}
++
++static int ucd90320_write_byte(struct i2c_client *client, int page, u8 value)
++{
++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
++ struct ucd9000_data *data = to_ucd9000_data(info);
++ int ret;
++
++ ucd90320_wait(data);
++ ret = pmbus_write_byte(client, page, value);
++ data->write_time = ktime_get();
++
++ return ret;
++}
++
+ static int ucd9000_get_fan_config(struct i2c_client *client, int fan)
+ {
+ int fan_config = 0;
+@@ -598,6 +668,11 @@ static int ucd9000_probe(struct i2c_client *client)
+ info->read_byte_data = ucd9000_read_byte_data;
+ info->func[0] |= PMBUS_HAVE_FAN12 | PMBUS_HAVE_STATUS_FAN12
+ | PMBUS_HAVE_FAN34 | PMBUS_HAVE_STATUS_FAN34;
++ } else if (mid->driver_data == ucd90320) {
++ info->read_byte_data = ucd90320_read_byte_data;
++ info->read_word_data = ucd90320_read_word_data;
++ info->write_byte = ucd90320_write_byte;
++ info->write_word_data = ucd90320_write_word_data;
+ }
+
+ ucd9000_probe_gpio(client, mid, data);
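As an editorial illustration only, the write-throttling pattern the ucd9000 driver gains for the UCD90320 can be sketched in plain C, with a monotonic clock standing in for ktime_get(); do_i2c_write() is a hypothetical placeholder, not a real API.

#include <stdint.h>
#include <time.h>
#include <unistd.h>

#define UCD90320_WAIT_DELAY_US 250  /* margin above the ~200us observed */

static int64_t now_us(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
}

static int64_t last_write_us;

/* Sleep just long enough that the next bus access starts at least
 * UCD90320_WAIT_DELAY_US after the previous write completed. */
static void ucd90320_wait(void)
{
    int64_t delta = now_us() - last_write_us;

    if (delta < UCD90320_WAIT_DELAY_US)
        usleep(UCD90320_WAIT_DELAY_US - delta);
}

int main(void)
{
    last_write_us = now_us();   /* pretend a write just finished ... */
    ucd90320_wait();            /* ... so the next access is delayed */
    /* A real write would be wrapped as:
     *     ucd90320_wait();
     *     do_i2c_write(...);        // hypothetical transfer
     *     last_write_us = now_us();
     */
    return 0;
}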
+diff --git a/drivers/hwmon/tmp513.c b/drivers/hwmon/tmp513.c
+index 47bbe47e062fd..7d5f7441aceb1 100644
+--- a/drivers/hwmon/tmp513.c
++++ b/drivers/hwmon/tmp513.c
+@@ -758,7 +758,7 @@ static int tmp51x_probe(struct i2c_client *client)
+ static struct i2c_driver tmp51x_driver = {
+ .driver = {
+ .name = "tmp51x",
+- .of_match_table = of_match_ptr(tmp51x_of_match),
++ .of_match_table = tmp51x_of_match,
+ },
+ .probe_new = tmp51x_probe,
+ .id_table = tmp51x_id,
+diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
+index f2a5af239c956..f5d3cf86753f7 100644
+--- a/drivers/hwmon/xgene-hwmon.c
++++ b/drivers/hwmon/xgene-hwmon.c
+@@ -768,6 +768,7 @@ static int xgene_hwmon_remove(struct platform_device *pdev)
+ {
+ struct xgene_hwmon_dev *ctx = platform_get_drvdata(pdev);
+
++ cancel_work_sync(&ctx->workq);
+ hwmon_device_unregister(ctx->hwmon_dev);
+ kfifo_free(&ctx->async_msg_fifo);
+ if (acpi_disabled)
+diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
+index ceb6cdc20484e..7db6d0fc6ec2e 100644
+--- a/drivers/interconnect/core.c
++++ b/drivers/interconnect/core.c
+@@ -850,6 +850,10 @@ void icc_node_destroy(int id)
+
+ mutex_unlock(&icc_lock);
+
++ if (!node)
++ return;
++
++ kfree(node->links);
+ kfree(node);
+ }
+ EXPORT_SYMBOL_GPL(icc_node_destroy);
+diff --git a/drivers/media/i2c/m5mols/m5mols_core.c b/drivers/media/i2c/m5mols/m5mols_core.c
+index 21666d705e372..dcf9e4d4ee6b8 100644
+--- a/drivers/media/i2c/m5mols/m5mols_core.c
++++ b/drivers/media/i2c/m5mols/m5mols_core.c
+@@ -488,7 +488,7 @@ static enum m5mols_restype __find_restype(u32 code)
+ do {
+ if (code == m5mols_default_ffmt[type].code)
+ return type;
+- } while (type++ != SIZE_DEFAULT_FFMT);
++ } while (++type != SIZE_DEFAULT_FFMT);
+
+ return 0;
+ }
+diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
+index af85b32c6c1c8..c468f9a02ef6b 100644
+--- a/drivers/mmc/host/atmel-mci.c
++++ b/drivers/mmc/host/atmel-mci.c
+@@ -1818,7 +1818,6 @@ static void atmci_tasklet_func(unsigned long priv)
+ atmci_writel(host, ATMCI_IER, ATMCI_NOTBUSY);
+ state = STATE_WAITING_NOTBUSY;
+ } else if (host->mrq->stop) {
+- atmci_writel(host, ATMCI_IER, ATMCI_CMDRDY);
+ atmci_send_stop_cmd(host, data);
+ state = STATE_SENDING_STOP;
+ } else {
+@@ -1851,8 +1850,6 @@ static void atmci_tasklet_func(unsigned long priv)
+ * command to send.
+ */
+ if (host->mrq->stop) {
+- atmci_writel(host, ATMCI_IER,
+- ATMCI_CMDRDY);
+ atmci_send_stop_cmd(host, data);
+ state = STATE_SENDING_STOP;
+ } else {
+diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
+index 24cd6d3dc6477..bf2592774165b 100644
+--- a/drivers/mmc/host/sdhci_am654.c
++++ b/drivers/mmc/host/sdhci_am654.c
+@@ -369,7 +369,7 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg)
+ MAX_POWER_ON_TIMEOUT, false, host, val,
+ reg);
+ if (ret)
+- dev_warn(mmc_dev(host->mmc), "Power on failed\n");
++ dev_info(mmc_dev(host->mmc), "Power on failed\n");
+ }
+ }
+
+diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
+index 371b345635e62..a253476a52b01 100644
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -2734,7 +2734,7 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port)
+ return 10240 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
+ else if (chip->info->ops->set_max_frame_size)
+ return 1632 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
+- return 1522 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
++ return ETH_DATA_LEN;
+ }
+
+ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+@@ -2742,6 +2742,17 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+ struct mv88e6xxx_chip *chip = ds->priv;
+ int ret = 0;
+
++ /* For families where we don't know how to alter the MTU,
++ * just accept any value up to ETH_DATA_LEN
++ */
++ if (!chip->info->ops->port_set_jumbo_size &&
++ !chip->info->ops->set_max_frame_size) {
++ if (new_mtu > ETH_DATA_LEN)
++ return -EINVAL;
++
++ return 0;
++ }
++
+ if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+ new_mtu += EDSA_HLEN;
+
+@@ -2750,9 +2761,6 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
+ ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu);
+ else if (chip->info->ops->set_max_frame_size)
+ ret = chip->info->ops->set_max_frame_size(chip, new_mtu);
+- else
+- if (new_mtu > 1522)
+- ret = -EINVAL;
+ mv88e6xxx_reg_unlock(chip);
+
+ return ret;
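Not part of the patch — a small C sketch of the reworked mv88e6xxx MTU policy, where the two booleans stand in for the port_set_jumbo_size and set_max_frame_size callbacks: families that cannot program a frame size now accept anything up to the standard Ethernet MTU instead of rejecting 1500 itself.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define ETH_DATA_LEN 1500

static int change_mtu(bool has_jumbo_op, bool has_max_frame_op, int new_mtu)
{
    /* No way to alter the MTU: just bound the request. */
    if (!has_jumbo_op && !has_max_frame_op)
        return new_mtu > ETH_DATA_LEN ? -EINVAL : 0;
    return 0; /* a real driver would program the hardware here */
}

int main(void)
{
    printf("%d\n", change_mtu(false, false, 1500)); /* 0: accepted */
    printf("%d\n", change_mtu(false, false, 1600)); /* -22: rejected */
    return 0;
}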
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index 9e8a20a94862f..76481ff7074ba 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -14851,6 +14851,7 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
+ int err;
+ int v_idx;
+
++ pci_set_drvdata(pf->pdev, pf);
+ pci_save_state(pf->pdev);
+
+ /* set up periodic task facility */
+diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
+index 59963b901be0f..e0790df700e2c 100644
+--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
+@@ -169,8 +169,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+ }
+ netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+- ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+-
+ ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+ if (err)
+@@ -185,6 +183,8 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+ if (err)
+ return err;
+ }
++ ice_qvec_dis_irq(vsi, rx_ring, q_vector);
++
+ err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
+ if (err)
+ return err;
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
+index d2f5855b2ea79..895b6f0a39841 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
+@@ -4986,6 +4986,11 @@ static int qed_init_wfq_param(struct qed_hwfn *p_hwfn,
+
+ num_vports = p_hwfn->qm_info.num_vports;
+
++ if (num_vports < 2) {
++ DP_NOTICE(p_hwfn, "Unexpected num_vports: %d\n", num_vports);
++ return -EINVAL;
++ }
++
+ /* Accounting for the vports which are configured for WFQ explicitly */
+ for (i = 0; i < num_vports; i++) {
+ u32 tmp_speed;
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
+index 3e3192a3ad9b7..fdbd5f07a1857 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
+@@ -422,7 +422,7 @@ qed_mfw_get_tlv_time_value(struct qed_mfw_tlv_time *p_time,
+ if (p_time->hour > 23)
+ p_time->hour = 0;
+ if (p_time->min > 59)
+- p_time->hour = 0;
++ p_time->min = 0;
+ if (p_time->msec > 999)
+ p_time->msec = 0;
+ if (p_time->usec > 999)
+diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
+index 01ea0d6f88193..934a4b54784b8 100644
+--- a/drivers/net/ethernet/sun/ldmvsw.c
++++ b/drivers/net/ethernet/sun/ldmvsw.c
+@@ -290,6 +290,9 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
+
+ hp = mdesc_grab();
+
++ if (!hp)
++ return -ENODEV;
++
+ rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
+ err = -ENODEV;
+ if (!rmac) {
+diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
+index 96b883f965f63..b6c03adf1e762 100644
+--- a/drivers/net/ethernet/sun/sunvnet.c
++++ b/drivers/net/ethernet/sun/sunvnet.c
+@@ -431,6 +431,9 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
+
+ hp = mdesc_grab();
+
++ if (!hp)
++ return -ENODEV;
++
+ vp = vnet_find_parent(hp, vdev->mp, vdev);
+ if (IS_ERR(vp)) {
+ pr_err("Cannot find port parent vnet\n");
+diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c
+index 943d26cbf39f5..71712ea25403d 100644
+--- a/drivers/net/ipvlan/ipvlan_l3s.c
++++ b/drivers/net/ipvlan/ipvlan_l3s.c
+@@ -101,6 +101,7 @@ static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
+ goto out;
+
+ skb->dev = addr->master->dev;
++ skb->skb_iif = skb->dev->ifindex;
+ len = skb->len + ETH_HLEN;
+ ipvlan_count_rx(addr->master, len, true, false);
+ out:
+diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
+index caf7291ffaf83..b67de3f9ef186 100644
+--- a/drivers/net/phy/smsc.c
++++ b/drivers/net/phy/smsc.c
+@@ -181,8 +181,11 @@ static int lan95xx_config_aneg_ext(struct phy_device *phydev)
+ static int lan87xx_read_status(struct phy_device *phydev)
+ {
+ struct smsc_phy_priv *priv = phydev->priv;
++ int err;
+
+- int err = genphy_read_status(phydev);
++ err = genphy_read_status(phydev);
++ if (err)
++ return err;
+
+ if (!phydev->link && priv->energy_enable) {
+ /* Disable EDPD to wake up PHY */
+diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
+index 378a12ae2d957..fb1389bd09392 100644
+--- a/drivers/net/usb/smsc75xx.c
++++ b/drivers/net/usb/smsc75xx.c
+@@ -2199,6 +2199,13 @@ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+ size = (rx_cmd_a & RX_CMD_A_LEN) - RXW_PADDING;
+ align_count = (4 - ((size + RXW_PADDING) % 4)) % 4;
+
++ if (unlikely(size > skb->len)) {
++ netif_dbg(dev, rx_err, dev->net,
++ "size err rx_cmd_a=0x%08x\n",
++ rx_cmd_a);
++ return 0;
++ }
++
+ if (unlikely(rx_cmd_a & RX_CMD_A_RED)) {
+ netif_dbg(dev, rx_err, dev->net,
+ "Error rx_cmd_a=0x%08x\n", rx_cmd_a);
+diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
+index 57b07446bb768..68eb1253f888f 100644
+--- a/drivers/nfc/pn533/usb.c
++++ b/drivers/nfc/pn533/usb.c
+@@ -175,6 +175,7 @@ static int pn533_usb_send_frame(struct pn533 *dev,
+ print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1,
+ out->data, out->len, false);
+
++ arg.phy = phy;
+ init_completion(&arg.done);
+ cntx = phy->out_urb->context;
+ phy->out_urb->context = &arg;
+diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c
+index 5d74c674368a5..8ccf5a86ad1bb 100644
+--- a/drivers/nfc/st-nci/ndlc.c
++++ b/drivers/nfc/st-nci/ndlc.c
+@@ -286,13 +286,15 @@ EXPORT_SYMBOL(ndlc_probe);
+
+ void ndlc_remove(struct llt_ndlc *ndlc)
+ {
+- st_nci_remove(ndlc->ndev);
+-
+ /* cancel timers */
+ del_timer_sync(&ndlc->t1_timer);
+ del_timer_sync(&ndlc->t2_timer);
+ ndlc->t2_active = false;
+ ndlc->t1_active = false;
++ /* cancel work */
++ cancel_work_sync(&ndlc->sm_work);
++
++ st_nci_remove(ndlc->ndev);
+
+ skb_queue_purge(&ndlc->rcv_q);
+ skb_queue_purge(&ndlc->send_q);
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index e162f1dfbafe9..a4b6aa932a8fe 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -723,16 +723,26 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+ range = page_address(ns->ctrl->discard_page);
+ }
+
+- __rq_for_each_bio(bio, req) {
+- u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
+- u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+-
+- if (n < segments) {
+- range[n].cattr = cpu_to_le32(0);
+- range[n].nlb = cpu_to_le32(nlb);
+- range[n].slba = cpu_to_le64(slba);
++ if (queue_max_discard_segments(req->q) == 1) {
++ u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req));
++ u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9);
++
++ range[0].cattr = cpu_to_le32(0);
++ range[0].nlb = cpu_to_le32(nlb);
++ range[0].slba = cpu_to_le64(slba);
++ n = 1;
++ } else {
++ __rq_for_each_bio(bio, req) {
++ u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
++ u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
++
++ if (n < segments) {
++ range[n].cattr = cpu_to_le32(0);
++ range[n].nlb = cpu_to_le32(nlb);
++ range[n].slba = cpu_to_le64(slba);
++ }
++ n++;
+ }
+- n++;
+ }
+
+ if (WARN_ON_ONCE(n != segments)) {
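For illustration only (not from the patch), the new single-segment branch boils down to one sector-to-LBA conversion covering the whole request. The helper below assumes 512-byte request sectors and a namespace lba_shift, mirroring what nvme_sect_to_lba() computes.

#include <stdint.h>
#include <stdio.h>

struct dsm_range { uint32_t cattr, nlb; uint64_t slba; };

/* Convert a request given in 512-byte sectors into one NVMe DSM range,
 * as the single-segment path of nvme_setup_discard() now does. */
static struct dsm_range one_range(uint64_t rq_pos_sectors,
                                  uint32_t rq_sectors, int lba_shift)
{
    struct dsm_range r = {
        .cattr = 0,
        .nlb   = rq_sectors >> (lba_shift - 9),     /* sectors -> LBAs */
        .slba  = rq_pos_sectors >> (lba_shift - 9),
    };
    return r;
}

int main(void)
{
    struct dsm_range r = one_range(2048, 1024, 12); /* 4K LBAs */

    printf("slba=%llu nlb=%u\n", (unsigned long long)r.slba, r.nlb);
    return 0;
}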
+diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
+index bc88ff2912f56..a82a0796a6148 100644
+--- a/drivers/nvme/target/core.c
++++ b/drivers/nvme/target/core.c
+@@ -749,8 +749,10 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
+
+ void nvmet_req_complete(struct nvmet_req *req, u16 status)
+ {
++ struct nvmet_sq *sq = req->sq;
++
+ __nvmet_req_complete(req, status);
+- percpu_ref_put(&req->sq->ref);
++ percpu_ref_put(&sq->ref);
+ }
+ EXPORT_SYMBOL_GPL(nvmet_req_complete);
+
+diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
+index 8b587fc97f7bc..c22cc20db1a74 100644
+--- a/drivers/pci/pci-driver.c
++++ b/drivers/pci/pci-driver.c
+@@ -911,7 +911,7 @@ static int pci_pm_resume_noirq(struct device *dev)
+ pcie_pme_root_status_cleanup(pci_dev);
+
+ if (!skip_bus_pm && prev_state == PCI_D3cold)
+- pci_bridge_wait_for_secondary_bus(pci_dev);
++ pci_bridge_wait_for_secondary_bus(pci_dev, "resume", PCI_RESET_WAIT);
+
+ if (pci_has_legacy_pm_support(pci_dev))
+ return 0;
+@@ -1298,7 +1298,7 @@ static int pci_pm_runtime_resume(struct device *dev)
+ pci_pm_default_resume(pci_dev);
+
+ if (prev_state == PCI_D3cold)
+- pci_bridge_wait_for_secondary_bus(pci_dev);
++ pci_bridge_wait_for_secondary_bus(pci_dev, "resume", PCI_RESET_WAIT);
+
+ if (pm && pm->runtime_resume)
+ error = pm->runtime_resume(dev);
+diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
+index 744a2e05635b9..d37013d007b6e 100644
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -157,9 +157,6 @@ static int __init pcie_port_pm_setup(char *str)
+ }
+ __setup("pcie_port_pm=", pcie_port_pm_setup);
+
+-/* Time to wait after a reset for device to become responsive */
+-#define PCIE_RESET_READY_POLL_MS 60000
+-
+ /**
+ * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
+ * @bus: pointer to PCI bus structure to search
+@@ -1221,7 +1218,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
+ return -ENOTTY;
+ }
+
+- if (delay > 1000)
++ if (delay > PCI_RESET_WAIT)
+ pci_info(dev, "not ready %dms after %s; waiting\n",
+ delay - 1, reset_type);
+
+@@ -1230,7 +1227,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
+ pci_read_config_dword(dev, PCI_COMMAND, &id);
+ }
+
+- if (delay > 1000)
++ if (delay > PCI_RESET_WAIT)
+ pci_info(dev, "ready %dms after %s\n", delay - 1,
+ reset_type);
+
+@@ -4792,24 +4789,31 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
+ /**
+ * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
+ * @dev: PCI bridge
++ * @reset_type: reset type in human-readable form
++ * @timeout: maximum time to wait for devices on secondary bus (milliseconds)
+ *
+ * Handle necessary delays before access to the devices on the secondary
+- * side of the bridge are permitted after D3cold to D0 transition.
++ * side of the bridge are permitted after D3cold to D0 transition
++ * or Conventional Reset.
+ *
+ * For PCIe this means the delays in PCIe 5.0 section 6.6.1. For
+ * conventional PCI it means Tpvrh + Trhfa specified in PCI 3.0 section
+ * 4.3.2.
++ *
++ * Return 0 on success or -ENOTTY if the first device on the secondary bus
++ * failed to become accessible.
+ */
+-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
++int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
++ int timeout)
+ {
+ struct pci_dev *child;
+ int delay;
+
+ if (pci_dev_is_disconnected(dev))
+- return;
++ return 0;
+
+ if (!pci_is_bridge(dev))
+- return;
++ return 0;
+
+ down_read(&pci_bus_sem);
+
+@@ -4821,14 +4825,14 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+ */
+ if (!dev->subordinate || list_empty(&dev->subordinate->devices)) {
+ up_read(&pci_bus_sem);
+- return;
++ return 0;
+ }
+
+ /* Take d3cold_delay requirements into account */
+ delay = pci_bus_max_d3cold_delay(dev->subordinate);
+ if (!delay) {
+ up_read(&pci_bus_sem);
+- return;
++ return 0;
+ }
+
+ child = list_first_entry(&dev->subordinate->devices, struct pci_dev,
+@@ -4837,14 +4841,12 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+
+ /*
+ * Conventional PCI and PCI-X we need to wait Tpvrh + Trhfa before
+- * accessing the device after reset (that is 1000 ms + 100 ms). In
+- * practice this should not be needed because we don't do power
+- * management for them (see pci_bridge_d3_possible()).
++ * accessing the device after reset (that is 1000 ms + 100 ms).
+ */
+ if (!pci_is_pcie(dev)) {
+ pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
+ msleep(1000 + delay);
+- return;
++ return 0;
+ }
+
+ /*
+@@ -4861,11 +4863,11 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+ * configuration requests if we only wait for 100 ms (see
+ * https://bugzilla.kernel.org/show_bug.cgi?id=203885).
+ *
+- * Therefore we wait for 100 ms and check for the device presence.
+- * If it is still not present give it an additional 100 ms.
++ * Therefore we wait for 100 ms and check for the device presence
++ * until the timeout expires.
+ */
+ if (!pcie_downstream_port(dev))
+- return;
++ return 0;
+
+ if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
+ pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
+@@ -4876,14 +4878,11 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+ if (!pcie_wait_for_link_delay(dev, true, delay)) {
+ /* Did not train, no need to wait any further */
+ pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n");
+- return;
++ return -ENOTTY;
+ }
+ }
+
+- if (!pci_device_is_present(child)) {
+- pci_dbg(child, "waiting additional %d ms to become accessible\n", delay);
+- msleep(delay);
+- }
++ return pci_dev_wait(child, reset_type, timeout - delay);
+ }
+
+ void pci_reset_secondary_bus(struct pci_dev *dev)
+@@ -4902,15 +4901,6 @@ void pci_reset_secondary_bus(struct pci_dev *dev)
+
+ ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+-
+- /*
+- * Trhfa for conventional PCI is 2^25 clock cycles.
+- * Assuming a minimum 33MHz clock this results in a 1s
+- * delay before we can consider subordinate devices to
+- * be re-initialized. PCIe has some ways to shorten this,
+- * but we don't make use of them yet.
+- */
+- ssleep(1);
+ }
+
+ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
+@@ -4929,7 +4919,8 @@ int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
+ {
+ pcibios_reset_secondary_bus(dev);
+
+- return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS);
++ return pci_bridge_wait_for_secondary_bus(dev, "bus reset",
++ PCIE_RESET_READY_POLL_MS);
+ }
+ EXPORT_SYMBOL_GPL(pci_bridge_secondary_bus_reset);
+
+diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
+index 9197d7362731e..72436000ff252 100644
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -47,6 +47,19 @@ int pci_bus_error_reset(struct pci_dev *dev);
+ #define PCI_PM_D3HOT_WAIT 10 /* msec */
+ #define PCI_PM_D3COLD_WAIT 100 /* msec */
+
++/*
++ * Following exit from Conventional Reset, devices must be ready within 1 sec
++ * (PCIe r6.0 sec 6.6.1). A D3cold to D0 transition implies a Conventional
++ * Reset (PCIe r6.0 sec 5.8).
++ */
++#define PCI_RESET_WAIT 1000 /* msec */
++/*
++ * Devices may extend the 1 sec period through Request Retry Status completions
++ * (PCIe r6.0 sec 2.3.1). The spec does not provide an upper limit, but 60 sec
++ * ought to be enough for any device to become responsive.
++ */
++#define PCIE_RESET_READY_POLL_MS 60000 /* msec */
++
+ /**
+ * struct pci_platform_pm_ops - Firmware PM callbacks
+ *
+@@ -108,7 +121,8 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev);
+ void pci_free_cap_save_buffers(struct pci_dev *dev);
+ bool pci_bridge_d3_possible(struct pci_dev *dev);
+ void pci_bridge_d3_update(struct pci_dev *dev);
+-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
++int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type,
++ int timeout);
+
+ static inline void pci_wakeup_event(struct pci_dev *dev)
+ {
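A hedged sketch of how the two new constants divide responsibilities: polling stays quiet for the first PCI_RESET_WAIT milliseconds and only reports slow devices after that, up to the generous 60 s cap. device_responds() is a stand-in for the config-space vendor-ID read, and the exponential backoff is only the shape of pci_dev_wait(), not its exact timing.

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define PCI_RESET_WAIT            1000   /* ms: spec-mandated readiness */
#define PCIE_RESET_READY_POLL_MS 60000   /* ms: upper bound for CRS retries */

/* Hypothetical probe; true once the device answers with a valid ID. */
static bool device_responds(void) { return true; }

static int wait_for_device(const char *reset_type, int timeout_ms)
{
    int delay = 1, waited = 0;

    while (!device_responds()) {
        if (waited > timeout_ms)
            return -1;
        if (waited > PCI_RESET_WAIT)
            printf("not ready %dms after %s; waiting\n", waited, reset_type);
        usleep(delay * 1000);
        waited += delay;
        delay *= 2;      /* back off, roughly as pci_dev_wait() does */
    }
    return 0;
}

int main(void) { return wait_for_device("bus reset", PCIE_RESET_READY_POLL_MS); }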
+diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
+index c556e7beafe38..f21d64ae4ffcc 100644
+--- a/drivers/pci/pcie/dpc.c
++++ b/drivers/pci/pcie/dpc.c
+@@ -170,8 +170,8 @@ pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
+ pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
+ PCI_EXP_DPC_STATUS_TRIGGER);
+
+- if (!pcie_wait_for_link(pdev, true)) {
+- pci_info(pdev, "Data Link Layer Link Active not set in 1000 msec\n");
++ if (pci_bridge_wait_for_secondary_bus(pdev, "DPC",
++ PCIE_RESET_READY_POLL_MS)) {
+ clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
+ ret = PCI_ERS_RESULT_DISCONNECT;
+ } else {
+diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
+index fae0323242103..18321cf9db5d6 100644
+--- a/drivers/scsi/hosts.c
++++ b/drivers/scsi/hosts.c
+@@ -322,10 +322,7 @@ static void scsi_host_dev_release(struct device *dev)
+ struct Scsi_Host *shost = dev_to_shost(dev);
+ struct device *parent = dev->parent;
+
+- /* In case scsi_remove_host() has not been called. */
+- scsi_proc_hostdir_rm(shost->hostt);
+-
+- /* Wait for functions invoked through call_rcu(&shost->rcu, ...) */
++ /* Wait for functions invoked through call_rcu(&scmd->rcu, ...) */
+ rcu_barrier();
+
+ if (shost->tmf_work_q)
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
+index b58f4d9c296a3..326265fd7f91a 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
+@@ -670,7 +670,7 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
+ goto out_fail;
+ }
+ port = sas_port_alloc_num(sas_node->parent_dev);
+- if ((sas_port_add(port))) {
++ if (!port || (sas_port_add(port))) {
+ ioc_err(ioc, "failure at %s:%d/%s()!\n",
+ __FILE__, __LINE__, __func__);
+ goto out_fail;
+@@ -695,6 +695,12 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
+ rphy = sas_expander_alloc(port,
+ mpt3sas_port->remote_identify.device_type);
+
++ if (!rphy) {
++ ioc_err(ioc, "failure at %s:%d/%s()!\n",
++ __FILE__, __LINE__, __func__);
++ goto out_delete_port;
++ }
++
+ rphy->identify = mpt3sas_port->remote_identify;
+
+ if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) {
+@@ -714,6 +720,7 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
+ __FILE__, __LINE__, __func__);
+ sas_rphy_free(rphy);
+ rphy = NULL;
++ goto out_delete_port;
+ }
+
+ if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) {
+@@ -740,7 +747,10 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
+ rphy_to_expander_device(rphy));
+ return mpt3sas_port;
+
+- out_fail:
++out_delete_port:
++ sas_port_delete(port);
++
++out_fail:
+ list_for_each_entry_safe(mpt3sas_phy, next, &mpt3sas_port->phy_list,
+ port_siblings)
+ list_del(&mpt3sas_phy->port_siblings);
+diff --git a/drivers/tty/serial/8250/8250_em.c b/drivers/tty/serial/8250/8250_em.c
+index f8e99995eee91..d94c3811a8f7a 100644
+--- a/drivers/tty/serial/8250/8250_em.c
++++ b/drivers/tty/serial/8250/8250_em.c
+@@ -106,8 +106,8 @@ static int serial8250_em_probe(struct platform_device *pdev)
+ memset(&up, 0, sizeof(up));
+ up.port.mapbase = regs->start;
+ up.port.irq = irq;
+- up.port.type = PORT_UNKNOWN;
+- up.port.flags = UPF_BOOT_AUTOCONF | UPF_FIXED_PORT | UPF_IOREMAP;
++ up.port.type = PORT_16750;
++ up.port.flags = UPF_FIXED_PORT | UPF_IOREMAP | UPF_FIXED_TYPE;
+ up.port.dev = &pdev->dev;
+ up.port.private_data = priv;
+
+diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
+index 9cb0e8673f826..32cce52800a73 100644
+--- a/drivers/tty/serial/fsl_lpuart.c
++++ b/drivers/tty/serial/fsl_lpuart.c
+@@ -2159,9 +2159,15 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios,
+ /* update the per-port timeout */
+ uart_update_timeout(port, termios->c_cflag, baud);
+
+- /* wait transmit engin complete */
+- lpuart32_write(&sport->port, 0, UARTMODIR);
+- lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC);
++ /*
++ * LPUART Transmission Complete Flag may never be set while queuing a break
++ * character, so skip waiting for transmission complete when UARTCTRL_SBK is
++ * asserted.
++ */
++ if (!(old_ctrl & UARTCTRL_SBK)) {
++ lpuart32_write(&sport->port, 0, UARTMODIR);
++ lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC);
++ }
+
+ /* disable transmit and receive */
+ lpuart32_write(&sport->port, old_ctrl & ~(UARTCTRL_TE | UARTCTRL_RE),
+diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
+index 3feb6e40d56d8..ef8a4c5fc6875 100644
+--- a/drivers/video/fbdev/stifb.c
++++ b/drivers/video/fbdev/stifb.c
+@@ -921,6 +921,28 @@ SETUP_HCRX(struct stifb_info *fb)
+
+ /* ------------------- driver specific functions --------------------------- */
+
++static int
++stifb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
++{
++ struct stifb_info *fb = container_of(info, struct stifb_info, info);
++
++ if (var->xres != fb->info.var.xres ||
++ var->yres != fb->info.var.yres ||
++ var->bits_per_pixel != fb->info.var.bits_per_pixel)
++ return -EINVAL;
++
++ var->xres_virtual = var->xres;
++ var->yres_virtual = var->yres;
++ var->xoffset = 0;
++ var->yoffset = 0;
++ var->grayscale = fb->info.var.grayscale;
++ var->red.length = fb->info.var.red.length;
++ var->green.length = fb->info.var.green.length;
++ var->blue.length = fb->info.var.blue.length;
++
++ return 0;
++}
++
+ static int
+ stifb_setcolreg(u_int regno, u_int red, u_int green,
+ u_int blue, u_int transp, struct fb_info *info)
+@@ -1145,6 +1167,7 @@ stifb_init_display(struct stifb_info *fb)
+
+ static const struct fb_ops stifb_ops = {
+ .owner = THIS_MODULE,
++ .fb_check_var = stifb_check_var,
+ .fb_setcolreg = stifb_setcolreg,
+ .fb_blank = stifb_blank,
+ .fb_fillrect = stifb_fillrect,
+@@ -1164,6 +1187,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
+ struct stifb_info *fb;
+ struct fb_info *info;
+ unsigned long sti_rom_address;
++ char modestr[32];
+ char *dev_name;
+ int bpp, xres, yres;
+
+@@ -1342,6 +1366,9 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
+ info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
+ info->pseudo_palette = &fb->pseudo_palette;
+
++ scnprintf(modestr, sizeof(modestr), "%dx%d-%d", xres, yres, bpp);
++ fb_find_mode(&info->var, info, modestr, NULL, 0, NULL, bpp);
++
+ /* This has to be done !!! */
+ if (fb_alloc_cmap(&info->cmap, NR_PALETTE, 0))
+ goto out_err1;
+diff --git a/fs/attr.c b/fs/attr.c
+index 848ffe6e3c24b..326a0db3296d7 100644
+--- a/fs/attr.c
++++ b/fs/attr.c
+@@ -18,6 +18,65 @@
+ #include <linux/evm.h>
+ #include <linux/ima.h>
+
++#include "internal.h"
++
++/**
++ * setattr_should_drop_sgid - determine whether the setgid bit needs to be
++ * removed
++ * @inode: inode to check
++ *
++ * This function determines whether the setgid bit needs to be removed.
++ * We retain backwards compatibility and require setgid bit to be removed
++ * unconditionally if S_IXGRP is set. Otherwise we have the exact same
++ * requirements as setattr_prepare() and setattr_copy().
++ *
++ * Return: ATTR_KILL_SGID if setgid bit needs to be removed, 0 otherwise.
++ */
++int setattr_should_drop_sgid(const struct inode *inode)
++{
++ umode_t mode = inode->i_mode;
++
++ if (!(mode & S_ISGID))
++ return 0;
++ if (mode & S_IXGRP)
++ return ATTR_KILL_SGID;
++ if (!in_group_or_capable(inode, inode->i_gid))
++ return ATTR_KILL_SGID;
++ return 0;
++}
++
++/**
++ * setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to
++ * be dropped
++ * @inode: inode to check
++ *
++ * This function determines whether the set{g,u}id bits need to be removed.
++ * If the setuid bit needs to be removed ATTR_KILL_SUID is returned. If the
++ * setgid bit needs to be removed ATTR_KILL_SGID is returned. If both
++ * set{g,u}id bits need to be removed the corresponding mask of both flags is
++ * returned.
++ *
++ * Return: A mask of ATTR_KILL_S{G,U}ID indicating which - if any - setid bits
++ * to remove, 0 otherwise.
++ */
++int setattr_should_drop_suidgid(struct inode *inode)
++{
++ umode_t mode = inode->i_mode;
++ int kill = 0;
++
++ /* suid always must be killed */
++ if (unlikely(mode & S_ISUID))
++ kill = ATTR_KILL_SUID;
++
++ kill |= setattr_should_drop_sgid(inode);
++
++ if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
++ return kill;
++
++ return 0;
++}
++EXPORT_SYMBOL(setattr_should_drop_suidgid);
++
+ static bool chown_ok(const struct inode *inode, kuid_t uid)
+ {
+ if (uid_eq(current_fsuid(), inode->i_uid) &&
+@@ -90,9 +149,8 @@ int setattr_prepare(struct dentry *dentry, struct iattr *attr)
+ if (!inode_owner_or_capable(inode))
+ return -EPERM;
+ /* Also check the setgid bit! */
+- if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
+- inode->i_gid) &&
+- !capable_wrt_inode_uidgid(inode, CAP_FSETID))
++ if (!in_group_or_capable(inode, (ia_valid & ATTR_GID) ?
++ attr->ia_gid : inode->i_gid))
+ attr->ia_mode &= ~S_ISGID;
+ }
+
+@@ -193,9 +251,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
+ inode->i_ctime = attr->ia_ctime;
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = attr->ia_mode;
+-
+- if (!in_group_p(inode->i_gid) &&
+- !capable_wrt_inode_uidgid(inode, CAP_FSETID))
++ if (!in_group_or_capable(inode, inode->i_gid))
+ mode &= ~S_ISGID;
+ inode->i_mode = mode;
+ }
+@@ -297,7 +353,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
+ }
+ }
+ if (ia_valid & ATTR_KILL_SGID) {
+- if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
++ if (mode & S_ISGID) {
+ if (!(ia_valid & ATTR_MODE)) {
+ ia_valid = attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode = inode->i_mode;
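Not part of the patch — a userspace rendering of the new set{g,u}id-dropping decision, with the caller's group membership / CAP_FSETID state passed in as a boolean and the final CAP_FSETID override on the whole mask omitted for brevity.

#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>

#define ATTR_KILL_SUID 0x0800   /* values mirror include/linux/fs.h */
#define ATTR_KILL_SGID 0x1000

/* Sketch of setattr_should_drop_suidgid(): the setuid bit always goes;
 * the setgid bit goes if group-exec is set, or if the writer is neither
 * in the owning group nor CAP_FSETID-capable over the inode. */
static int should_drop_suidgid(mode_t mode, bool in_group_or_capable)
{
    int kill = 0;

    if (mode & S_ISUID)
        kill |= ATTR_KILL_SUID;
    if (mode & S_ISGID) {
        if ((mode & S_IXGRP) || !in_group_or_capable)
            kill |= ATTR_KILL_SGID;
    }
    return S_ISREG(mode) ? kill : 0;  /* only regular files are stripped */
}

int main(void)
{
    printf("%#x\n", should_drop_suidgid(S_IFREG | S_ISUID | 0644, true));  /* 0x800 */
    printf("%#x\n", should_drop_suidgid(S_IFREG | S_ISGID | 0644, false)); /* 0x1000 */
    return 0;
}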
+diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
+index 97cd4df040608..e11818801148a 100644
+--- a/fs/cifs/smb2inode.c
++++ b/fs/cifs/smb2inode.c
+@@ -236,15 +236,32 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
+ size[0] = 8; /* sizeof __le64 */
+ data[0] = ptr;
+
+- rc = SMB2_set_info_init(tcon, server,
+- &rqst[num_rqst], COMPOUND_FID,
+- COMPOUND_FID, current->tgid,
+- FILE_END_OF_FILE_INFORMATION,
+- SMB2_O_INFO_FILE, 0, data, size);
++ if (cfile) {
++ rc = SMB2_set_info_init(tcon, server,
++ &rqst[num_rqst],
++ cfile->fid.persistent_fid,
++ cfile->fid.volatile_fid,
++ current->tgid,
++ FILE_END_OF_FILE_INFORMATION,
++ SMB2_O_INFO_FILE, 0,
++ data, size);
++ } else {
++ rc = SMB2_set_info_init(tcon, server,
++ &rqst[num_rqst],
++ COMPOUND_FID,
++ COMPOUND_FID,
++ current->tgid,
++ FILE_END_OF_FILE_INFORMATION,
++ SMB2_O_INFO_FILE, 0,
++ data, size);
++ if (!rc) {
++ smb2_set_next_command(tcon, &rqst[num_rqst]);
++ smb2_set_related(&rqst[num_rqst]);
++ }
++ }
+ if (rc)
+ goto finished;
+- smb2_set_next_command(tcon, &rqst[num_rqst]);
+- smb2_set_related(&rqst[num_rqst++]);
++ num_rqst++;
+ trace_smb3_set_eof_enter(xid, ses->Suid, tcon->tid, full_path);
+ break;
+ case SMB2_OP_SET_INFO:
+diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
+index b137006f0fd25..4409f56fc37e6 100644
+--- a/fs/cifs/transport.c
++++ b/fs/cifs/transport.c
+@@ -312,7 +312,7 @@ static int
+ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+ struct smb_rqst *rqst)
+ {
+- int rc = 0;
++ int rc;
+ struct kvec *iov;
+ int n_vec;
+ unsigned int send_length = 0;
+@@ -323,6 +323,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+ struct msghdr smb_msg = {};
+ __be32 rfc1002_marker;
+
++ cifs_in_send_inc(server);
+ if (cifs_rdma_enabled(server)) {
+ /* return -EAGAIN when connecting or reconnecting */
+ rc = -EAGAIN;
+@@ -331,14 +332,17 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+ goto smbd_done;
+ }
+
++ rc = -EAGAIN;
+ if (ssocket == NULL)
+- return -EAGAIN;
++ goto out;
+
++ rc = -ERESTARTSYS;
+ if (fatal_signal_pending(current)) {
+ cifs_dbg(FYI, "signal pending before send request\n");
+- return -ERESTARTSYS;
++ goto out;
+ }
+
++ rc = 0;
+ /* cork the socket */
+ tcp_sock_set_cork(ssocket->sk, true);
+
+@@ -449,7 +453,8 @@ smbd_done:
+ rc);
+ else if (rc > 0)
+ rc = 0;
+-
++out:
++ cifs_in_send_dec(server);
+ return rc;
+ }
+
+@@ -826,9 +831,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
+ * I/O response may come back and free the mid entry on another thread.
+ */
+ cifs_save_when_sent(mid);
+- cifs_in_send_inc(server);
+ rc = smb_send_rqst(server, 1, rqst, flags);
+- cifs_in_send_dec(server);
+
+ if (rc < 0) {
+ revert_current_mid(server, mid->credits);
+@@ -1117,9 +1120,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
+ else
+ midQ[i]->callback = cifs_compound_last_callback;
+ }
+- cifs_in_send_inc(server);
+ rc = smb_send_rqst(server, num_rqst, rqst, flags);
+- cifs_in_send_dec(server);
+
+ for (i = 0; i < num_rqst; i++)
+ cifs_save_when_sent(midQ[i]);
+@@ -1356,9 +1357,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
+
+ midQ->mid_state = MID_REQUEST_SUBMITTED;
+
+- cifs_in_send_inc(server);
+ rc = smb_send(server, in_buf, len);
+- cifs_in_send_dec(server);
+ cifs_save_when_sent(midQ);
+
+ if (rc < 0)
+@@ -1495,9 +1494,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
+ }
+
+ midQ->mid_state = MID_REQUEST_SUBMITTED;
+- cifs_in_send_inc(server);
+ rc = smb_send(server, in_buf, len);
+- cifs_in_send_dec(server);
+ cifs_save_when_sent(midQ);
+
+ if (rc < 0)
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 1a654a1f3f46b..6ba185b46ba39 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4721,13 +4721,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ goto bad_inode;
+ raw_inode = ext4_raw_inode(&iloc);
+
+- if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
+- ext4_error_inode(inode, function, line, 0,
+- "iget: root inode unallocated");
+- ret = -EFSCORRUPTED;
+- goto bad_inode;
+- }
+-
+ if ((flags & EXT4_IGET_HANDLE) &&
+ (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
+ ret = -ESTALE;
+@@ -4800,11 +4793,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ * NeilBrown 1999oct15
+ */
+ if (inode->i_nlink == 0) {
+- if ((inode->i_mode == 0 ||
++ if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL ||
+ !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
+ ino != EXT4_BOOT_LOADER_INO) {
+- /* this inode is deleted */
+- ret = -ESTALE;
++ /* this inode is deleted or unallocated */
++ if (flags & EXT4_IGET_SPECIAL) {
++ ext4_error_inode(inode, function, line, 0,
++ "iget: special inode unallocated");
++ ret = -EFSCORRUPTED;
++ } else
++ ret = -ESTALE;
+ goto bad_inode;
+ }
+ /* The only unlinked inodes we let through here have
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 1f47aeca71422..45f719c1e0023 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -3934,10 +3934,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
+ goto end_rename;
+ }
+ retval = ext4_rename_dir_prepare(handle, &old);
+- if (retval) {
+- inode_unlock(old.inode);
++ if (retval)
+ goto end_rename;
+- }
+ }
+ /*
+ * If we're renaming a file within an inline_data dir and adding or
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 60e122761352c..f3da1f2d4cb93 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -386,6 +386,17 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ struct inode *inode;
+ int err;
+
++ /*
++ * We have to check for this corruption early as otherwise
++ * iget_locked() could wait indefinitely for the state of our
++ * parent inode.
++ */
++ if (parent->i_ino == ea_ino) {
++ ext4_error(parent->i_sb,
++ "Parent and EA inode have the same ino %lu", ea_ino);
++ return -EFSCORRUPTED;
++ }
++
+ inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+diff --git a/fs/inode.c b/fs/inode.c
+index 9f49e0bdc2f77..7ec90788d8be9 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -1854,35 +1854,6 @@ skip_update:
+ }
+ EXPORT_SYMBOL(touch_atime);
+
+-/*
+- * The logic we want is
+- *
+- * if suid or (sgid and xgrp)
+- * remove privs
+- */
+-int should_remove_suid(struct dentry *dentry)
+-{
+- umode_t mode = d_inode(dentry)->i_mode;
+- int kill = 0;
+-
+- /* suid always must be killed */
+- if (unlikely(mode & S_ISUID))
+- kill = ATTR_KILL_SUID;
+-
+- /*
+- * sgid without any exec bits is just a mandatory locking mark; leave
+- * it alone. If some exec bits are set, it's a real sgid; kill it.
+- */
+- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+- kill |= ATTR_KILL_SGID;
+-
+- if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
+- return kill;
+-
+- return 0;
+-}
+-EXPORT_SYMBOL(should_remove_suid);
+-
+ /*
+ * Return mask of changes for notify_change() that need to be done as a
+ * response to write or truncate. Return 0 if nothing has to be changed.
+@@ -1897,7 +1868,7 @@ int dentry_needs_remove_privs(struct dentry *dentry)
+ if (IS_NOSEC(inode))
+ return 0;
+
+- mask = should_remove_suid(dentry);
++ mask = setattr_should_drop_suidgid(inode);
+ ret = security_inode_need_killpriv(dentry);
+ if (ret < 0)
+ return ret;
+@@ -2147,10 +2118,6 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
+ /* Directories are special, and always inherit S_ISGID */
+ if (S_ISDIR(mode))
+ mode |= S_ISGID;
+- else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
+- !in_group_p(inode->i_gid) &&
+- !capable_wrt_inode_uidgid(dir, CAP_FSETID))
+- mode &= ~S_ISGID;
+ } else
+ inode->i_gid = current_fsgid();
+ inode->i_mode = mode;
+@@ -2382,3 +2349,48 @@ int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa,
+ return 0;
+ }
+ EXPORT_SYMBOL(vfs_ioc_fssetxattr_check);
++
++/**
++ * in_group_or_capable - check whether caller is CAP_FSETID privileged
++ * @inode: inode to check
++ * @gid: the new/current gid of @inode
++ *
++ * Check whether @gid is in the caller's group list or if the caller is
++ * privileged with CAP_FSETID over @inode. This can be used to determine
++ * whether the setgid bit can be kept or must be dropped.
++ *
++ * Return: true if the caller is sufficiently privileged, false if not.
++ */
++bool in_group_or_capable(const struct inode *inode, kgid_t gid)
++{
++ if (in_group_p(gid))
++ return true;
++ if (capable_wrt_inode_uidgid(inode, CAP_FSETID))
++ return true;
++ return false;
++}
++
++/**
++ * mode_strip_sgid - handle the sgid bit for non-directories
++ * @dir: parent directory inode
++ * @mode: mode of the file to be created in @dir
++ *
++ * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
++ * raised and @dir has the S_ISGID bit raised, ensure that the caller is
++ * either in the group of the parent directory or has CAP_FSETID
++ * in their user namespace and is privileged over the parent directory.
++ * In all other cases, strip the S_ISGID bit from @mode.
++ *
++ * Return: the new mode to use for the file
++ */
++umode_t mode_strip_sgid(const struct inode *dir, umode_t mode)
++{
++ if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
++ return mode;
++ if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
++ return mode;
++ if (in_group_or_capable(dir, dir->i_gid))
++ return mode;
++ return mode & ~S_ISGID;
++}
++EXPORT_SYMBOL(mode_strip_sgid);
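As a sketch only, mode_strip_sgid() reduces to the decision below; dir_mode and creator_in_dir_group replace the inode and capability checks done in the kernel, and the NULL-parent case is left out.

#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>

/* A new non-directory only keeps a setgid+group-exec mode if the parent
 * directory is setgid and the creator is in the parent's group (or is
 * otherwise privileged). */
static mode_t mode_strip_sgid(mode_t dir_mode, bool creator_in_dir_group,
                              mode_t mode)
{
    if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
        return mode;
    if (S_ISDIR(mode) || !(dir_mode & S_ISGID))
        return mode;
    if (creator_in_dir_group)
        return mode;
    return mode & ~S_ISGID;
}

int main(void)
{
    /* setgid parent, creator not in its group: the sgid bit is stripped. */
    printf("%o\n", mode_strip_sgid(S_IFDIR | S_ISGID | 0775, false,
                                   S_IFREG | S_ISGID | 0710));
    return 0;
}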
+diff --git a/fs/internal.h b/fs/internal.h
+index 06d313b9beecb..d5d9fcdae10c4 100644
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -149,6 +149,7 @@ extern int vfs_open(const struct path *, struct file *);
+ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
+ extern void inode_add_lru(struct inode *inode);
+ extern int dentry_needs_remove_privs(struct dentry *dentry);
++bool in_group_or_capable(const struct inode *inode, kgid_t gid);
+
+ /*
+ * fs-writeback.c
+@@ -196,3 +197,8 @@ int sb_init_dio_done_wq(struct super_block *sb);
+ */
+ int do_statx(int dfd, const char __user *filename, unsigned flags,
+ unsigned int mask, struct statx __user *buffer);
++
++/*
++ * fs/attr.c
++ */
++int setattr_should_drop_sgid(const struct inode *inode);
+diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
+index bd7d58d27bfc6..97a3c09fd96b6 100644
+--- a/fs/jffs2/file.c
++++ b/fs/jffs2/file.c
+@@ -138,19 +138,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
+ struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+ struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+ pgoff_t index = pos >> PAGE_SHIFT;
+- uint32_t pageofs = index << PAGE_SHIFT;
+ int ret = 0;
+
+ jffs2_dbg(1, "%s()\n", __func__);
+
+- if (pageofs > inode->i_size) {
+- /* Make new hole frag from old EOF to new page */
++ if (pos > inode->i_size) {
++ /* Make new hole frag from old EOF to new position */
+ struct jffs2_raw_inode ri;
+ struct jffs2_full_dnode *fn;
+ uint32_t alloc_len;
+
+- jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
+- (unsigned int)inode->i_size, pageofs);
++ jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new position\n",
++ (unsigned int)inode->i_size, (uint32_t)pos);
+
+ ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+ ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+@@ -170,10 +169,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
+ ri.mode = cpu_to_jemode(inode->i_mode);
+ ri.uid = cpu_to_je16(i_uid_read(inode));
+ ri.gid = cpu_to_je16(i_gid_read(inode));
+- ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs));
++ ri.isize = cpu_to_je32((uint32_t)pos);
+ ri.atime = ri.ctime = ri.mtime = cpu_to_je32(JFFS2_NOW());
+ ri.offset = cpu_to_je32(inode->i_size);
+- ri.dsize = cpu_to_je32(pageofs - inode->i_size);
++ ri.dsize = cpu_to_je32((uint32_t)pos - inode->i_size);
+ ri.csize = cpu_to_je32(0);
+ ri.compr = JFFS2_COMPR_ZERO;
+ ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
+@@ -203,7 +202,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
+ goto out_err;
+ }
+ jffs2_complete_reservation(c);
+- inode->i_size = pageofs;
++ inode->i_size = pos;
+ mutex_unlock(&f->sem);
+ }
+
+diff --git a/fs/namei.c b/fs/namei.c
+index 4159c140fa473..3d98db9802a77 100644
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -2798,6 +2798,63 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
+ }
+ EXPORT_SYMBOL(unlock_rename);
+
++/**
++ * mode_strip_umask - handle vfs umask stripping
++ * @dir: parent directory of the new inode
++ * @mode: mode of the new inode to be created in @dir
++ *
++ * Umask stripping depends on whether or not the filesystem supports POSIX
++ * ACLs. If the filesystem doesn't support it, umask stripping is done directly
++ * here. If the filesystem does support POSIX ACLs, umask stripping is
++ * deferred until the filesystem calls posix_acl_create().
++ *
++ * Returns: mode
++ */
++static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
++{
++ if (!IS_POSIXACL(dir))
++ mode &= ~current_umask();
++ return mode;
++}
++
++/**
++ * vfs_prepare_mode - prepare the mode to be used for a new inode
++ * @dir: parent directory of the new inode
++ * @mode: mode of the new inode
++ * @mask_perms: allowed permission by the vfs
++ * @type: type of file to be created
++ *
++ * This helper consolidates and enforces vfs restrictions on the @mode of a new
++ * object to be created.
++ *
++ * Umask stripping depends on whether the filesystem supports POSIX ACLs (see
++ * the kernel documentation for mode_strip_umask()). Moving umask stripping
++ * after setgid stripping allows the same ordering for both non-POSIX ACL and
++ * POSIX ACL supporting filesystems.
++ *
++ * Note that it's currently valid for @type to be 0 if a directory is created.
++ * Filesystems raise that flag individually and we need to check whether each
++ * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a
++ * non-zero type.
++ *
++ * Returns: mode to be passed to the filesystem
++ */
++static inline umode_t vfs_prepare_mode(const struct inode *dir, umode_t mode,
++ umode_t mask_perms, umode_t type)
++{
++ mode = mode_strip_sgid(dir, mode);
++ mode = mode_strip_umask(dir, mode);
++
++ /*
++ * Apply the vfs mandated allowed permission mask and set the type of
++ * file to be created before we call into the filesystem.
++ */
++ mode &= (mask_perms & ~S_IFMT);
++ mode |= (type & S_IFMT);
++
++ return mode;
++}
++
+ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool want_excl)
+ {
+@@ -2807,8 +2864,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+
+ if (!dir->i_op->create)
+ return -EACCES; /* shouldn't it be ENOSYS? */
+- mode &= S_IALLUGO;
+- mode |= S_IFREG;
++
++ mode = vfs_prepare_mode(dir, mode, S_IALLUGO, S_IFREG);
+ error = security_inode_create(dir, dentry, mode);
+ if (error)
+ return error;
+@@ -3072,8 +3129,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
+ if (open_flag & O_CREAT) {
+ if (open_flag & O_EXCL)
+ open_flag &= ~O_TRUNC;
+- if (!IS_POSIXACL(dir->d_inode))
+- mode &= ~current_umask();
++ mode = vfs_prepare_mode(dir->d_inode, mode, mode, mode);
+ if (likely(got_write))
+ create_error = may_o_create(&nd->path, dentry, mode);
+ else
+@@ -3286,8 +3342,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
+ child = d_alloc(dentry, &slash_name);
+ if (unlikely(!child))
+ goto out_err;
+- if (!IS_POSIXACL(dir))
+- mode &= ~current_umask();
++ mode = vfs_prepare_mode(dir, mode, mode, mode);
+ error = dir->i_op->tmpfile(dir, child, mode);
+ if (error)
+ goto out_err;
+@@ -3548,6 +3603,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+ if (!dir->i_op->mknod)
+ return -EPERM;
+
++ mode = vfs_prepare_mode(dir, mode, mode, mode);
+ error = devcgroup_inode_mknod(mode, dev);
+ if (error)
+ return error;
+@@ -3596,9 +3652,8 @@ retry:
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+- if (!IS_POSIXACL(path.dentry->d_inode))
+- mode &= ~current_umask();
+- error = security_path_mknod(&path, dentry, mode, dev);
++ error = security_path_mknod(&path, dentry,
++ mode_strip_umask(path.dentry->d_inode, mode), dev);
+ if (error)
+ goto out;
+ switch (mode & S_IFMT) {
+@@ -3646,7 +3701,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+ if (!dir->i_op->mkdir)
+ return -EPERM;
+
+- mode &= (S_IRWXUGO|S_ISVTX);
++ mode = vfs_prepare_mode(dir, mode, S_IRWXUGO | S_ISVTX, 0);
+ error = security_inode_mkdir(dir, dentry, mode);
+ if (error)
+ return error;
+@@ -3673,9 +3728,8 @@ retry:
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+- if (!IS_POSIXACL(path.dentry->d_inode))
+- mode &= ~current_umask();
+- error = security_path_mkdir(&path, dentry, mode);
++ error = security_path_mkdir(&path, dentry,
++ mode_strip_umask(path.dentry->d_inode, mode));
+ if (!error)
+ error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+ done_path_create(&path, dentry);
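Not from the patch itself — a compact sketch of the vfs_prepare_mode() ordering (setgid strip, then umask strip, then permission mask and file type), with booleans standing in for the parent-inode and ACL checks the kernel performs.

#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>

static mode_t vfs_prepare_mode(bool keep_sgid, bool posix_acl,
                               mode_t umask_bits, mode_t mode,
                               mode_t mask_perms, mode_t type)
{
    if (!keep_sgid)
        mode &= ~S_ISGID;        /* stands in for mode_strip_sgid() */
    if (!posix_acl)
        mode &= ~umask_bits;     /* stands in for mode_strip_umask() */

    /* Apply the vfs-allowed permission mask and force the file type. */
    mode &= (mask_perms & ~S_IFMT);
    mode |= (type & S_IFMT);
    return mode;
}

int main(void)
{
    /* 02666 requested, no sgid inheritance, umask 022: ends up 100644. */
    printf("%o\n", vfs_prepare_mode(false, false, 022,
                                    S_ISGID | 0666, 07777, S_IFREG));
    return 0;
}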
+diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
+index 1470b49adb2db..ca00cac5a12f7 100644
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1994,7 +1994,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
+ }
+ }
+
+- if (file && should_remove_suid(file->f_path.dentry)) {
++ if (file && setattr_should_drop_suidgid(file_inode(file))) {
+ ret = __ocfs2_write_remove_suid(inode, di_bh);
+ if (ret) {
+ mlog_errno(ret);
+@@ -2282,7 +2282,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
+ * inode. There's also the dinode i_size state which
+ * can be lost via setattr during extending writes (we
+ * set inode->i_size at the end of a write. */
+- if (should_remove_suid(dentry)) {
++ if (setattr_should_drop_suidgid(inode)) {
+ if (meta_level == 0) {
+ ocfs2_inode_unlock_for_extent_tree(inode,
+ &di_bh,
+diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
+index 856474b0a1ae7..df1f6b7aa7979 100644
+--- a/fs/ocfs2/namei.c
++++ b/fs/ocfs2/namei.c
+@@ -198,6 +198,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
+ * callers. */
+ if (S_ISDIR(mode))
+ set_nlink(inode, 2);
++ mode = mode_strip_sgid(dir, mode);
+ inode_init_owner(inode, dir, mode);
+ status = dquot_initialize(inode);
+ if (status)
+diff --git a/fs/open.c b/fs/open.c
+index b3fbb4300fc96..1ca4b236fdbe0 100644
+--- a/fs/open.c
++++ b/fs/open.c
+@@ -665,10 +665,10 @@ retry_deleg:
+ newattrs.ia_valid |= ATTR_GID;
+ newattrs.ia_gid = gid;
+ }
+- if (!S_ISDIR(inode->i_mode))
+- newattrs.ia_valid |=
+- ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+ inode_lock(inode);
++ if (!S_ISDIR(inode->i_mode))
++ newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
++ setattr_should_drop_sgid(inode);
+ error = security_path_chown(path, uid, gid);
+ if (!error)
+ error = notify_change(path->dentry, &newattrs, &delegated_inode);
+diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
+index 24c7d30e41dfe..0926363179a76 100644
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -3190,7 +3190,7 @@ xfs_btree_insrec(
+ struct xfs_btree_block *block; /* btree block */
+ struct xfs_buf *bp; /* buffer for block */
+ union xfs_btree_ptr nptr; /* new block ptr */
+- struct xfs_btree_cur *ncur; /* new btree cursor */
++ struct xfs_btree_cur *ncur = NULL; /* new btree cursor */
+ union xfs_btree_key nkey; /* new block key */
+ union xfs_btree_key *lkey;
+ int optr; /* old key/record index */
+@@ -3270,7 +3270,7 @@ xfs_btree_insrec(
+ #ifdef DEBUG
+ error = xfs_btree_check_block(cur, block, level, bp);
+ if (error)
+- return error;
++ goto error0;
+ #endif
+
+ /*
+@@ -3290,7 +3290,7 @@ xfs_btree_insrec(
+ for (i = numrecs - ptr; i >= 0; i--) {
+ error = xfs_btree_debug_check_ptr(cur, pp, i, level);
+ if (error)
+- return error;
++ goto error0;
+ }
+
+ xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
+@@ -3375,6 +3375,8 @@ xfs_btree_insrec(
+ return 0;
+
+ error0:
++ if (ncur)
++ xfs_btree_del_cursor(ncur, error);
+ return error;
+ }
+
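The ncur/error0 hunks above are a conditional-cleanup fix: once xfs_btree_insrec() may have allocated a second cursor, every later failure must leave through a single label that frees it instead of returning directly. A reduced userspace sketch of that shape, using stand-in types rather than XFS APIs:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct cursor { int level; };

static void del_cursor(struct cursor *cur)
{
        free(cur);
}

/* may_split: whether a second cursor gets allocated part-way through */
static int insert_record(int may_split, int fail_late)
{
        struct cursor *ncur = NULL;     /* the fix: starts out NULL */
        int error = 0;

        if (may_split) {
                ncur = malloc(sizeof(*ncur));
                if (!ncur)
                        return -ENOMEM; /* nothing to clean up yet */
        }

        if (fail_late) {
                error = -EIO;
                goto error0;            /* was "return error": leaked ncur */
        }

        return 0;

error0:
        if (ncur)
                del_cursor(ncur);
        return error;
}

int main(void)
{
        printf("insert_record: %d\n", insert_record(1, 1));
        return 0;
}
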
+diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
+index 7371a7f7c6529..fbab1042bc90b 100644
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -800,9 +800,6 @@ xfs_alloc_file_space(
+ quota_flag = XFS_QMOPT_RES_REGBLKS;
+ }
+
+- /*
+- * Allocate and setup the transaction.
+- */
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
+ resrtextents, 0, &tp);
+
+@@ -830,9 +827,9 @@ xfs_alloc_file_space(
+ if (error)
+ goto error0;
+
+- /*
+- * Complete the transaction
+- */
++ ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
++ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
++
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ if (error)
+diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
+index 4d6bf8d4974fe..9b6c5ba5fdfb6 100644
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -94,8 +94,6 @@ xfs_update_prealloc_flags(
+ ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+- if (flags & XFS_PREALLOC_SYNC)
+- xfs_trans_set_sync(tp);
+ return xfs_trans_commit(tp);
+ }
+
+@@ -852,7 +850,6 @@ xfs_file_fallocate(
+ struct inode *inode = file_inode(file);
+ struct xfs_inode *ip = XFS_I(inode);
+ long error;
+- enum xfs_prealloc_flags flags = 0;
+ uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+ loff_t new_size = 0;
+ bool do_file_insert = false;
+@@ -897,6 +894,10 @@ xfs_file_fallocate(
+ goto out_unlock;
+ }
+
++ error = file_modified(file);
++ if (error)
++ goto out_unlock;
++
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ error = xfs_free_file_space(ip, offset, len);
+ if (error)
+@@ -946,8 +947,6 @@ xfs_file_fallocate(
+ }
+ do_file_insert = true;
+ } else {
+- flags |= XFS_PREALLOC_SET;
+-
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+@@ -1000,13 +999,6 @@ xfs_file_fallocate(
+ }
+ }
+
+- if (file->f_flags & O_DSYNC)
+- flags |= XFS_PREALLOC_SYNC;
+-
+- error = xfs_update_prealloc_flags(ip, flags);
+- if (error)
+- goto out_unlock;
+-
+ /* Change file size if needed */
+ if (new_size) {
+ struct iattr iattr;
+@@ -1024,8 +1016,14 @@ xfs_file_fallocate(
+ * leave shifted extents past EOF and hence losing access to
+ * the data that is contained within them.
+ */
+- if (do_file_insert)
++ if (do_file_insert) {
+ error = xfs_insert_file_space(ip, offset, len);
++ if (error)
++ goto out_unlock;
++ }
++
++ if (file->f_flags & O_DSYNC)
++ error = xfs_log_force_inode(ip);
+
+ out_unlock:
+ xfs_iunlock(ip, iolock);
+diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
+index 6a3026e78a9bb..69fef29df4284 100644
+--- a/fs/xfs/xfs_iops.c
++++ b/fs/xfs/xfs_iops.c
+@@ -595,37 +595,6 @@ xfs_vn_getattr(
+ return 0;
+ }
+
+-static void
+-xfs_setattr_mode(
+- struct xfs_inode *ip,
+- struct iattr *iattr)
+-{
+- struct inode *inode = VFS_I(ip);
+- umode_t mode = iattr->ia_mode;
+-
+- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+-
+- inode->i_mode &= S_IFMT;
+- inode->i_mode |= mode & ~S_IFMT;
+-}
+-
+-void
+-xfs_setattr_time(
+- struct xfs_inode *ip,
+- struct iattr *iattr)
+-{
+- struct inode *inode = VFS_I(ip);
+-
+- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+-
+- if (iattr->ia_valid & ATTR_ATIME)
+- inode->i_atime = iattr->ia_atime;
+- if (iattr->ia_valid & ATTR_CTIME)
+- inode->i_ctime = iattr->ia_ctime;
+- if (iattr->ia_valid & ATTR_MTIME)
+- inode->i_mtime = iattr->ia_mtime;
+-}
+-
+ static int
+ xfs_vn_change_ok(
+ struct dentry *dentry,
+@@ -740,16 +709,6 @@ xfs_setattr_nonsize(
+ goto out_cancel;
+ }
+
+- /*
+- * CAP_FSETID overrides the following restrictions:
+- *
+- * The set-user-ID and set-group-ID bits of a file will be
+- * cleared upon successful return from chown()
+- */
+- if ((inode->i_mode & (S_ISUID|S_ISGID)) &&
+- !capable(CAP_FSETID))
+- inode->i_mode &= ~(S_ISUID|S_ISGID);
+-
+ /*
+ * Change the ownerships and register quota modifications
+ * in the transaction.
+@@ -761,7 +720,6 @@ xfs_setattr_nonsize(
+ olddquot1 = xfs_qm_vop_chown(tp, ip,
+ &ip->i_udquot, udqp);
+ }
+- inode->i_uid = uid;
+ }
+ if (!gid_eq(igid, gid)) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
+@@ -772,15 +730,10 @@ xfs_setattr_nonsize(
+ olddquot2 = xfs_qm_vop_chown(tp, ip,
+ &ip->i_gdquot, gdqp);
+ }
+- inode->i_gid = gid;
+ }
+ }
+
+- if (mask & ATTR_MODE)
+- xfs_setattr_mode(ip, iattr);
+- if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+- xfs_setattr_time(ip, iattr);
+-
++ setattr_copy(inode, iattr);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ XFS_STATS_INC(mp, xs_ig_attrchg);
+@@ -1025,11 +978,8 @@ xfs_setattr_size(
+ xfs_inode_clear_eofblocks_tag(ip);
+ }
+
+- if (iattr->ia_valid & ATTR_MODE)
+- xfs_setattr_mode(ip, iattr);
+- if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+- xfs_setattr_time(ip, iattr);
+-
++ ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
++ setattr_copy(inode, iattr);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ XFS_STATS_INC(mp, xs_ig_attrchg);
+diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
+index 4d24ff309f593..dd1bd0332f8e3 100644
+--- a/fs/xfs/xfs_iops.h
++++ b/fs/xfs/xfs_iops.h
+@@ -18,7 +18,6 @@ extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
+ */
+ #define XFS_ATTR_NOACL 0x01 /* Don't call posix_acl_chmod */
+
+-extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr);
+ extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap,
+ int flags);
+ extern int xfs_vn_setattr_nonsize(struct dentry *dentry, struct iattr *vap);
+diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
+index a2a5a0fd92334..402cf828cc919 100644
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -126,7 +126,6 @@ __xfs_free_perag(
+ {
+ struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+
+- ASSERT(atomic_read(&pag->pag_ref) == 0);
+ kmem_free(pag);
+ }
+
+@@ -145,7 +144,7 @@ xfs_free_perag(
+ pag = radix_tree_delete(&mp->m_perag_tree, agno);
+ spin_unlock(&mp->m_perag_lock);
+ ASSERT(pag);
+- ASSERT(atomic_read(&pag->pag_ref) == 0);
++ XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
+ xfs_iunlink_destroy(pag);
+ xfs_buf_hash_destroy(pag);
+ call_rcu(&pag->rcu_head, __xfs_free_perag);
+diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
+index f3082a957d5e1..053b99929f835 100644
+--- a/fs/xfs/xfs_pnfs.c
++++ b/fs/xfs/xfs_pnfs.c
+@@ -164,10 +164,12 @@ xfs_fs_map_blocks(
+ * that the blocks allocated and handed out to the client are
+ * guaranteed to be present even after a server crash.
+ */
+- error = xfs_update_prealloc_flags(ip,
+- XFS_PREALLOC_SET | XFS_PREALLOC_SYNC);
++ error = xfs_update_prealloc_flags(ip, XFS_PREALLOC_SET);
++ if (!error)
++ error = xfs_log_force_inode(ip);
+ if (error)
+ goto out_unlock;
++
+ } else {
+ xfs_iunlock(ip, lock_flags);
+ }
+@@ -283,7 +285,8 @@ xfs_fs_commit_blocks(
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+- xfs_setattr_time(ip, iattr);
++ ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
++ setattr_copy(inode, iattr);
+ if (update_isize) {
+ i_size_write(inode, iattr->ia_size);
+ ip->i_d.di_size = iattr->ia_size;
+diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
+index 64e5da33733b9..3c17e0c0f8169 100644
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -1318,8 +1318,15 @@ xfs_qm_quotacheck(
+
+ error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
+ NULL);
+- if (error)
++ if (error) {
++ /*
++ * The inode walk may have partially populated the dquot
++ * caches. We must purge them before disabling quota and
++ * tearing down the quotainfo, or else the dquots will leak.
++ */
++ xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+ goto error_return;
++ }
+
+ /*
+ * We've made all the changes that we need to make incore. Flush them
+diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
+index 2195daa289d27..055486e35e68f 100644
+--- a/include/drm/drm_bridge.h
++++ b/include/drm/drm_bridge.h
+@@ -427,11 +427,11 @@ struct drm_bridge_funcs {
+ *
+ * The returned array must be allocated with kmalloc() and will be
+ * freed by the caller. If the allocation fails, NULL should be
+- * returned. num_output_fmts must be set to the returned array size.
++ * returned. num_input_fmts must be set to the returned array size.
+ * Formats listed in the returned array should be listed in decreasing
+ * preference order (the core will try all formats until it finds one
+ * that works). When the format is not supported NULL should be
+- * returned and num_output_fmts should be set to 0.
++ * returned and num_input_fmts should be set to 0.
+ *
+ * This method is called on all elements of the bridge chain as part of
+ * the bus format negotiation process that happens in
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 74e19bccbf738..8ce9e5c61ede8 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1768,6 +1768,7 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
+ extern void inode_init_owner(struct inode *inode, const struct inode *dir,
+ umode_t mode);
+ extern bool may_open_dev(const struct path *path);
++umode_t mode_strip_sgid(const struct inode *dir, umode_t mode);
+
+ /*
+ * This is the "filldir" function type, used by readdir() to let
+@@ -2959,7 +2960,7 @@ extern void __destroy_inode(struct inode *);
+ extern struct inode *new_inode_pseudo(struct super_block *sb);
+ extern struct inode *new_inode(struct super_block *sb);
+ extern void free_inode_nonrcu(struct inode *inode);
+-extern int should_remove_suid(struct dentry *);
++extern int setattr_should_drop_suidgid(struct inode *);
+ extern int file_remove_privs(struct file *);
+
+ extern void __insert_inode_hash(struct inode *, unsigned long hashval);
+@@ -3407,7 +3408,7 @@ int __init get_filesystem_list(char *buf);
+
+ static inline bool is_sxid(umode_t mode)
+ {
+- return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
++ return mode & (S_ISUID | S_ISGID);
+ }
+
+ static inline int check_sticky(struct inode *dir, struct inode *inode)
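The is_sxid() widening above is easy to replay in isolation: the old predicate ignored a setgid bit with group-exec clear (the mandatory-locking style mode), while the new one treats any setuid/setgid bit as security relevant, which is what lets the renamed setattr_should_drop_suidgid() path catch such files too. A small self-contained check of the two predicates as they appear in the hunk:

#include <stdio.h>
#include <sys/stat.h>

typedef unsigned int umode_t;

static int is_sxid_old(umode_t mode)
{
        return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
}

static int is_sxid_new(umode_t mode)
{
        return !!(mode & (S_ISUID | S_ISGID));
}

int main(void)
{
        umode_t mode = 02644;   /* setgid set, group-exec clear */

        printf("old is_sxid: %d, new is_sxid: %d\n",
               is_sxid_old(mode), is_sxid_new(mode));
        return 0;
}
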
+diff --git a/include/linux/hid.h b/include/linux/hid.h
+index 2ba33d708942c..256f34f49167c 100644
+--- a/include/linux/hid.h
++++ b/include/linux/hid.h
+@@ -798,6 +798,7 @@ struct hid_driver {
+ * @raw_request: send raw report request to device (e.g. feature report)
+ * @output_report: send output report to device
+ * @idle: send idle request to device
++ * @max_buffer_size: over-ride maximum data buffer size (default: HID_MAX_BUFFER_SIZE)
+ */
+ struct hid_ll_driver {
+ int (*start)(struct hid_device *hdev);
+@@ -822,6 +823,8 @@ struct hid_ll_driver {
+ int (*output_report) (struct hid_device *hdev, __u8 *buf, size_t len);
+
+ int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype);
++
++ unsigned int max_buffer_size;
+ };
+
+ extern struct hid_ll_driver i2c_hid_ll_driver;
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index b478a16ef284d..9ef63bc14b002 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -270,9 +270,11 @@ struct hh_cache {
+ * relationship HH alignment <= LL alignment.
+ */
+ #define LL_RESERVED_SPACE(dev) \
+- ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
++ ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom)) \
++ & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+ #define LL_RESERVED_SPACE_EXTRA(dev,extra) \
+- ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
++ ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom) + (extra)) \
++ & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+
+ struct header_ops {
+ int (*create) (struct sk_buff *skb, struct net_device *dev,
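LL_RESERVED_SPACE()/LL_RESERVED_SPACE_EXTRA() run on packet fast paths with no lock held against the writer, and the ip_tunnel/ip6_tunnel hunks later in this patch publish needed_headroom with WRITE_ONCE() to match. A minimal userspace model of that annotated reader/writer pair, using the usual volatile-cast macro definitions rather than the kernel's exact ones:

#include <stdio.h>

#define READ_ONCE(x)      (*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v)  (*(volatile __typeof__(x) *)&(x) = (v))

struct fake_dev {
        unsigned short hard_header_len;
        unsigned short needed_headroom;   /* updated while packets flow */
};

#define HH_DATA_MOD 16
#define LL_RESERVED_SPACE(dev) \
        ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom)) \
          & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)

int main(void)
{
        struct fake_dev dev = { .hard_header_len = 14, .needed_headroom = 0 };
        unsigned int headroom = 64;

        /* transmit path discovers it needs more headroom and publishes it */
        if (headroom > READ_ONCE(dev.needed_headroom))
                WRITE_ONCE(dev.needed_headroom, headroom);

        printf("reserved space: %u\n", (unsigned int)LL_RESERVED_SPACE(&dev));
        return 0;
}
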
+diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
+index c255273b02810..37ad81058d6ae 100644
+--- a/include/linux/sh_intc.h
++++ b/include/linux/sh_intc.h
+@@ -97,7 +97,10 @@ struct intc_hw_desc {
+ unsigned int nr_subgroups;
+ };
+
+-#define _INTC_ARRAY(a) a, __same_type(a, NULL) ? 0 : sizeof(a)/sizeof(*a)
++#define _INTC_SIZEOF_OR_ZERO(a) (_Generic(a, \
++ typeof(NULL): 0, \
++ default: sizeof(a)))
++#define _INTC_ARRAY(a) a, _INTC_SIZEOF_OR_ZERO(a)/sizeof(*a)
+
+ #define INTC_HW_DESC(vectors, groups, mask_regs, \
+ prio_regs, sense_regs, ack_regs) \
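The _Generic rewrite of _INTC_ARRAY() above can be exercised outside the kernel: when the argument is a bare NULL the size evaluates to 0, and when it is a real array the element count comes out as before, without the __same_type(a, NULL) comparison that newer compilers flag. A standalone sketch with the same macro shape and a simplified descriptor struct:

#include <stdio.h>
#include <stddef.h>

#define SIZEOF_OR_ZERO(a) (_Generic((a),                \
        __typeof__(NULL): 0,                            \
        default: sizeof(a)))
#define INTC_ARRAY(a) (a), (SIZEOF_OR_ZERO(a) / sizeof(*(a)))

struct vec_desc {
        const unsigned int *vectors;
        unsigned int nr_vectors;
};

static const unsigned int vecs[] = { 0x400, 0x420, 0x440 };

int main(void)
{
        struct vec_desc with_table = { INTC_ARRAY(vecs) };
        struct vec_desc without_table = { INTC_ARRAY(NULL) };

        printf("with table:    %u entries\n", with_table.nr_vectors);
        printf("without table: %u entries\n", without_table.nr_vectors);
        return 0;
}
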
+diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
+index e4c5df71f0e74..4e1356c35fe62 100644
+--- a/include/linux/tracepoint.h
++++ b/include/linux/tracepoint.h
+@@ -234,12 +234,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ *
+- * When lockdep is enabled, we make sure to always do the RCU portions of
+- * the tracepoint code, regardless of whether tracing is on. However,
+- * don't check if the condition is false, due to interaction with idle
+- * instrumentation. This lets us find RCU issues triggered with tracepoints
+- * even when this tracepoint is off. This code has no purpose other than
+- * poking RCU a bit.
++ * When lockdep is enabled, we make sure to always test if RCU is
++ * "watching" regardless if the tracepoint is enabled or not. Tracepoints
++ * require RCU to be active, and it should always warn at the tracepoint
++ * site if it is not watching, as it will need to be active when the
++ * tracepoint is enabled.
+ */
+ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
+ extern int __traceiter_##name(data_proto); \
+@@ -253,9 +252,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
+ TP_ARGS(data_args), \
+ TP_CONDITION(cond), 0); \
+ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
+- rcu_read_lock_sched_notrace(); \
+- rcu_dereference_sched(__tracepoint_##name.funcs);\
+- rcu_read_unlock_sched_notrace(); \
++ WARN_ON_ONCE(!rcu_is_watching()); \
+ } \
+ } \
+ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 445afda927f47..fd799567fc23a 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -5792,10 +5792,10 @@ static int io_arm_poll_handler(struct io_kiocb *req)
+ }
+ } else {
+ apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
++ if (unlikely(!apoll))
++ return IO_APOLL_ABORTED;
+ apoll->poll.retries = APOLL_MAX_RETRY;
+ }
+- if (unlikely(!apoll))
+- return IO_APOLL_ABORTED;
+ apoll->double_poll = NULL;
+ req->apoll = apoll;
+ req->flags |= REQ_F_POLLED;
+diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
+index d97c189695cbb..67829b6e07bdc 100644
+--- a/kernel/trace/ftrace.c
++++ b/kernel/trace/ftrace.c
+@@ -1538,7 +1538,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
+ key.flags = end; /* overload flags, as it is unsigned long */
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+- if (end < pg->records[0].ip ||
++ if (pg->index == 0 ||
++ end < pg->records[0].ip ||
+ start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
+ continue;
+ rec = bsearch(&key, pg->records, pg->index,
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 8637eab2986ee..ce45bdd9077db 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -4705,6 +4705,8 @@ loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
+ static const struct file_operations tracing_fops = {
+ .open = tracing_open,
+ .read = seq_read,
++ .read_iter = seq_read_iter,
++ .splice_read = generic_file_splice_read,
+ .write = tracing_write_stub,
+ .llseek = tracing_lseek,
+ .release = tracing_release,
+diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
+index ccc99cd23f3c4..9ed65191888ef 100644
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -1087,6 +1087,9 @@ static const char *hist_field_name(struct hist_field *field,
+ {
+ const char *field_name = "";
+
++ if (WARN_ON_ONCE(!field))
++ return field_name;
++
+ if (level > 1)
+ return field_name;
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 9b15760e0541a..e4c690c21fc9c 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1994,7 +1994,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
+ {
+ struct mm_struct *mm = vma->vm_mm;
+ pgtable_t pgtable;
+- pmd_t _pmd;
++ pmd_t _pmd, old_pmd;
+ int i;
+
+ /*
+@@ -2005,7 +2005,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
+ *
+ * See Documentation/vm/mmu_notifier.rst
+ */
+- pmdp_huge_clear_flush(vma, haddr, pmd);
++ old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
+
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+ pmd_populate(mm, &_pmd, pgtable);
+@@ -2014,6 +2014,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
+ pte_t *pte, entry;
+ entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
+ entry = pte_mkspecial(entry);
++ if (pmd_uffd_wp(old_pmd))
++ entry = pte_mkuffd_wp(entry);
+ pte = pte_offset_map(&_pmd, haddr);
+ VM_BUG_ON(!pte_none(*pte));
+ set_pte_at(mm, haddr, pte, entry);
+diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
+index 5f786ef662ead..41f890bf9d4c4 100644
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -573,6 +573,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
+ cfg->fc_scope = RT_SCOPE_UNIVERSE;
+ }
+
++ if (!cfg->fc_table)
++ cfg->fc_table = RT_TABLE_MAIN;
++
+ if (cmd == SIOCDELRT)
+ return 0;
+
+diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
+index be75b409445c2..99f70b990eb13 100644
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -613,10 +613,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+ }
+
+ headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+- if (headroom > dev->needed_headroom)
+- dev->needed_headroom = headroom;
++ if (headroom > READ_ONCE(dev->needed_headroom))
++ WRITE_ONCE(dev->needed_headroom, headroom);
+
+- if (skb_cow_head(skb, dev->needed_headroom)) {
++ if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ ip_rt_put(rt);
+ goto tx_dropped;
+ }
+@@ -797,10 +797,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+
+ max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
+- if (max_headroom > dev->needed_headroom)
+- dev->needed_headroom = max_headroom;
++ if (max_headroom > READ_ONCE(dev->needed_headroom))
++ WRITE_ONCE(dev->needed_headroom, max_headroom);
+
+- if (skb_cow_head(skb, dev->needed_headroom)) {
++ if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ ip_rt_put(rt);
+ dev->stats.tx_dropped++;
+ kfree_skb(skb);
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index eefd032bc6dbd..e4ad274ec7a30 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -3609,7 +3609,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
+ th->window = htons(min(req->rsk_rcv_wnd, 65535U));
+ tcp_options_write((__be32 *)(th + 1), NULL, &opts);
+ th->doff = (tcp_header_size >> 2);
+- __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
++ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
+
+ #ifdef CONFIG_TCP_MD5SIG
+ /* Okay, we have all we need - do the md5 hash if needed */
+diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
+index 0d4cab94c5dd2..a03a322e0cc1c 100644
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1267,8 +1267,8 @@ route_lookup:
+ */
+ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
+ + dst->header_len + t->hlen;
+- if (max_headroom > dev->needed_headroom)
+- dev->needed_headroom = max_headroom;
++ if (max_headroom > READ_ONCE(dev->needed_headroom))
++ WRITE_ONCE(dev->needed_headroom, max_headroom);
+
+ err = ip6_tnl_encap(skb, t, &proto, fl6);
+ if (err)
+diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
+index 349c6ac3313f7..6f84978a77265 100644
+--- a/net/iucv/iucv.c
++++ b/net/iucv/iucv.c
+@@ -83,7 +83,7 @@ struct iucv_irq_data {
+ u16 ippathid;
+ u8 ipflags1;
+ u8 iptype;
+- u32 res2[8];
++ u32 res2[9];
+ };
+
+ struct iucv_irq_list {
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 3b154ad4945c4..607519246bf28 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -275,7 +275,6 @@ void mptcp_subflow_reset(struct sock *ssk)
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+
+- tcp_set_state(ssk, TCP_CLOSE);
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ tcp_done(ssk);
+ if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
+diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
+index 9953e80537536..1818dbf089cad 100644
+--- a/net/netfilter/nft_masq.c
++++ b/net/netfilter/nft_masq.c
+@@ -43,7 +43,7 @@ static int nft_masq_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+ {
+- u32 plen = sizeof_field(struct nf_nat_range, min_addr.all);
++ u32 plen = sizeof_field(struct nf_nat_range, min_proto.all);
+ struct nft_masq *priv = nft_expr_priv(expr);
+ int err;
+
+diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
+index db8f9116eeb43..cd4eb4996aff3 100644
+--- a/net/netfilter/nft_nat.c
++++ b/net/netfilter/nft_nat.c
+@@ -226,7 +226,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ priv->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+- plen = sizeof_field(struct nf_nat_range, min_addr.all);
++ plen = sizeof_field(struct nf_nat_range, min_proto.all);
+ if (tb[NFTA_NAT_REG_PROTO_MIN]) {
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
+diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
+index ba09890dddb50..e64f531d66cfc 100644
+--- a/net/netfilter/nft_redir.c
++++ b/net/netfilter/nft_redir.c
+@@ -48,7 +48,7 @@ static int nft_redir_init(const struct nft_ctx *ctx,
+ unsigned int plen;
+ int err;
+
+- plen = sizeof_field(struct nf_nat_range, min_addr.all);
++ plen = sizeof_field(struct nf_nat_range, min_proto.all);
+ if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
+ err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
+@@ -232,7 +232,7 @@ static struct nft_expr_type nft_redir_inet_type __read_mostly = {
+ .name = "redir",
+ .ops = &nft_redir_inet_ops,
+ .policy = nft_redir_policy,
+- .maxattr = NFTA_MASQ_MAX,
++ .maxattr = NFTA_REDIR_MAX,
+ .owner = THIS_MODULE,
+ };
+
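The three netfilter hunks above (masq, nat, redir) all change the permitted register-load size from the address union to the layer-4 proto field: what is being loaded is a 16-bit port, not a 128-bit address. A userspace sketch of why that matters, with sizeof_field() spelled out and a simplified stand-in for the real nf_nat_range layout:

#include <stdio.h>
#include <stdint.h>

#define sizeof_field(TYPE, MEMBER) sizeof(((TYPE *)0)->MEMBER)

union inet_addr_stub  { uint32_t all[4]; uint32_t ip; uint32_t ip6[4]; };
union man_proto_stub  { uint16_t all; uint16_t port; };

struct nf_nat_range_stub {
        unsigned int flags;
        union inet_addr_stub min_addr, max_addr;
        union man_proto_stub min_proto, max_proto;
};

int main(void)
{
        printf("min_addr.all  -> %zu bytes (what the old code allowed)\n",
               sizeof_field(struct nf_nat_range_stub, min_addr.all));
        printf("min_proto.all -> %zu bytes (a port, what the fix allows)\n",
               sizeof_field(struct nf_nat_range_stub, min_proto.all));
        return 0;
}
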
+diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
+index 94503f36b9a61..9125d28d9ff5d 100644
+--- a/net/smc/smc_cdc.c
++++ b/net/smc/smc_cdc.c
+@@ -104,6 +104,9 @@ int smc_cdc_msg_send(struct smc_connection *conn,
+ union smc_host_cursor cfed;
+ int rc;
+
++ if (unlikely(!READ_ONCE(conn->sndbuf_desc)))
++ return -ENOBUFS;
++
+ smc_cdc_add_pending_send(conn, pend);
+
+ conn->tx_cdc_seq++;
+diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
+index bf485a2017a4e..e84241ff4ac4f 100644
+--- a/net/smc/smc_core.c
++++ b/net/smc/smc_core.c
+@@ -912,7 +912,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
+ if (lgr->terminating)
+ return; /* lgr already terminating */
+ /* cancel free_work sync, will terminate when lgr->freeing is set */
+- cancel_delayed_work_sync(&lgr->free_work);
++ cancel_delayed_work(&lgr->free_work);
+ lgr->terminating = 1;
+
+ /* kill remaining link group connections */
+diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
+index fdbd56ed4bd52..ba73014805a4f 100644
+--- a/net/xfrm/xfrm_state.c
++++ b/net/xfrm/xfrm_state.c
+@@ -2611,9 +2611,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
+ if (inner_mode == NULL)
+ goto error;
+
+- if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL))
+- goto error;
+-
+ x->inner_mode = *inner_mode;
+
+ if (x->props.family == AF_INET)
+diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
+index 2a5ba9dca6b08..f96e70c85f84a 100644
+--- a/sound/hda/intel-dsp-config.c
++++ b/sound/hda/intel-dsp-config.c
+@@ -359,6 +359,15 @@ static const struct config_entry config_table[] = {
+ },
+ #endif
+
++/* Meteor Lake */
++#if IS_ENABLED(CONFIG_SND_SOC_SOF_METEORLAKE)
++ /* Meteorlake-P */
++ {
++ .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
++ .device = 0x7e28,
++ },
++#endif
++
+ };
+
+ static const struct config_entry *snd_intel_dsp_find_config
+diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
+index 494bfd2135a9e..de1fe604905f3 100644
+--- a/sound/pci/hda/hda_intel.c
++++ b/sound/pci/hda/hda_intel.c
+@@ -365,14 +365,15 @@ enum {
+ #define needs_eld_notify_link(chip) false
+ #endif
+
+-#define CONTROLLER_IN_GPU(pci) (((pci)->device == 0x0a0c) || \
++#define CONTROLLER_IN_GPU(pci) (((pci)->vendor == 0x8086) && \
++ (((pci)->device == 0x0a0c) || \
+ ((pci)->device == 0x0c0c) || \
+ ((pci)->device == 0x0d0c) || \
+ ((pci)->device == 0x160c) || \
+ ((pci)->device == 0x490d) || \
+ ((pci)->device == 0x4f90) || \
+ ((pci)->device == 0x4f91) || \
+- ((pci)->device == 0x4f92))
++ ((pci)->device == 0x4f92)))
+
+ #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index f2ef75c8de427..2cf6600c9ca83 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -9091,6 +9091,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP),
+ SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
+ SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP),
++ SND_PCI_QUIRK(0x144d, 0xc868, "Samsung Galaxy Book2 Pro (NP930XED)", ALC298_FIXUP_SAMSUNG_AMP),
+ SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
+diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
+index 834066d465fc1..f0fbd7367f4f6 100755
+--- a/tools/testing/selftests/net/devlink_port_split.py
++++ b/tools/testing/selftests/net/devlink_port_split.py
+@@ -57,6 +57,8 @@ class devlink_ports(object):
+ assert stderr == ""
+ ports = json.loads(stdout)['port']
+
++ validate_devlink_output(ports, 'flavour')
++
+ for port in ports:
+ if dev in port:
+ if ports[port]['flavour'] == 'physical':
+@@ -218,6 +220,27 @@ def split_splittable_port(port, k, lanes, dev):
+ unsplit(port.bus_info)
+
+
++def validate_devlink_output(devlink_data, target_property=None):
++ """
++ Determine if test should be skipped by checking:
++ 1. devlink_data contains values
++ 2. The target_property exist in devlink_data
++ """
++ skip_reason = None
++ if any(devlink_data.values()):
++ if target_property:
++ skip_reason = "{} not found in devlink output, test skipped".format(target_property)
++ for key in devlink_data:
++ if target_property in devlink_data[key]:
++ skip_reason = None
++ else:
++ skip_reason = 'devlink output is empty, test skipped'
++
++ if skip_reason:
++ print(skip_reason)
++ sys.exit(KSFT_SKIP)
++
++
+ def make_parser():
+ parser = argparse.ArgumentParser(description='A test for port splitting.')
+ parser.add_argument('--dev',
+@@ -238,6 +261,7 @@ def main(cmdline=None):
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+
++ validate_devlink_output(json.loads(stdout))
+ devs = json.loads(stdout)['dev']
+ dev = list(devs.keys())[0]
+
+@@ -249,6 +273,7 @@ def main(cmdline=None):
+
+ ports = devlink_ports(dev)
+
++ found_max_lanes = False
+ for port in ports.if_names:
+ max_lanes = get_max_lanes(port.name)
+
+@@ -271,6 +296,11 @@ def main(cmdline=None):
+ split_splittable_port(port, lane, max_lanes, dev)
+
+ lane //= 2
++ found_max_lanes = True
++
++ if not found_max_lanes:
++ print(f"Test not started, no port of device {dev} reports max_lanes")
++ sys.exit(KSFT_SKIP)
+
+
+ if __name__ == "__main__":