-rw-r--r--   0000_README                  4
-rw-r--r--   1175_linux-5.10.176.patch    8752
2 files changed, 8756 insertions, 0 deletions
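One of the fixes carried in this point release, visible in the arch/s390/boot/ipl_report.c hunk below, extends the boot-time search for a safe load address so that the component and certificate list headers themselves are treated as reserved ranges. The following is a minimal, self-contained sketch of that bump-and-rescan pattern only; struct range, find_safe_addr and this standalone intersects() are names invented for the example, not kernel APIs.

/*
 * Illustrative sketch of the "bump past the overlap and rescan" pattern
 * used by the ipl_report.c hunk below. Not kernel code.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct range { unsigned long addr, len; };

/* Do the half-open ranges [a, a+a_len) and [b, b+b_len) overlap? */
static bool intersects(unsigned long a, unsigned long a_len,
                       unsigned long b, unsigned long b_len)
{
	return a < b + b_len && b < a + a_len;
}

/*
 * Bump safe_addr past any reserved range it overlaps, and restart the scan
 * after every bump so ranges already checked are re-checked at the new
 * position.
 */
static unsigned long find_safe_addr(unsigned long safe_addr, unsigned long size,
                                    const struct range *reserved, size_t n)
{
	size_t i;
repeat:
	for (i = 0; i < n; i++) {
		if (intersects(safe_addr, size, reserved[i].addr, reserved[i].len)) {
			safe_addr = reserved[i].addr + reserved[i].len;
			goto repeat;
		}
	}
	return safe_addr;
}

int main(void)
{
	const struct range reserved[] = { { 0x1000, 0x1000 }, { 0x2000, 0x800 } };

	/* Prints 0x2800: 0x1800 is bumped past both reserved ranges. */
	printf("0x%lx\n", find_safe_addr(0x1800, 0x400, reserved, 2));
	return 0;
}

Restarting the scan after every bump is what makes the order of the reserved ranges irrelevant: moving past one range can land inside a range that was already checked, so it must be examined again.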
diff --git a/0000_README b/0000_README
index f3c2cbaf..50964ce7 100644
--- a/0000_README
+++ b/0000_README
@@ -743,6 +743,10 @@ Patch: 1174_linux-5.10.175.patch
 From: https://www.kernel.org
 Desc: Linux 5.10.175
 
+Patch: 1175_linux-5.10.176.patch
+From: https://www.kernel.org
+Desc: Linux 5.10.176
+
 Patch: 1500_XATTR_USER_PREFIX.patch
 From: https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1175_linux-5.10.176.patch b/1175_linux-5.10.176.patch
new file mode 100644
index 00000000..8c46ce2a
--- /dev/null
+++ b/1175_linux-5.10.176.patch
@@ -0,0 +1,8752 @@
+diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
+index ca52c82e5bb54..f7b69a0e71e1c 100644
+--- a/Documentation/filesystems/vfs.rst
++++ b/Documentation/filesystems/vfs.rst
+@@ -1188,7 +1188,7 @@ defined:
+ return
+ -ECHILD and it will be called again in ref-walk mode.
+ 
+-``_weak_revalidate``
++``d_weak_revalidate``
+ called when the VFS needs to revalidate a "jumped" dentry. This
+ is called when a path-walk ends at dentry that was not acquired
+ by doing a lookup in the parent directory. This includes "/",
+diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
+index 87cf5c010d5dd..ed2e45f9b7627 100644
+--- a/Documentation/trace/ftrace.rst
++++ b/Documentation/trace/ftrace.rst
+@@ -2923,7 +2923,7 @@ Produces::
+ bash-1994 [000] .... 4342.324898: ima_get_action <-process_measurement
+ bash-1994 [000] .... 4342.324898: ima_match_policy <-ima_get_action
+ bash-1994 [000] .... 4342.324899: do_truncate <-do_last
+- bash-1994 [000] .... 4342.324899: should_remove_suid <-do_truncate
++ bash-1994 [000] .... 4342.324899: setattr_should_drop_suidgid <-do_truncate
+ bash-1994 [000] .... 4342.324899: notify_change <-do_truncate
+ bash-1994 [000] .... 4342.324900: current_fs_time <-notify_change
+ bash-1994 [000] .... 4342.324900: current_kernel_time <-current_fs_time
+diff --git a/Makefile b/Makefile
+index e6b09052f222b..71caf59383615 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 5
+ PATCHLEVEL = 10
+-SUBLEVEL = 175
++SUBLEVEL = 176
+ EXTRAVERSION =
+ NAME = Dare mighty things
+ 
+diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
+index 0b4965573656f..88bacf4999c47 100644
+--- a/arch/s390/boot/ipl_report.c
++++ b/arch/s390/boot/ipl_report.c
+@@ -57,11 +57,19 @@ repeat:
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
+ intersects(INITRD_START, INITRD_SIZE, safe_addr, size))
+ safe_addr = INITRD_START + INITRD_SIZE;
++ if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) {
++ safe_addr = (unsigned long)comps + comps->len;
++ goto repeat;
++ }
+ for_each_rb_entry(comp, comps)
+ if (intersects(safe_addr, size, comp->addr, comp->len)) {
+ safe_addr = comp->addr + comp->len;
+ goto repeat;
+ }
++ if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) {
++ safe_addr = (unsigned long)certs + certs->len;
++ goto repeat;
++ }
+ for_each_rb_entry(cert, certs)
+ if (intersects(safe_addr, size, cert->addr, cert->len)) {
+ safe_addr = cert->addr + cert->len;
+diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
+index 1906387a0faf4..0b7c81389c50a 100644
+--- a/arch/x86/kernel/cpu/mce/core.c
++++ b/arch/x86/kernel/cpu/mce/core.c
+@@ -2309,6 +2309,7 @@ static void mce_restart(void)
+ {
+ mce_timer_delete_all();
+ on_each_cpu(mce_cpu_restart, NULL, 1);
++ mce_schedule_work();
+ }
+ 
+ /* Toggle features for corrected errors */
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 91371b01eae0c..c165ddbb672fe 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2998,7 +2998,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12,
+ enum vm_entry_failure_code *entry_failure_code)
+ {
+- bool ia32e;
++ bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
+ 
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+ 
+@@ -3024,6 +3024,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ vmcs12->guest_ia32_perf_global_ctrl)))
+ return -EINVAL;
+ 
++ if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
++ return -EINVAL;
++
++ if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
++ CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
++ return -EINVAL;
++
+ /*
+ * If the load IA32_EFER VM-entry control is 1, the following checks
+ * are performed on the field for the IA32_EFER MSR:
+@@ -3035,7 +3042,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ */
+ if (to_vmx(vcpu)->nested.nested_run_pending &&
+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
+- ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
+ if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
+ CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
+ CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
+diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
+index 011e042b47ba7..5ec47af786ddb 100644
+--- a/arch/x86/mm/mem_encrypt_identity.c
++++ b/arch/x86/mm/mem_encrypt_identity.c
+@@ -586,7 +586,8 @@ void __init sme_enable(struct boot_params *bp)
+ cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
+ ((u64)bp->ext_cmd_line_ptr << 32));
+ 
+- cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
++ if 
(cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0) ++ return; + + if (!strncmp(buffer, cmdline_on, sizeof(buffer))) + sme_me_mask = me_mask; +diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig +index 40c53632512b7..9617688b58b32 100644 +--- a/drivers/block/Kconfig ++++ b/drivers/block/Kconfig +@@ -16,13 +16,7 @@ menuconfig BLK_DEV + + if BLK_DEV + +-config BLK_DEV_NULL_BLK +- tristate "Null test block driver" +- select CONFIGFS_FS +- +-config BLK_DEV_NULL_BLK_FAULT_INJECTION +- bool "Support fault injection for Null test block driver" +- depends on BLK_DEV_NULL_BLK && FAULT_INJECTION ++source "drivers/block/null_blk/Kconfig" + + config BLK_DEV_FD + tristate "Normal floppy disk support" +diff --git a/drivers/block/Makefile b/drivers/block/Makefile +index e1f63117ee94f..a3170859e01d4 100644 +--- a/drivers/block/Makefile ++++ b/drivers/block/Makefile +@@ -41,12 +41,7 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ + obj-$(CONFIG_ZRAM) += zram/ + obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ + +-obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o +-null_blk-objs := null_blk_main.o +-ifeq ($(CONFIG_BLK_DEV_ZONED), y) +-null_blk-$(CONFIG_TRACING) += null_blk_trace.o +-endif +-null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o ++obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/ + + skd-y := skd_main.o + swim_mod-y := swim.o swim_asm.o +diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h +deleted file mode 100644 +index 7de703f28617b..0000000000000 +--- a/drivers/block/null_blk.h ++++ /dev/null +@@ -1,137 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-#ifndef __BLK_NULL_BLK_H +-#define __BLK_NULL_BLK_H +- +-#undef pr_fmt +-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +- +-#include <linux/blkdev.h> +-#include <linux/slab.h> +-#include <linux/blk-mq.h> +-#include <linux/hrtimer.h> +-#include <linux/configfs.h> +-#include <linux/badblocks.h> +-#include <linux/fault-inject.h> +- +-struct nullb_cmd { +- struct request *rq; +- struct bio *bio; +- unsigned int tag; +- blk_status_t error; +- struct nullb_queue *nq; +- struct hrtimer timer; +- bool fake_timeout; +-}; +- +-struct nullb_queue { +- unsigned long *tag_map; +- wait_queue_head_t wait; +- unsigned int queue_depth; +- struct nullb_device *dev; +- unsigned int requeue_selection; +- +- struct nullb_cmd *cmds; +-}; +- +-struct nullb_device { +- struct nullb *nullb; +- struct config_item item; +- struct radix_tree_root data; /* data stored in the disk */ +- struct radix_tree_root cache; /* disk cache data */ +- unsigned long flags; /* device flags */ +- unsigned int curr_cache; +- struct badblocks badblocks; +- +- unsigned int nr_zones; +- unsigned int nr_zones_imp_open; +- unsigned int nr_zones_exp_open; +- unsigned int nr_zones_closed; +- struct blk_zone *zones; +- sector_t zone_size_sects; +- spinlock_t zone_lock; +- unsigned long *zone_locks; +- +- unsigned long size; /* device size in MB */ +- unsigned long completion_nsec; /* time in ns to complete a request */ +- unsigned long cache_size; /* disk cache size in MB */ +- unsigned long zone_size; /* zone size in MB if device is zoned */ +- unsigned long zone_capacity; /* zone capacity in MB if device is zoned */ +- unsigned int zone_nr_conv; /* number of conventional zones */ +- unsigned int zone_max_open; /* max number of open zones */ +- unsigned int zone_max_active; /* max number of active zones */ +- unsigned int submit_queues; /* number of submission queues */ +- unsigned int home_node; /* home node for the device */ +- unsigned int queue_mode; /* block interface */ +- 
unsigned int blocksize; /* block size */ +- unsigned int irqmode; /* IRQ completion handler */ +- unsigned int hw_queue_depth; /* queue depth */ +- unsigned int index; /* index of the disk, only valid with a disk */ +- unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */ +- bool blocking; /* blocking blk-mq device */ +- bool use_per_node_hctx; /* use per-node allocation for hardware context */ +- bool power; /* power on/off the device */ +- bool memory_backed; /* if data is stored in memory */ +- bool discard; /* if support discard */ +- bool zoned; /* if device is zoned */ +-}; +- +-struct nullb { +- struct nullb_device *dev; +- struct list_head list; +- unsigned int index; +- struct request_queue *q; +- struct gendisk *disk; +- struct blk_mq_tag_set *tag_set; +- struct blk_mq_tag_set __tag_set; +- unsigned int queue_depth; +- atomic_long_t cur_bytes; +- struct hrtimer bw_timer; +- unsigned long cache_flush_pos; +- spinlock_t lock; +- +- struct nullb_queue *queues; +- unsigned int nr_queues; +- char disk_name[DISK_NAME_LEN]; +-}; +- +-blk_status_t null_process_cmd(struct nullb_cmd *cmd, +- enum req_opf op, sector_t sector, +- unsigned int nr_sectors); +- +-#ifdef CONFIG_BLK_DEV_ZONED +-int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q); +-int null_register_zoned_dev(struct nullb *nullb); +-void null_free_zoned_dev(struct nullb_device *dev); +-int null_report_zones(struct gendisk *disk, sector_t sector, +- unsigned int nr_zones, report_zones_cb cb, void *data); +-blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, +- enum req_opf op, sector_t sector, +- sector_t nr_sectors); +-size_t null_zone_valid_read_len(struct nullb *nullb, +- sector_t sector, unsigned int len); +-#else +-static inline int null_init_zoned_dev(struct nullb_device *dev, +- struct request_queue *q) +-{ +- pr_err("CONFIG_BLK_DEV_ZONED not enabled\n"); +- return -EINVAL; +-} +-static inline int null_register_zoned_dev(struct nullb *nullb) +-{ +- return -ENODEV; +-} +-static inline void null_free_zoned_dev(struct nullb_device *dev) {} +-static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, +- enum req_opf op, sector_t sector, sector_t nr_sectors) +-{ +- return BLK_STS_NOTSUPP; +-} +-static inline size_t null_zone_valid_read_len(struct nullb *nullb, +- sector_t sector, +- unsigned int len) +-{ +- return len; +-} +-#define null_report_zones NULL +-#endif /* CONFIG_BLK_DEV_ZONED */ +-#endif /* __NULL_BLK_H */ +diff --git a/drivers/block/null_blk/Kconfig b/drivers/block/null_blk/Kconfig +new file mode 100644 +index 0000000000000..6bf1f8ca20a24 +--- /dev/null ++++ b/drivers/block/null_blk/Kconfig +@@ -0,0 +1,12 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# ++# Null block device driver configuration ++# ++ ++config BLK_DEV_NULL_BLK ++ tristate "Null test block driver" ++ select CONFIGFS_FS ++ ++config BLK_DEV_NULL_BLK_FAULT_INJECTION ++ bool "Support fault injection for Null test block driver" ++ depends on BLK_DEV_NULL_BLK && FAULT_INJECTION +diff --git a/drivers/block/null_blk/Makefile b/drivers/block/null_blk/Makefile +new file mode 100644 +index 0000000000000..84c36e512ab89 +--- /dev/null ++++ b/drivers/block/null_blk/Makefile +@@ -0,0 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 ++ ++# needed for trace events ++ccflags-y += -I$(src) ++ ++obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o ++null_blk-objs := main.o ++ifeq ($(CONFIG_BLK_DEV_ZONED), y) ++null_blk-$(CONFIG_TRACING) += trace.o ++endif ++null_blk-$(CONFIG_BLK_DEV_ZONED) += zoned.o +diff --git 
a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c +new file mode 100644 +index 0000000000000..25db095e943b7 +--- /dev/null ++++ b/drivers/block/null_blk/main.c +@@ -0,0 +1,2036 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and ++ * Shaohua Li <shli@fb.com> ++ */ ++#include <linux/module.h> ++ ++#include <linux/moduleparam.h> ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/init.h> ++#include "null_blk.h" ++ ++#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) ++#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) ++#define SECTOR_MASK (PAGE_SECTORS - 1) ++ ++#define FREE_BATCH 16 ++ ++#define TICKS_PER_SEC 50ULL ++#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC) ++ ++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION ++static DECLARE_FAULT_ATTR(null_timeout_attr); ++static DECLARE_FAULT_ATTR(null_requeue_attr); ++static DECLARE_FAULT_ATTR(null_init_hctx_attr); ++#endif ++ ++static inline u64 mb_per_tick(int mbps) ++{ ++ return (1 << 20) / TICKS_PER_SEC * ((u64) mbps); ++} ++ ++/* ++ * Status flags for nullb_device. ++ * ++ * CONFIGURED: Device has been configured and turned on. Cannot reconfigure. ++ * UP: Device is currently on and visible in userspace. ++ * THROTTLED: Device is being throttled. ++ * CACHE: Device is using a write-back cache. ++ */ ++enum nullb_device_flags { ++ NULLB_DEV_FL_CONFIGURED = 0, ++ NULLB_DEV_FL_UP = 1, ++ NULLB_DEV_FL_THROTTLED = 2, ++ NULLB_DEV_FL_CACHE = 3, ++}; ++ ++#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2) ++/* ++ * nullb_page is a page in memory for nullb devices. ++ * ++ * @page: The page holding the data. ++ * @bitmap: The bitmap represents which sector in the page has data. ++ * Each bit represents one block size. For example, sector 8 ++ * will use the 7th bit ++ * The highest 2 bits of bitmap are for special purpose. LOCK means the cache ++ * page is being flushing to storage. FREE means the cache page is freed and ++ * should be skipped from flushing to storage. Please see ++ * null_make_cache_space ++ */ ++struct nullb_page { ++ struct page *page; ++ DECLARE_BITMAP(bitmap, MAP_SZ); ++}; ++#define NULLB_PAGE_LOCK (MAP_SZ - 1) ++#define NULLB_PAGE_FREE (MAP_SZ - 2) ++ ++static LIST_HEAD(nullb_list); ++static struct mutex lock; ++static int null_major; ++static DEFINE_IDA(nullb_indexes); ++static struct blk_mq_tag_set tag_set; ++ ++enum { ++ NULL_IRQ_NONE = 0, ++ NULL_IRQ_SOFTIRQ = 1, ++ NULL_IRQ_TIMER = 2, ++}; ++ ++enum { ++ NULL_Q_BIO = 0, ++ NULL_Q_RQ = 1, ++ NULL_Q_MQ = 2, ++}; ++ ++static int g_no_sched; ++module_param_named(no_sched, g_no_sched, int, 0444); ++MODULE_PARM_DESC(no_sched, "No io scheduler"); ++ ++static int g_submit_queues = 1; ++module_param_named(submit_queues, g_submit_queues, int, 0444); ++MODULE_PARM_DESC(submit_queues, "Number of submission queues"); ++ ++static int g_home_node = NUMA_NO_NODE; ++module_param_named(home_node, g_home_node, int, 0444); ++MODULE_PARM_DESC(home_node, "Home node for the device"); ++ ++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION ++/* ++ * For more details about fault injection, please refer to ++ * Documentation/fault-injection/fault-injection.rst. ++ */ ++static char g_timeout_str[80]; ++module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444); ++MODULE_PARM_DESC(timeout, "Fault injection. 
timeout=<interval>,<probability>,<space>,<times>"); ++ ++static char g_requeue_str[80]; ++module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444); ++MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>"); ++ ++static char g_init_hctx_str[80]; ++module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444); ++MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>"); ++#endif ++ ++static int g_queue_mode = NULL_Q_MQ; ++ ++static int null_param_store_val(const char *str, int *val, int min, int max) ++{ ++ int ret, new_val; ++ ++ ret = kstrtoint(str, 10, &new_val); ++ if (ret) ++ return -EINVAL; ++ ++ if (new_val < min || new_val > max) ++ return -EINVAL; ++ ++ *val = new_val; ++ return 0; ++} ++ ++static int null_set_queue_mode(const char *str, const struct kernel_param *kp) ++{ ++ return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ); ++} ++ ++static const struct kernel_param_ops null_queue_mode_param_ops = { ++ .set = null_set_queue_mode, ++ .get = param_get_int, ++}; ++ ++device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444); ++MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); ++ ++static int g_gb = 250; ++module_param_named(gb, g_gb, int, 0444); ++MODULE_PARM_DESC(gb, "Size in GB"); ++ ++static int g_bs = 512; ++module_param_named(bs, g_bs, int, 0444); ++MODULE_PARM_DESC(bs, "Block size (in bytes)"); ++ ++static unsigned int nr_devices = 1; ++module_param(nr_devices, uint, 0444); ++MODULE_PARM_DESC(nr_devices, "Number of devices to register"); ++ ++static bool g_blocking; ++module_param_named(blocking, g_blocking, bool, 0444); ++MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); ++ ++static bool shared_tags; ++module_param(shared_tags, bool, 0444); ++MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); ++ ++static bool g_shared_tag_bitmap; ++module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444); ++MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq"); ++ ++static int g_irqmode = NULL_IRQ_SOFTIRQ; ++ ++static int null_set_irqmode(const char *str, const struct kernel_param *kp) ++{ ++ return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE, ++ NULL_IRQ_TIMER); ++} ++ ++static const struct kernel_param_ops null_irqmode_param_ops = { ++ .set = null_set_irqmode, ++ .get = param_get_int, ++}; ++ ++device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444); ++MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); ++ ++static unsigned long g_completion_nsec = 10000; ++module_param_named(completion_nsec, g_completion_nsec, ulong, 0444); ++MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); ++ ++static int g_hw_queue_depth = 64; ++module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444); ++MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); ++ ++static bool g_use_per_node_hctx; ++module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444); ++MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); ++ ++static bool g_zoned; ++module_param_named(zoned, g_zoned, bool, S_IRUGO); ++MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. 
Default: false"); ++ ++static unsigned long g_zone_size = 256; ++module_param_named(zone_size, g_zone_size, ulong, S_IRUGO); ++MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256"); ++ ++static unsigned long g_zone_capacity; ++module_param_named(zone_capacity, g_zone_capacity, ulong, 0444); ++MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size"); ++ ++static unsigned int g_zone_nr_conv; ++module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444); ++MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0"); ++ ++static unsigned int g_zone_max_open; ++module_param_named(zone_max_open, g_zone_max_open, uint, 0444); ++MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)"); ++ ++static unsigned int g_zone_max_active; ++module_param_named(zone_max_active, g_zone_max_active, uint, 0444); ++MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)"); ++ ++static struct nullb_device *null_alloc_dev(void); ++static void null_free_dev(struct nullb_device *dev); ++static void null_del_dev(struct nullb *nullb); ++static int null_add_dev(struct nullb_device *dev); ++static void null_free_device_storage(struct nullb_device *dev, bool is_cache); ++ ++static inline struct nullb_device *to_nullb_device(struct config_item *item) ++{ ++ return item ? container_of(item, struct nullb_device, item) : NULL; ++} ++ ++static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page) ++{ ++ return snprintf(page, PAGE_SIZE, "%u\n", val); ++} ++ ++static inline ssize_t nullb_device_ulong_attr_show(unsigned long val, ++ char *page) ++{ ++ return snprintf(page, PAGE_SIZE, "%lu\n", val); ++} ++ ++static inline ssize_t nullb_device_bool_attr_show(bool val, char *page) ++{ ++ return snprintf(page, PAGE_SIZE, "%u\n", val); ++} ++ ++static ssize_t nullb_device_uint_attr_store(unsigned int *val, ++ const char *page, size_t count) ++{ ++ unsigned int tmp; ++ int result; ++ ++ result = kstrtouint(page, 0, &tmp); ++ if (result < 0) ++ return result; ++ ++ *val = tmp; ++ return count; ++} ++ ++static ssize_t nullb_device_ulong_attr_store(unsigned long *val, ++ const char *page, size_t count) ++{ ++ int result; ++ unsigned long tmp; ++ ++ result = kstrtoul(page, 0, &tmp); ++ if (result < 0) ++ return result; ++ ++ *val = tmp; ++ return count; ++} ++ ++static ssize_t nullb_device_bool_attr_store(bool *val, const char *page, ++ size_t count) ++{ ++ bool tmp; ++ int result; ++ ++ result = kstrtobool(page, &tmp); ++ if (result < 0) ++ return result; ++ ++ *val = tmp; ++ return count; ++} ++ ++/* The following macro should only be used with TYPE = {uint, ulong, bool}. 
*/ ++#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \ ++static ssize_t \ ++nullb_device_##NAME##_show(struct config_item *item, char *page) \ ++{ \ ++ return nullb_device_##TYPE##_attr_show( \ ++ to_nullb_device(item)->NAME, page); \ ++} \ ++static ssize_t \ ++nullb_device_##NAME##_store(struct config_item *item, const char *page, \ ++ size_t count) \ ++{ \ ++ int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\ ++ struct nullb_device *dev = to_nullb_device(item); \ ++ TYPE new_value = 0; \ ++ int ret; \ ++ \ ++ ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\ ++ if (ret < 0) \ ++ return ret; \ ++ if (apply_fn) \ ++ ret = apply_fn(dev, new_value); \ ++ else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \ ++ ret = -EBUSY; \ ++ if (ret < 0) \ ++ return ret; \ ++ dev->NAME = new_value; \ ++ return count; \ ++} \ ++CONFIGFS_ATTR(nullb_device_, NAME); ++ ++static int nullb_apply_submit_queues(struct nullb_device *dev, ++ unsigned int submit_queues) ++{ ++ struct nullb *nullb = dev->nullb; ++ struct blk_mq_tag_set *set; ++ ++ if (!nullb) ++ return 0; ++ ++ /* ++ * Make sure that null_init_hctx() does not access nullb->queues[] past ++ * the end of that array. ++ */ ++ if (submit_queues > nr_cpu_ids) ++ return -EINVAL; ++ set = nullb->tag_set; ++ blk_mq_update_nr_hw_queues(set, submit_queues); ++ return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM; ++} ++ ++NULLB_DEVICE_ATTR(size, ulong, NULL); ++NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL); ++NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); ++NULLB_DEVICE_ATTR(home_node, uint, NULL); ++NULLB_DEVICE_ATTR(queue_mode, uint, NULL); ++NULLB_DEVICE_ATTR(blocksize, uint, NULL); ++NULLB_DEVICE_ATTR(irqmode, uint, NULL); ++NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL); ++NULLB_DEVICE_ATTR(index, uint, NULL); ++NULLB_DEVICE_ATTR(blocking, bool, NULL); ++NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL); ++NULLB_DEVICE_ATTR(memory_backed, bool, NULL); ++NULLB_DEVICE_ATTR(discard, bool, NULL); ++NULLB_DEVICE_ATTR(mbps, uint, NULL); ++NULLB_DEVICE_ATTR(cache_size, ulong, NULL); ++NULLB_DEVICE_ATTR(zoned, bool, NULL); ++NULLB_DEVICE_ATTR(zone_size, ulong, NULL); ++NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL); ++NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL); ++NULLB_DEVICE_ATTR(zone_max_open, uint, NULL); ++NULLB_DEVICE_ATTR(zone_max_active, uint, NULL); ++ ++static ssize_t nullb_device_power_show(struct config_item *item, char *page) ++{ ++ return nullb_device_bool_attr_show(to_nullb_device(item)->power, page); ++} ++ ++static ssize_t nullb_device_power_store(struct config_item *item, ++ const char *page, size_t count) ++{ ++ struct nullb_device *dev = to_nullb_device(item); ++ bool newp = false; ++ ssize_t ret; ++ ++ ret = nullb_device_bool_attr_store(&newp, page, count); ++ if (ret < 0) ++ return ret; ++ ++ if (!dev->power && newp) { ++ if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags)) ++ return count; ++ if (null_add_dev(dev)) { ++ clear_bit(NULLB_DEV_FL_UP, &dev->flags); ++ return -ENOMEM; ++ } ++ ++ set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); ++ dev->power = newp; ++ } else if (dev->power && !newp) { ++ if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) { ++ mutex_lock(&lock); ++ dev->power = newp; ++ null_del_dev(dev->nullb); ++ mutex_unlock(&lock); ++ } ++ clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); ++ } ++ ++ return count; ++} ++ ++CONFIGFS_ATTR(nullb_device_, power); ++ ++static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page) ++{ ++ struct nullb_device 
*t_dev = to_nullb_device(item); ++ ++ return badblocks_show(&t_dev->badblocks, page, 0); ++} ++ ++static ssize_t nullb_device_badblocks_store(struct config_item *item, ++ const char *page, size_t count) ++{ ++ struct nullb_device *t_dev = to_nullb_device(item); ++ char *orig, *buf, *tmp; ++ u64 start, end; ++ int ret; ++ ++ orig = kstrndup(page, count, GFP_KERNEL); ++ if (!orig) ++ return -ENOMEM; ++ ++ buf = strstrip(orig); ++ ++ ret = -EINVAL; ++ if (buf[0] != '+' && buf[0] != '-') ++ goto out; ++ tmp = strchr(&buf[1], '-'); ++ if (!tmp) ++ goto out; ++ *tmp = '\0'; ++ ret = kstrtoull(buf + 1, 0, &start); ++ if (ret) ++ goto out; ++ ret = kstrtoull(tmp + 1, 0, &end); ++ if (ret) ++ goto out; ++ ret = -EINVAL; ++ if (start > end) ++ goto out; ++ /* enable badblocks */ ++ cmpxchg(&t_dev->badblocks.shift, -1, 0); ++ if (buf[0] == '+') ++ ret = badblocks_set(&t_dev->badblocks, start, ++ end - start + 1, 1); ++ else ++ ret = badblocks_clear(&t_dev->badblocks, start, ++ end - start + 1); ++ if (ret == 0) ++ ret = count; ++out: ++ kfree(orig); ++ return ret; ++} ++CONFIGFS_ATTR(nullb_device_, badblocks); ++ ++static struct configfs_attribute *nullb_device_attrs[] = { ++ &nullb_device_attr_size, ++ &nullb_device_attr_completion_nsec, ++ &nullb_device_attr_submit_queues, ++ &nullb_device_attr_home_node, ++ &nullb_device_attr_queue_mode, ++ &nullb_device_attr_blocksize, ++ &nullb_device_attr_irqmode, ++ &nullb_device_attr_hw_queue_depth, ++ &nullb_device_attr_index, ++ &nullb_device_attr_blocking, ++ &nullb_device_attr_use_per_node_hctx, ++ &nullb_device_attr_power, ++ &nullb_device_attr_memory_backed, ++ &nullb_device_attr_discard, ++ &nullb_device_attr_mbps, ++ &nullb_device_attr_cache_size, ++ &nullb_device_attr_badblocks, ++ &nullb_device_attr_zoned, ++ &nullb_device_attr_zone_size, ++ &nullb_device_attr_zone_capacity, ++ &nullb_device_attr_zone_nr_conv, ++ &nullb_device_attr_zone_max_open, ++ &nullb_device_attr_zone_max_active, ++ NULL, ++}; ++ ++static void nullb_device_release(struct config_item *item) ++{ ++ struct nullb_device *dev = to_nullb_device(item); ++ ++ null_free_device_storage(dev, false); ++ null_free_dev(dev); ++} ++ ++static struct configfs_item_operations nullb_device_ops = { ++ .release = nullb_device_release, ++}; ++ ++static const struct config_item_type nullb_device_type = { ++ .ct_item_ops = &nullb_device_ops, ++ .ct_attrs = nullb_device_attrs, ++ .ct_owner = THIS_MODULE, ++}; ++ ++static struct ++config_item *nullb_group_make_item(struct config_group *group, const char *name) ++{ ++ struct nullb_device *dev; ++ ++ dev = null_alloc_dev(); ++ if (!dev) ++ return ERR_PTR(-ENOMEM); ++ ++ config_item_init_type_name(&dev->item, name, &nullb_device_type); ++ ++ return &dev->item; ++} ++ ++static void ++nullb_group_drop_item(struct config_group *group, struct config_item *item) ++{ ++ struct nullb_device *dev = to_nullb_device(item); ++ ++ if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) { ++ mutex_lock(&lock); ++ dev->power = false; ++ null_del_dev(dev->nullb); ++ mutex_unlock(&lock); ++ } ++ ++ config_item_put(item); ++} ++ ++static ssize_t memb_group_features_show(struct config_item *item, char *page) ++{ ++ return snprintf(page, PAGE_SIZE, ++ "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n"); ++} ++ ++CONFIGFS_ATTR_RO(memb_group_, features); ++ ++static struct configfs_attribute *nullb_group_attrs[] = { ++ &memb_group_attr_features, ++ NULL, ++}; ++ ++static struct configfs_group_operations 
nullb_group_ops = { ++ .make_item = nullb_group_make_item, ++ .drop_item = nullb_group_drop_item, ++}; ++ ++static const struct config_item_type nullb_group_type = { ++ .ct_group_ops = &nullb_group_ops, ++ .ct_attrs = nullb_group_attrs, ++ .ct_owner = THIS_MODULE, ++}; ++ ++static struct configfs_subsystem nullb_subsys = { ++ .su_group = { ++ .cg_item = { ++ .ci_namebuf = "nullb", ++ .ci_type = &nullb_group_type, ++ }, ++ }, ++}; ++ ++static inline int null_cache_active(struct nullb *nullb) ++{ ++ return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); ++} ++ ++static struct nullb_device *null_alloc_dev(void) ++{ ++ struct nullb_device *dev; ++ ++ dev = kzalloc(sizeof(*dev), GFP_KERNEL); ++ if (!dev) ++ return NULL; ++ INIT_RADIX_TREE(&dev->data, GFP_ATOMIC); ++ INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC); ++ if (badblocks_init(&dev->badblocks, 0)) { ++ kfree(dev); ++ return NULL; ++ } ++ ++ dev->size = g_gb * 1024; ++ dev->completion_nsec = g_completion_nsec; ++ dev->submit_queues = g_submit_queues; ++ dev->home_node = g_home_node; ++ dev->queue_mode = g_queue_mode; ++ dev->blocksize = g_bs; ++ dev->irqmode = g_irqmode; ++ dev->hw_queue_depth = g_hw_queue_depth; ++ dev->blocking = g_blocking; ++ dev->use_per_node_hctx = g_use_per_node_hctx; ++ dev->zoned = g_zoned; ++ dev->zone_size = g_zone_size; ++ dev->zone_capacity = g_zone_capacity; ++ dev->zone_nr_conv = g_zone_nr_conv; ++ dev->zone_max_open = g_zone_max_open; ++ dev->zone_max_active = g_zone_max_active; ++ return dev; ++} ++ ++static void null_free_dev(struct nullb_device *dev) ++{ ++ if (!dev) ++ return; ++ ++ null_free_zoned_dev(dev); ++ badblocks_exit(&dev->badblocks); ++ kfree(dev); ++} ++ ++static void put_tag(struct nullb_queue *nq, unsigned int tag) ++{ ++ clear_bit_unlock(tag, nq->tag_map); ++ ++ if (waitqueue_active(&nq->wait)) ++ wake_up(&nq->wait); ++} ++ ++static unsigned int get_tag(struct nullb_queue *nq) ++{ ++ unsigned int tag; ++ ++ do { ++ tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); ++ if (tag >= nq->queue_depth) ++ return -1U; ++ } while (test_and_set_bit_lock(tag, nq->tag_map)); ++ ++ return tag; ++} ++ ++static void free_cmd(struct nullb_cmd *cmd) ++{ ++ put_tag(cmd->nq, cmd->tag); ++} ++ ++static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer); ++ ++static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) ++{ ++ struct nullb_cmd *cmd; ++ unsigned int tag; ++ ++ tag = get_tag(nq); ++ if (tag != -1U) { ++ cmd = &nq->cmds[tag]; ++ cmd->tag = tag; ++ cmd->error = BLK_STS_OK; ++ cmd->nq = nq; ++ if (nq->dev->irqmode == NULL_IRQ_TIMER) { ++ hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ cmd->timer.function = null_cmd_timer_expired; ++ } ++ return cmd; ++ } ++ ++ return NULL; ++} ++ ++static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) ++{ ++ struct nullb_cmd *cmd; ++ DEFINE_WAIT(wait); ++ ++ cmd = __alloc_cmd(nq); ++ if (cmd || !can_wait) ++ return cmd; ++ ++ do { ++ prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); ++ cmd = __alloc_cmd(nq); ++ if (cmd) ++ break; ++ ++ io_schedule(); ++ } while (1); ++ ++ finish_wait(&nq->wait, &wait); ++ return cmd; ++} ++ ++static void end_cmd(struct nullb_cmd *cmd) ++{ ++ int queue_mode = cmd->nq->dev->queue_mode; ++ ++ switch (queue_mode) { ++ case NULL_Q_MQ: ++ blk_mq_end_request(cmd->rq, cmd->error); ++ return; ++ case NULL_Q_BIO: ++ cmd->bio->bi_status = cmd->error; ++ bio_endio(cmd->bio); ++ break; ++ } ++ ++ free_cmd(cmd); ++} ++ ++static enum hrtimer_restart null_cmd_timer_expired(struct 
hrtimer *timer) ++{ ++ end_cmd(container_of(timer, struct nullb_cmd, timer)); ++ ++ return HRTIMER_NORESTART; ++} ++ ++static void null_cmd_end_timer(struct nullb_cmd *cmd) ++{ ++ ktime_t kt = cmd->nq->dev->completion_nsec; ++ ++ hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL); ++} ++ ++static void null_complete_rq(struct request *rq) ++{ ++ end_cmd(blk_mq_rq_to_pdu(rq)); ++} ++ ++static struct nullb_page *null_alloc_page(gfp_t gfp_flags) ++{ ++ struct nullb_page *t_page; ++ ++ t_page = kmalloc(sizeof(struct nullb_page), gfp_flags); ++ if (!t_page) ++ goto out; ++ ++ t_page->page = alloc_pages(gfp_flags, 0); ++ if (!t_page->page) ++ goto out_freepage; ++ ++ memset(t_page->bitmap, 0, sizeof(t_page->bitmap)); ++ return t_page; ++out_freepage: ++ kfree(t_page); ++out: ++ return NULL; ++} ++ ++static void null_free_page(struct nullb_page *t_page) ++{ ++ __set_bit(NULLB_PAGE_FREE, t_page->bitmap); ++ if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap)) ++ return; ++ __free_page(t_page->page); ++ kfree(t_page); ++} ++ ++static bool null_page_empty(struct nullb_page *page) ++{ ++ int size = MAP_SZ - 2; ++ ++ return find_first_bit(page->bitmap, size) == size; ++} ++ ++static void null_free_sector(struct nullb *nullb, sector_t sector, ++ bool is_cache) ++{ ++ unsigned int sector_bit; ++ u64 idx; ++ struct nullb_page *t_page, *ret; ++ struct radix_tree_root *root; ++ ++ root = is_cache ? &nullb->dev->cache : &nullb->dev->data; ++ idx = sector >> PAGE_SECTORS_SHIFT; ++ sector_bit = (sector & SECTOR_MASK); ++ ++ t_page = radix_tree_lookup(root, idx); ++ if (t_page) { ++ __clear_bit(sector_bit, t_page->bitmap); ++ ++ if (null_page_empty(t_page)) { ++ ret = radix_tree_delete_item(root, idx, t_page); ++ WARN_ON(ret != t_page); ++ null_free_page(ret); ++ if (is_cache) ++ nullb->dev->curr_cache -= PAGE_SIZE; ++ } ++ } ++} ++ ++static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx, ++ struct nullb_page *t_page, bool is_cache) ++{ ++ struct radix_tree_root *root; ++ ++ root = is_cache ? &nullb->dev->cache : &nullb->dev->data; ++ ++ if (radix_tree_insert(root, idx, t_page)) { ++ null_free_page(t_page); ++ t_page = radix_tree_lookup(root, idx); ++ WARN_ON(!t_page || t_page->page->index != idx); ++ } else if (is_cache) ++ nullb->dev->curr_cache += PAGE_SIZE; ++ ++ return t_page; ++} ++ ++static void null_free_device_storage(struct nullb_device *dev, bool is_cache) ++{ ++ unsigned long pos = 0; ++ int nr_pages; ++ struct nullb_page *ret, *t_pages[FREE_BATCH]; ++ struct radix_tree_root *root; ++ ++ root = is_cache ? &dev->cache : &dev->data; ++ ++ do { ++ int i; ++ ++ nr_pages = radix_tree_gang_lookup(root, ++ (void **)t_pages, pos, FREE_BATCH); ++ ++ for (i = 0; i < nr_pages; i++) { ++ pos = t_pages[i]->page->index; ++ ret = radix_tree_delete_item(root, pos, t_pages[i]); ++ WARN_ON(ret != t_pages[i]); ++ null_free_page(ret); ++ } ++ ++ pos++; ++ } while (nr_pages == FREE_BATCH); ++ ++ if (is_cache) ++ dev->curr_cache = 0; ++} ++ ++static struct nullb_page *__null_lookup_page(struct nullb *nullb, ++ sector_t sector, bool for_write, bool is_cache) ++{ ++ unsigned int sector_bit; ++ u64 idx; ++ struct nullb_page *t_page; ++ struct radix_tree_root *root; ++ ++ idx = sector >> PAGE_SECTORS_SHIFT; ++ sector_bit = (sector & SECTOR_MASK); ++ ++ root = is_cache ? 
&nullb->dev->cache : &nullb->dev->data; ++ t_page = radix_tree_lookup(root, idx); ++ WARN_ON(t_page && t_page->page->index != idx); ++ ++ if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap))) ++ return t_page; ++ ++ return NULL; ++} ++ ++static struct nullb_page *null_lookup_page(struct nullb *nullb, ++ sector_t sector, bool for_write, bool ignore_cache) ++{ ++ struct nullb_page *page = NULL; ++ ++ if (!ignore_cache) ++ page = __null_lookup_page(nullb, sector, for_write, true); ++ if (page) ++ return page; ++ return __null_lookup_page(nullb, sector, for_write, false); ++} ++ ++static struct nullb_page *null_insert_page(struct nullb *nullb, ++ sector_t sector, bool ignore_cache) ++ __releases(&nullb->lock) ++ __acquires(&nullb->lock) ++{ ++ u64 idx; ++ struct nullb_page *t_page; ++ ++ t_page = null_lookup_page(nullb, sector, true, ignore_cache); ++ if (t_page) ++ return t_page; ++ ++ spin_unlock_irq(&nullb->lock); ++ ++ t_page = null_alloc_page(GFP_NOIO); ++ if (!t_page) ++ goto out_lock; ++ ++ if (radix_tree_preload(GFP_NOIO)) ++ goto out_freepage; ++ ++ spin_lock_irq(&nullb->lock); ++ idx = sector >> PAGE_SECTORS_SHIFT; ++ t_page->page->index = idx; ++ t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache); ++ radix_tree_preload_end(); ++ ++ return t_page; ++out_freepage: ++ null_free_page(t_page); ++out_lock: ++ spin_lock_irq(&nullb->lock); ++ return null_lookup_page(nullb, sector, true, ignore_cache); ++} ++ ++static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) ++{ ++ int i; ++ unsigned int offset; ++ u64 idx; ++ struct nullb_page *t_page, *ret; ++ void *dst, *src; ++ ++ idx = c_page->page->index; ++ ++ t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true); ++ ++ __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap); ++ if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) { ++ null_free_page(c_page); ++ if (t_page && null_page_empty(t_page)) { ++ ret = radix_tree_delete_item(&nullb->dev->data, ++ idx, t_page); ++ null_free_page(t_page); ++ } ++ return 0; ++ } ++ ++ if (!t_page) ++ return -ENOMEM; ++ ++ src = kmap_atomic(c_page->page); ++ dst = kmap_atomic(t_page->page); ++ ++ for (i = 0; i < PAGE_SECTORS; ++ i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { ++ if (test_bit(i, c_page->bitmap)) { ++ offset = (i << SECTOR_SHIFT); ++ memcpy(dst + offset, src + offset, ++ nullb->dev->blocksize); ++ __set_bit(i, t_page->bitmap); ++ } ++ } ++ ++ kunmap_atomic(dst); ++ kunmap_atomic(src); ++ ++ ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page); ++ null_free_page(ret); ++ nullb->dev->curr_cache -= PAGE_SIZE; ++ ++ return 0; ++} ++ ++static int null_make_cache_space(struct nullb *nullb, unsigned long n) ++{ ++ int i, err, nr_pages; ++ struct nullb_page *c_pages[FREE_BATCH]; ++ unsigned long flushed = 0, one_round; ++ ++again: ++ if ((nullb->dev->cache_size * 1024 * 1024) > ++ nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0) ++ return 0; ++ ++ nr_pages = radix_tree_gang_lookup(&nullb->dev->cache, ++ (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH); ++ /* ++ * nullb_flush_cache_page could unlock before using the c_pages. 
To ++ * avoid race, we don't allow page free ++ */ ++ for (i = 0; i < nr_pages; i++) { ++ nullb->cache_flush_pos = c_pages[i]->page->index; ++ /* ++ * We found the page which is being flushed to disk by other ++ * threads ++ */ ++ if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap)) ++ c_pages[i] = NULL; ++ else ++ __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap); ++ } ++ ++ one_round = 0; ++ for (i = 0; i < nr_pages; i++) { ++ if (c_pages[i] == NULL) ++ continue; ++ err = null_flush_cache_page(nullb, c_pages[i]); ++ if (err) ++ return err; ++ one_round++; ++ } ++ flushed += one_round << PAGE_SHIFT; ++ ++ if (n > flushed) { ++ if (nr_pages == 0) ++ nullb->cache_flush_pos = 0; ++ if (one_round == 0) { ++ /* give other threads a chance */ ++ spin_unlock_irq(&nullb->lock); ++ spin_lock_irq(&nullb->lock); ++ } ++ goto again; ++ } ++ return 0; ++} ++ ++static int copy_to_nullb(struct nullb *nullb, struct page *source, ++ unsigned int off, sector_t sector, size_t n, bool is_fua) ++{ ++ size_t temp, count = 0; ++ unsigned int offset; ++ struct nullb_page *t_page; ++ void *dst, *src; ++ ++ while (count < n) { ++ temp = min_t(size_t, nullb->dev->blocksize, n - count); ++ ++ if (null_cache_active(nullb) && !is_fua) ++ null_make_cache_space(nullb, PAGE_SIZE); ++ ++ offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; ++ t_page = null_insert_page(nullb, sector, ++ !null_cache_active(nullb) || is_fua); ++ if (!t_page) ++ return -ENOSPC; ++ ++ src = kmap_atomic(source); ++ dst = kmap_atomic(t_page->page); ++ memcpy(dst + offset, src + off + count, temp); ++ kunmap_atomic(dst); ++ kunmap_atomic(src); ++ ++ __set_bit(sector & SECTOR_MASK, t_page->bitmap); ++ ++ if (is_fua) ++ null_free_sector(nullb, sector, true); ++ ++ count += temp; ++ sector += temp >> SECTOR_SHIFT; ++ } ++ return 0; ++} ++ ++static int copy_from_nullb(struct nullb *nullb, struct page *dest, ++ unsigned int off, sector_t sector, size_t n) ++{ ++ size_t temp, count = 0; ++ unsigned int offset; ++ struct nullb_page *t_page; ++ void *dst, *src; ++ ++ while (count < n) { ++ temp = min_t(size_t, nullb->dev->blocksize, n - count); ++ ++ offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; ++ t_page = null_lookup_page(nullb, sector, false, ++ !null_cache_active(nullb)); ++ ++ dst = kmap_atomic(dest); ++ if (!t_page) { ++ memset(dst + off + count, 0, temp); ++ goto next; ++ } ++ src = kmap_atomic(t_page->page); ++ memcpy(dst + off + count, src + offset, temp); ++ kunmap_atomic(src); ++next: ++ kunmap_atomic(dst); ++ ++ count += temp; ++ sector += temp >> SECTOR_SHIFT; ++ } ++ return 0; ++} ++ ++static void nullb_fill_pattern(struct nullb *nullb, struct page *page, ++ unsigned int len, unsigned int off) ++{ ++ void *dst; ++ ++ dst = kmap_atomic(page); ++ memset(dst + off, 0xFF, len); ++ kunmap_atomic(dst); ++} ++ ++static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) ++{ ++ size_t temp; ++ ++ spin_lock_irq(&nullb->lock); ++ while (n > 0) { ++ temp = min_t(size_t, n, nullb->dev->blocksize); ++ null_free_sector(nullb, sector, false); ++ if (null_cache_active(nullb)) ++ null_free_sector(nullb, sector, true); ++ sector += temp >> SECTOR_SHIFT; ++ n -= temp; ++ } ++ spin_unlock_irq(&nullb->lock); ++} ++ ++static int null_handle_flush(struct nullb *nullb) ++{ ++ int err; ++ ++ if (!null_cache_active(nullb)) ++ return 0; ++ ++ spin_lock_irq(&nullb->lock); ++ while (true) { ++ err = null_make_cache_space(nullb, ++ nullb->dev->cache_size * 1024 * 1024); ++ if (err || nullb->dev->curr_cache == 0) ++ break; ++ } ++ ++ 
WARN_ON(!radix_tree_empty(&nullb->dev->cache)); ++ spin_unlock_irq(&nullb->lock); ++ return err; ++} ++ ++static int null_transfer(struct nullb *nullb, struct page *page, ++ unsigned int len, unsigned int off, bool is_write, sector_t sector, ++ bool is_fua) ++{ ++ struct nullb_device *dev = nullb->dev; ++ unsigned int valid_len = len; ++ int err = 0; ++ ++ if (!is_write) { ++ if (dev->zoned) ++ valid_len = null_zone_valid_read_len(nullb, ++ sector, len); ++ ++ if (valid_len) { ++ err = copy_from_nullb(nullb, page, off, ++ sector, valid_len); ++ off += valid_len; ++ len -= valid_len; ++ } ++ ++ if (len) ++ nullb_fill_pattern(nullb, page, len, off); ++ flush_dcache_page(page); ++ } else { ++ flush_dcache_page(page); ++ err = copy_to_nullb(nullb, page, off, sector, len, is_fua); ++ } ++ ++ return err; ++} ++ ++static int null_handle_rq(struct nullb_cmd *cmd) ++{ ++ struct request *rq = cmd->rq; ++ struct nullb *nullb = cmd->nq->dev->nullb; ++ int err; ++ unsigned int len; ++ sector_t sector; ++ struct req_iterator iter; ++ struct bio_vec bvec; ++ ++ sector = blk_rq_pos(rq); ++ ++ if (req_op(rq) == REQ_OP_DISCARD) { ++ null_handle_discard(nullb, sector, blk_rq_bytes(rq)); ++ return 0; ++ } ++ ++ spin_lock_irq(&nullb->lock); ++ rq_for_each_segment(bvec, rq, iter) { ++ len = bvec.bv_len; ++ err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, ++ op_is_write(req_op(rq)), sector, ++ rq->cmd_flags & REQ_FUA); ++ if (err) { ++ spin_unlock_irq(&nullb->lock); ++ return err; ++ } ++ sector += len >> SECTOR_SHIFT; ++ } ++ spin_unlock_irq(&nullb->lock); ++ ++ return 0; ++} ++ ++static int null_handle_bio(struct nullb_cmd *cmd) ++{ ++ struct bio *bio = cmd->bio; ++ struct nullb *nullb = cmd->nq->dev->nullb; ++ int err; ++ unsigned int len; ++ sector_t sector; ++ struct bio_vec bvec; ++ struct bvec_iter iter; ++ ++ sector = bio->bi_iter.bi_sector; ++ ++ if (bio_op(bio) == REQ_OP_DISCARD) { ++ null_handle_discard(nullb, sector, ++ bio_sectors(bio) << SECTOR_SHIFT); ++ return 0; ++ } ++ ++ spin_lock_irq(&nullb->lock); ++ bio_for_each_segment(bvec, bio, iter) { ++ len = bvec.bv_len; ++ err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, ++ op_is_write(bio_op(bio)), sector, ++ bio->bi_opf & REQ_FUA); ++ if (err) { ++ spin_unlock_irq(&nullb->lock); ++ return err; ++ } ++ sector += len >> SECTOR_SHIFT; ++ } ++ spin_unlock_irq(&nullb->lock); ++ return 0; ++} ++ ++static void null_stop_queue(struct nullb *nullb) ++{ ++ struct request_queue *q = nullb->q; ++ ++ if (nullb->dev->queue_mode == NULL_Q_MQ) ++ blk_mq_stop_hw_queues(q); ++} ++ ++static void null_restart_queue_async(struct nullb *nullb) ++{ ++ struct request_queue *q = nullb->q; ++ ++ if (nullb->dev->queue_mode == NULL_Q_MQ) ++ blk_mq_start_stopped_hw_queues(q, true); ++} ++ ++static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd) ++{ ++ struct nullb_device *dev = cmd->nq->dev; ++ struct nullb *nullb = dev->nullb; ++ blk_status_t sts = BLK_STS_OK; ++ struct request *rq = cmd->rq; ++ ++ if (!hrtimer_active(&nullb->bw_timer)) ++ hrtimer_restart(&nullb->bw_timer); ++ ++ if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) { ++ null_stop_queue(nullb); ++ /* race with timer */ ++ if (atomic_long_read(&nullb->cur_bytes) > 0) ++ null_restart_queue_async(nullb); ++ /* requeue request */ ++ sts = BLK_STS_DEV_RESOURCE; ++ } ++ return sts; ++} ++ ++static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, ++ sector_t sector, ++ sector_t nr_sectors) ++{ ++ struct badblocks *bb = 
&cmd->nq->dev->badblocks; ++ sector_t first_bad; ++ int bad_sectors; ++ ++ if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors)) ++ return BLK_STS_IOERR; ++ ++ return BLK_STS_OK; ++} ++ ++static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, ++ enum req_opf op) ++{ ++ struct nullb_device *dev = cmd->nq->dev; ++ int err; ++ ++ if (dev->queue_mode == NULL_Q_BIO) ++ err = null_handle_bio(cmd); ++ else ++ err = null_handle_rq(cmd); ++ ++ return errno_to_blk_status(err); ++} ++ ++static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd) ++{ ++ struct nullb_device *dev = cmd->nq->dev; ++ struct bio *bio; ++ ++ if (dev->memory_backed) ++ return; ++ ++ if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) { ++ zero_fill_bio(cmd->bio); ++ } else if (req_op(cmd->rq) == REQ_OP_READ) { ++ __rq_for_each_bio(bio, cmd->rq) ++ zero_fill_bio(bio); ++ } ++} ++ ++static inline void nullb_complete_cmd(struct nullb_cmd *cmd) ++{ ++ /* ++ * Since root privileges are required to configure the null_blk ++ * driver, it is fine that this driver does not initialize the ++ * data buffers of read commands. Zero-initialize these buffers ++ * anyway if KMSAN is enabled to prevent that KMSAN complains ++ * about null_blk not initializing read data buffers. ++ */ ++ if (IS_ENABLED(CONFIG_KMSAN)) ++ nullb_zero_read_cmd_buffer(cmd); ++ ++ /* Complete IO by inline, softirq or timer */ ++ switch (cmd->nq->dev->irqmode) { ++ case NULL_IRQ_SOFTIRQ: ++ switch (cmd->nq->dev->queue_mode) { ++ case NULL_Q_MQ: ++ blk_mq_complete_request(cmd->rq); ++ break; ++ case NULL_Q_BIO: ++ /* ++ * XXX: no proper submitting cpu information available. ++ */ ++ end_cmd(cmd); ++ break; ++ } ++ break; ++ case NULL_IRQ_NONE: ++ end_cmd(cmd); ++ break; ++ case NULL_IRQ_TIMER: ++ null_cmd_end_timer(cmd); ++ break; ++ } ++} ++ ++blk_status_t null_process_cmd(struct nullb_cmd *cmd, ++ enum req_opf op, sector_t sector, ++ unsigned int nr_sectors) ++{ ++ struct nullb_device *dev = cmd->nq->dev; ++ blk_status_t ret; ++ ++ if (dev->badblocks.shift != -1) { ++ ret = null_handle_badblocks(cmd, sector, nr_sectors); ++ if (ret != BLK_STS_OK) ++ return ret; ++ } ++ ++ if (dev->memory_backed) ++ return null_handle_memory_backed(cmd, op); ++ ++ return BLK_STS_OK; ++} ++ ++static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, ++ sector_t nr_sectors, enum req_opf op) ++{ ++ struct nullb_device *dev = cmd->nq->dev; ++ struct nullb *nullb = dev->nullb; ++ blk_status_t sts; ++ ++ if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) { ++ sts = null_handle_throttled(cmd); ++ if (sts != BLK_STS_OK) ++ return sts; ++ } ++ ++ if (op == REQ_OP_FLUSH) { ++ cmd->error = errno_to_blk_status(null_handle_flush(nullb)); ++ goto out; ++ } ++ ++ if (dev->zoned) ++ sts = null_process_zoned_cmd(cmd, op, sector, nr_sectors); ++ else ++ sts = null_process_cmd(cmd, op, sector, nr_sectors); ++ ++ /* Do not overwrite errors (e.g. 
timeout errors) */ ++ if (cmd->error == BLK_STS_OK) ++ cmd->error = sts; ++ ++out: ++ nullb_complete_cmd(cmd); ++ return BLK_STS_OK; ++} ++ ++static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer) ++{ ++ struct nullb *nullb = container_of(timer, struct nullb, bw_timer); ++ ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL); ++ unsigned int mbps = nullb->dev->mbps; ++ ++ if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps)) ++ return HRTIMER_NORESTART; ++ ++ atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps)); ++ null_restart_queue_async(nullb); ++ ++ hrtimer_forward_now(&nullb->bw_timer, timer_interval); ++ ++ return HRTIMER_RESTART; ++} ++ ++static void nullb_setup_bwtimer(struct nullb *nullb) ++{ ++ ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL); ++ ++ hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ nullb->bw_timer.function = nullb_bwtimer_fn; ++ atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps)); ++ hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL); ++} ++ ++static struct nullb_queue *nullb_to_queue(struct nullb *nullb) ++{ ++ int index = 0; ++ ++ if (nullb->nr_queues != 1) ++ index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); ++ ++ return &nullb->queues[index]; ++} ++ ++static blk_qc_t null_submit_bio(struct bio *bio) ++{ ++ sector_t sector = bio->bi_iter.bi_sector; ++ sector_t nr_sectors = bio_sectors(bio); ++ struct nullb *nullb = bio->bi_disk->private_data; ++ struct nullb_queue *nq = nullb_to_queue(nullb); ++ struct nullb_cmd *cmd; ++ ++ cmd = alloc_cmd(nq, 1); ++ cmd->bio = bio; ++ ++ null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio)); ++ return BLK_QC_T_NONE; ++} ++ ++static bool should_timeout_request(struct request *rq) ++{ ++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION ++ if (g_timeout_str[0]) ++ return should_fail(&null_timeout_attr, 1); ++#endif ++ return false; ++} ++ ++static bool should_requeue_request(struct request *rq) ++{ ++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION ++ if (g_requeue_str[0]) ++ return should_fail(&null_requeue_attr, 1); ++#endif ++ return false; ++} ++ ++static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) ++{ ++ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); ++ ++ pr_info("rq %p timed out\n", rq); ++ ++ /* ++ * If the device is marked as blocking (i.e. memory backed or zoned ++ * device), the submission path may be blocked waiting for resources ++ * and cause real timeouts. For these real timeouts, the submission ++ * path will complete the request using blk_mq_complete_request(). ++ * Only fake timeouts need to execute blk_mq_complete_request() here. 
++ */ ++ cmd->error = BLK_STS_TIMEOUT; ++ if (cmd->fake_timeout) ++ blk_mq_complete_request(rq); ++ return BLK_EH_DONE; ++} ++ ++static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, ++ const struct blk_mq_queue_data *bd) ++{ ++ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); ++ struct nullb_queue *nq = hctx->driver_data; ++ sector_t nr_sectors = blk_rq_sectors(bd->rq); ++ sector_t sector = blk_rq_pos(bd->rq); ++ ++ might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); ++ ++ if (nq->dev->irqmode == NULL_IRQ_TIMER) { ++ hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ cmd->timer.function = null_cmd_timer_expired; ++ } ++ cmd->rq = bd->rq; ++ cmd->error = BLK_STS_OK; ++ cmd->nq = nq; ++ cmd->fake_timeout = should_timeout_request(bd->rq) || ++ blk_should_fake_timeout(bd->rq->q); ++ ++ blk_mq_start_request(bd->rq); ++ ++ if (should_requeue_request(bd->rq)) { ++ /* ++ * Alternate between hitting the core BUSY path, and the ++ * driver driven requeue path ++ */ ++ nq->requeue_selection++; ++ if (nq->requeue_selection & 1) ++ return BLK_STS_RESOURCE; ++ else { ++ blk_mq_requeue_request(bd->rq, true); ++ return BLK_STS_OK; ++ } ++ } ++ if (cmd->fake_timeout) ++ return BLK_STS_OK; ++ ++ return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq)); ++} ++ ++static void cleanup_queue(struct nullb_queue *nq) ++{ ++ kfree(nq->tag_map); ++ kfree(nq->cmds); ++} ++ ++static void cleanup_queues(struct nullb *nullb) ++{ ++ int i; ++ ++ for (i = 0; i < nullb->nr_queues; i++) ++ cleanup_queue(&nullb->queues[i]); ++ ++ kfree(nullb->queues); ++} ++ ++static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) ++{ ++ struct nullb_queue *nq = hctx->driver_data; ++ struct nullb *nullb = nq->dev->nullb; ++ ++ nullb->nr_queues--; ++} ++ ++static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) ++{ ++ init_waitqueue_head(&nq->wait); ++ nq->queue_depth = nullb->queue_depth; ++ nq->dev = nullb->dev; ++} ++ ++static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, ++ unsigned int hctx_idx) ++{ ++ struct nullb *nullb = hctx->queue->queuedata; ++ struct nullb_queue *nq; ++ ++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION ++ if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1)) ++ return -EFAULT; ++#endif ++ ++ nq = &nullb->queues[hctx_idx]; ++ hctx->driver_data = nq; ++ null_init_queue(nullb, nq); ++ nullb->nr_queues++; ++ ++ return 0; ++} ++ ++static const struct blk_mq_ops null_mq_ops = { ++ .queue_rq = null_queue_rq, ++ .complete = null_complete_rq, ++ .timeout = null_timeout_rq, ++ .init_hctx = null_init_hctx, ++ .exit_hctx = null_exit_hctx, ++}; ++ ++static void null_del_dev(struct nullb *nullb) ++{ ++ struct nullb_device *dev; ++ ++ if (!nullb) ++ return; ++ ++ dev = nullb->dev; ++ ++ ida_simple_remove(&nullb_indexes, nullb->index); ++ ++ list_del_init(&nullb->list); ++ ++ del_gendisk(nullb->disk); ++ ++ if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) { ++ hrtimer_cancel(&nullb->bw_timer); ++ atomic_long_set(&nullb->cur_bytes, LONG_MAX); ++ null_restart_queue_async(nullb); ++ } ++ ++ blk_cleanup_queue(nullb->q); ++ if (dev->queue_mode == NULL_Q_MQ && ++ nullb->tag_set == &nullb->__tag_set) ++ blk_mq_free_tag_set(nullb->tag_set); ++ put_disk(nullb->disk); ++ cleanup_queues(nullb); ++ if (null_cache_active(nullb)) ++ null_free_device_storage(nullb->dev, true); ++ kfree(nullb); ++ dev->nullb = NULL; ++} ++ ++static void null_config_discard(struct nullb *nullb) ++{ ++ if (nullb->dev->discard == false) ++ return; ++ ++ if 
(nullb->dev->zoned) { ++ nullb->dev->discard = false; ++ pr_info("discard option is ignored in zoned mode\n"); ++ return; ++ } ++ ++ nullb->q->limits.discard_granularity = nullb->dev->blocksize; ++ nullb->q->limits.discard_alignment = nullb->dev->blocksize; ++ blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); ++ blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q); ++} ++ ++static const struct block_device_operations null_bio_ops = { ++ .owner = THIS_MODULE, ++ .submit_bio = null_submit_bio, ++ .report_zones = null_report_zones, ++}; ++ ++static const struct block_device_operations null_rq_ops = { ++ .owner = THIS_MODULE, ++ .report_zones = null_report_zones, ++}; ++ ++static int setup_commands(struct nullb_queue *nq) ++{ ++ struct nullb_cmd *cmd; ++ int i, tag_size; ++ ++ nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL); ++ if (!nq->cmds) ++ return -ENOMEM; ++ ++ tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG; ++ nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL); ++ if (!nq->tag_map) { ++ kfree(nq->cmds); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < nq->queue_depth; i++) { ++ cmd = &nq->cmds[i]; ++ cmd->tag = -1U; ++ } ++ ++ return 0; ++} ++ ++static int setup_queues(struct nullb *nullb) ++{ ++ nullb->queues = kcalloc(nr_cpu_ids, sizeof(struct nullb_queue), ++ GFP_KERNEL); ++ if (!nullb->queues) ++ return -ENOMEM; ++ ++ nullb->queue_depth = nullb->dev->hw_queue_depth; ++ ++ return 0; ++} ++ ++static int init_driver_queues(struct nullb *nullb) ++{ ++ struct nullb_queue *nq; ++ int i, ret = 0; ++ ++ for (i = 0; i < nullb->dev->submit_queues; i++) { ++ nq = &nullb->queues[i]; ++ ++ null_init_queue(nullb, nq); ++ ++ ret = setup_commands(nq); ++ if (ret) ++ return ret; ++ nullb->nr_queues++; ++ } ++ return 0; ++} ++ ++static int null_gendisk_register(struct nullb *nullb) ++{ ++ sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT; ++ struct gendisk *disk; ++ ++ disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node); ++ if (!disk) ++ return -ENOMEM; ++ set_capacity(disk, size); ++ ++ disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; ++ disk->major = null_major; ++ disk->first_minor = nullb->index; ++ if (queue_is_mq(nullb->q)) ++ disk->fops = &null_rq_ops; ++ else ++ disk->fops = &null_bio_ops; ++ disk->private_data = nullb; ++ disk->queue = nullb->q; ++ strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); ++ ++ if (nullb->dev->zoned) { ++ int ret = null_register_zoned_dev(nullb); ++ ++ if (ret) ++ return ret; ++ } ++ ++ add_disk(disk); ++ return 0; ++} ++ ++static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) ++{ ++ set->ops = &null_mq_ops; ++ set->nr_hw_queues = nullb ? nullb->dev->submit_queues : ++ g_submit_queues; ++ set->queue_depth = nullb ? nullb->dev->hw_queue_depth : ++ g_hw_queue_depth; ++ set->numa_node = nullb ? 
nullb->dev->home_node : g_home_node; ++ set->cmd_size = sizeof(struct nullb_cmd); ++ set->flags = BLK_MQ_F_SHOULD_MERGE; ++ if (g_no_sched) ++ set->flags |= BLK_MQ_F_NO_SCHED; ++ if (g_shared_tag_bitmap) ++ set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; ++ set->driver_data = NULL; ++ ++ if ((nullb && nullb->dev->blocking) || g_blocking) ++ set->flags |= BLK_MQ_F_BLOCKING; ++ ++ return blk_mq_alloc_tag_set(set); ++} ++ ++static int null_validate_conf(struct nullb_device *dev) ++{ ++ dev->blocksize = round_down(dev->blocksize, 512); ++ dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); ++ ++ if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) { ++ if (dev->submit_queues != nr_online_nodes) ++ dev->submit_queues = nr_online_nodes; ++ } else if (dev->submit_queues > nr_cpu_ids) ++ dev->submit_queues = nr_cpu_ids; ++ else if (dev->submit_queues == 0) ++ dev->submit_queues = 1; ++ ++ dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); ++ dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); ++ ++ /* Do memory allocation, so set blocking */ ++ if (dev->memory_backed) ++ dev->blocking = true; ++ else /* cache is meaningless */ ++ dev->cache_size = 0; ++ dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024, ++ dev->cache_size); ++ dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps); ++ /* can not stop a queue */ ++ if (dev->queue_mode == NULL_Q_BIO) ++ dev->mbps = 0; ++ ++ if (dev->zoned && ++ (!dev->zone_size || !is_power_of_2(dev->zone_size))) { ++ pr_err("zone_size must be power-of-two\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION ++static bool __null_setup_fault(struct fault_attr *attr, char *str) ++{ ++ if (!str[0]) ++ return true; ++ ++ if (!setup_fault_attr(attr, str)) ++ return false; ++ ++ attr->verbose = 0; ++ return true; ++} ++#endif ++ ++static bool null_setup_fault(void) ++{ ++#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION ++ if (!__null_setup_fault(&null_timeout_attr, g_timeout_str)) ++ return false; ++ if (!__null_setup_fault(&null_requeue_attr, g_requeue_str)) ++ return false; ++ if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str)) ++ return false; ++#endif ++ return true; ++} ++ ++static int null_add_dev(struct nullb_device *dev) ++{ ++ struct nullb *nullb; ++ int rv; ++ ++ rv = null_validate_conf(dev); ++ if (rv) ++ return rv; ++ ++ nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node); ++ if (!nullb) { ++ rv = -ENOMEM; ++ goto out; ++ } ++ nullb->dev = dev; ++ dev->nullb = nullb; ++ ++ spin_lock_init(&nullb->lock); ++ ++ rv = setup_queues(nullb); ++ if (rv) ++ goto out_free_nullb; ++ ++ if (dev->queue_mode == NULL_Q_MQ) { ++ if (shared_tags) { ++ nullb->tag_set = &tag_set; ++ rv = 0; ++ } else { ++ nullb->tag_set = &nullb->__tag_set; ++ rv = null_init_tag_set(nullb, nullb->tag_set); ++ } ++ ++ if (rv) ++ goto out_cleanup_queues; ++ ++ if (!null_setup_fault()) ++ goto out_cleanup_queues; ++ ++ nullb->tag_set->timeout = 5 * HZ; ++ nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb); ++ if (IS_ERR(nullb->q)) { ++ rv = -ENOMEM; ++ goto out_cleanup_tags; ++ } ++ } else if (dev->queue_mode == NULL_Q_BIO) { ++ nullb->q = blk_alloc_queue(dev->home_node); ++ if (!nullb->q) { ++ rv = -ENOMEM; ++ goto out_cleanup_queues; ++ } ++ rv = init_driver_queues(nullb); ++ if (rv) ++ goto out_cleanup_blk_queue; ++ } ++ ++ if (dev->mbps) { ++ set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags); ++ nullb_setup_bwtimer(nullb); ++ } ++ ++ if (dev->cache_size > 0) { ++ 
set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); ++ blk_queue_write_cache(nullb->q, true, true); ++ } ++ ++ if (dev->zoned) { ++ rv = null_init_zoned_dev(dev, nullb->q); ++ if (rv) ++ goto out_cleanup_blk_queue; ++ } ++ ++ nullb->q->queuedata = nullb; ++ blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); ++ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); ++ ++ mutex_lock(&lock); ++ rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); ++ if (rv < 0) { ++ mutex_unlock(&lock); ++ goto out_cleanup_zone; ++ } ++ nullb->index = rv; ++ dev->index = rv; ++ mutex_unlock(&lock); ++ ++ blk_queue_logical_block_size(nullb->q, dev->blocksize); ++ blk_queue_physical_block_size(nullb->q, dev->blocksize); ++ ++ null_config_discard(nullb); ++ ++ sprintf(nullb->disk_name, "nullb%d", nullb->index); ++ ++ rv = null_gendisk_register(nullb); ++ if (rv) ++ goto out_ida_free; ++ ++ mutex_lock(&lock); ++ list_add_tail(&nullb->list, &nullb_list); ++ mutex_unlock(&lock); ++ ++ return 0; ++ ++out_ida_free: ++ ida_free(&nullb_indexes, nullb->index); ++out_cleanup_zone: ++ null_free_zoned_dev(dev); ++out_cleanup_blk_queue: ++ blk_cleanup_queue(nullb->q); ++out_cleanup_tags: ++ if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) ++ blk_mq_free_tag_set(nullb->tag_set); ++out_cleanup_queues: ++ cleanup_queues(nullb); ++out_free_nullb: ++ kfree(nullb); ++ dev->nullb = NULL; ++out: ++ return rv; ++} ++ ++static int __init null_init(void) ++{ ++ int ret = 0; ++ unsigned int i; ++ struct nullb *nullb; ++ struct nullb_device *dev; ++ ++ if (g_bs > PAGE_SIZE) { ++ pr_warn("invalid block size\n"); ++ pr_warn("defaults block size to %lu\n", PAGE_SIZE); ++ g_bs = PAGE_SIZE; ++ } ++ ++ if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) { ++ pr_err("invalid home_node value\n"); ++ g_home_node = NUMA_NO_NODE; ++ } ++ ++ if (g_queue_mode == NULL_Q_RQ) { ++ pr_err("legacy IO path no longer available\n"); ++ return -EINVAL; ++ } ++ if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { ++ if (g_submit_queues != nr_online_nodes) { ++ pr_warn("submit_queues param is set to %u.\n", ++ nr_online_nodes); ++ g_submit_queues = nr_online_nodes; ++ } ++ } else if (g_submit_queues > nr_cpu_ids) ++ g_submit_queues = nr_cpu_ids; ++ else if (g_submit_queues <= 0) ++ g_submit_queues = 1; ++ ++ if (g_queue_mode == NULL_Q_MQ && shared_tags) { ++ ret = null_init_tag_set(NULL, &tag_set); ++ if (ret) ++ return ret; ++ } ++ ++ config_group_init(&nullb_subsys.su_group); ++ mutex_init(&nullb_subsys.su_mutex); ++ ++ ret = configfs_register_subsystem(&nullb_subsys); ++ if (ret) ++ goto err_tagset; ++ ++ mutex_init(&lock); ++ ++ null_major = register_blkdev(0, "nullb"); ++ if (null_major < 0) { ++ ret = null_major; ++ goto err_conf; ++ } ++ ++ for (i = 0; i < nr_devices; i++) { ++ dev = null_alloc_dev(); ++ if (!dev) { ++ ret = -ENOMEM; ++ goto err_dev; ++ } ++ ret = null_add_dev(dev); ++ if (ret) { ++ null_free_dev(dev); ++ goto err_dev; ++ } ++ } ++ ++ pr_info("module loaded\n"); ++ return 0; ++ ++err_dev: ++ while (!list_empty(&nullb_list)) { ++ nullb = list_entry(nullb_list.next, struct nullb, list); ++ dev = nullb->dev; ++ null_del_dev(nullb); ++ null_free_dev(dev); ++ } ++ unregister_blkdev(null_major, "nullb"); ++err_conf: ++ configfs_unregister_subsystem(&nullb_subsys); ++err_tagset: ++ if (g_queue_mode == NULL_Q_MQ && shared_tags) ++ blk_mq_free_tag_set(&tag_set); ++ return ret; ++} ++ ++static void __exit null_exit(void) ++{ ++ struct nullb *nullb; ++ ++ configfs_unregister_subsystem(&nullb_subsys); ++ ++ 
unregister_blkdev(null_major, "nullb"); ++ ++ mutex_lock(&lock); ++ while (!list_empty(&nullb_list)) { ++ struct nullb_device *dev; ++ ++ nullb = list_entry(nullb_list.next, struct nullb, list); ++ dev = nullb->dev; ++ null_del_dev(nullb); ++ null_free_dev(dev); ++ } ++ mutex_unlock(&lock); ++ ++ if (g_queue_mode == NULL_Q_MQ && shared_tags) ++ blk_mq_free_tag_set(&tag_set); ++} ++ ++module_init(null_init); ++module_exit(null_exit); ++ ++MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h +new file mode 100644 +index 0000000000000..7de703f28617b +--- /dev/null ++++ b/drivers/block/null_blk/null_blk.h +@@ -0,0 +1,137 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __BLK_NULL_BLK_H ++#define __BLK_NULL_BLK_H ++ ++#undef pr_fmt ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include <linux/blkdev.h> ++#include <linux/slab.h> ++#include <linux/blk-mq.h> ++#include <linux/hrtimer.h> ++#include <linux/configfs.h> ++#include <linux/badblocks.h> ++#include <linux/fault-inject.h> ++ ++struct nullb_cmd { ++ struct request *rq; ++ struct bio *bio; ++ unsigned int tag; ++ blk_status_t error; ++ struct nullb_queue *nq; ++ struct hrtimer timer; ++ bool fake_timeout; ++}; ++ ++struct nullb_queue { ++ unsigned long *tag_map; ++ wait_queue_head_t wait; ++ unsigned int queue_depth; ++ struct nullb_device *dev; ++ unsigned int requeue_selection; ++ ++ struct nullb_cmd *cmds; ++}; ++ ++struct nullb_device { ++ struct nullb *nullb; ++ struct config_item item; ++ struct radix_tree_root data; /* data stored in the disk */ ++ struct radix_tree_root cache; /* disk cache data */ ++ unsigned long flags; /* device flags */ ++ unsigned int curr_cache; ++ struct badblocks badblocks; ++ ++ unsigned int nr_zones; ++ unsigned int nr_zones_imp_open; ++ unsigned int nr_zones_exp_open; ++ unsigned int nr_zones_closed; ++ struct blk_zone *zones; ++ sector_t zone_size_sects; ++ spinlock_t zone_lock; ++ unsigned long *zone_locks; ++ ++ unsigned long size; /* device size in MB */ ++ unsigned long completion_nsec; /* time in ns to complete a request */ ++ unsigned long cache_size; /* disk cache size in MB */ ++ unsigned long zone_size; /* zone size in MB if device is zoned */ ++ unsigned long zone_capacity; /* zone capacity in MB if device is zoned */ ++ unsigned int zone_nr_conv; /* number of conventional zones */ ++ unsigned int zone_max_open; /* max number of open zones */ ++ unsigned int zone_max_active; /* max number of active zones */ ++ unsigned int submit_queues; /* number of submission queues */ ++ unsigned int home_node; /* home node for the device */ ++ unsigned int queue_mode; /* block interface */ ++ unsigned int blocksize; /* block size */ ++ unsigned int irqmode; /* IRQ completion handler */ ++ unsigned int hw_queue_depth; /* queue depth */ ++ unsigned int index; /* index of the disk, only valid with a disk */ ++ unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */ ++ bool blocking; /* blocking blk-mq device */ ++ bool use_per_node_hctx; /* use per-node allocation for hardware context */ ++ bool power; /* power on/off the device */ ++ bool memory_backed; /* if data is stored in memory */ ++ bool discard; /* if support discard */ ++ bool zoned; /* if device is zoned */ ++}; ++ ++struct nullb { ++ struct nullb_device *dev; ++ struct list_head list; ++ unsigned int index; ++ struct request_queue *q; ++ struct gendisk *disk; ++ struct blk_mq_tag_set *tag_set; ++ struct blk_mq_tag_set __tag_set; ++ 
unsigned int queue_depth; ++ atomic_long_t cur_bytes; ++ struct hrtimer bw_timer; ++ unsigned long cache_flush_pos; ++ spinlock_t lock; ++ ++ struct nullb_queue *queues; ++ unsigned int nr_queues; ++ char disk_name[DISK_NAME_LEN]; ++}; ++ ++blk_status_t null_process_cmd(struct nullb_cmd *cmd, ++ enum req_opf op, sector_t sector, ++ unsigned int nr_sectors); ++ ++#ifdef CONFIG_BLK_DEV_ZONED ++int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q); ++int null_register_zoned_dev(struct nullb *nullb); ++void null_free_zoned_dev(struct nullb_device *dev); ++int null_report_zones(struct gendisk *disk, sector_t sector, ++ unsigned int nr_zones, report_zones_cb cb, void *data); ++blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, ++ enum req_opf op, sector_t sector, ++ sector_t nr_sectors); ++size_t null_zone_valid_read_len(struct nullb *nullb, ++ sector_t sector, unsigned int len); ++#else ++static inline int null_init_zoned_dev(struct nullb_device *dev, ++ struct request_queue *q) ++{ ++ pr_err("CONFIG_BLK_DEV_ZONED not enabled\n"); ++ return -EINVAL; ++} ++static inline int null_register_zoned_dev(struct nullb *nullb) ++{ ++ return -ENODEV; ++} ++static inline void null_free_zoned_dev(struct nullb_device *dev) {} ++static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, ++ enum req_opf op, sector_t sector, sector_t nr_sectors) ++{ ++ return BLK_STS_NOTSUPP; ++} ++static inline size_t null_zone_valid_read_len(struct nullb *nullb, ++ sector_t sector, ++ unsigned int len) ++{ ++ return len; ++} ++#define null_report_zones NULL ++#endif /* CONFIG_BLK_DEV_ZONED */ ++#endif /* __NULL_BLK_H */ +diff --git a/drivers/block/null_blk/trace.c b/drivers/block/null_blk/trace.c +new file mode 100644 +index 0000000000000..3711cba160715 +--- /dev/null ++++ b/drivers/block/null_blk/trace.c +@@ -0,0 +1,21 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * null_blk trace related helpers. ++ * ++ * Copyright (C) 2020 Western Digital Corporation or its affiliates. ++ */ ++#include "trace.h" ++ ++/* ++ * Helper to use for all null_blk traces to extract disk name. ++ */ ++const char *nullb_trace_disk_name(struct trace_seq *p, char *name) ++{ ++ const char *ret = trace_seq_buffer_ptr(p); ++ ++ if (name && *name) ++ trace_seq_printf(p, "disk=%s, ", name); ++ trace_seq_putc(p, 0); ++ ++ return ret; ++} +diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h +new file mode 100644 +index 0000000000000..ce3b430e88c57 +--- /dev/null ++++ b/drivers/block/null_blk/trace.h +@@ -0,0 +1,79 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * null_blk device driver tracepoints. ++ * ++ * Copyright (C) 2020 Western Digital Corporation or its affiliates. 
++ */ ++ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM nullb ++ ++#if !defined(_TRACE_NULLB_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_NULLB_H ++ ++#include <linux/tracepoint.h> ++#include <linux/trace_seq.h> ++ ++#include "null_blk.h" ++ ++const char *nullb_trace_disk_name(struct trace_seq *p, char *name); ++ ++#define __print_disk_name(name) nullb_trace_disk_name(p, name) ++ ++#ifndef TRACE_HEADER_MULTI_READ ++static inline void __assign_disk_name(char *name, struct gendisk *disk) ++{ ++ if (disk) ++ memcpy(name, disk->disk_name, DISK_NAME_LEN); ++ else ++ memset(name, 0, DISK_NAME_LEN); ++} ++#endif ++ ++TRACE_EVENT(nullb_zone_op, ++ TP_PROTO(struct nullb_cmd *cmd, unsigned int zone_no, ++ unsigned int zone_cond), ++ TP_ARGS(cmd, zone_no, zone_cond), ++ TP_STRUCT__entry( ++ __array(char, disk, DISK_NAME_LEN) ++ __field(enum req_opf, op) ++ __field(unsigned int, zone_no) ++ __field(unsigned int, zone_cond) ++ ), ++ TP_fast_assign( ++ __entry->op = req_op(cmd->rq); ++ __entry->zone_no = zone_no; ++ __entry->zone_cond = zone_cond; ++ __assign_disk_name(__entry->disk, cmd->rq->rq_disk); ++ ), ++ TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s", ++ __print_disk_name(__entry->disk), ++ blk_op_str(__entry->op), ++ __entry->zone_no, ++ blk_zone_cond_str(__entry->zone_cond)) ++); ++ ++TRACE_EVENT(nullb_report_zones, ++ TP_PROTO(struct nullb *nullb, unsigned int nr_zones), ++ TP_ARGS(nullb, nr_zones), ++ TP_STRUCT__entry( ++ __array(char, disk, DISK_NAME_LEN) ++ __field(unsigned int, nr_zones) ++ ), ++ TP_fast_assign( ++ __entry->nr_zones = nr_zones; ++ __assign_disk_name(__entry->disk, nullb->disk); ++ ), ++ TP_printk("%s nr_zones=%u", ++ __print_disk_name(__entry->disk), __entry->nr_zones) ++); ++ ++#endif /* _TRACE_NULLB_H */ ++ ++#undef TRACE_INCLUDE_PATH ++#define TRACE_INCLUDE_PATH . 
++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE trace ++ ++/* This part must be outside protection */ ++#include <trace/define_trace.h> +diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c +new file mode 100644 +index 0000000000000..41220ce59659b +--- /dev/null ++++ b/drivers/block/null_blk/zoned.c +@@ -0,0 +1,617 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#include <linux/vmalloc.h> ++#include <linux/bitmap.h> ++#include "null_blk.h" ++ ++#define CREATE_TRACE_POINTS ++#include "trace.h" ++ ++#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT) ++ ++static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) ++{ ++ return sect >> ilog2(dev->zone_size_sects); ++} ++ ++int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) ++{ ++ sector_t dev_capacity_sects, zone_capacity_sects; ++ sector_t sector = 0; ++ unsigned int i; ++ ++ if (!is_power_of_2(dev->zone_size)) { ++ pr_err("zone_size must be power-of-two\n"); ++ return -EINVAL; ++ } ++ if (dev->zone_size > dev->size) { ++ pr_err("Zone size larger than device capacity\n"); ++ return -EINVAL; ++ } ++ ++ if (!dev->zone_capacity) ++ dev->zone_capacity = dev->zone_size; ++ ++ if (dev->zone_capacity > dev->zone_size) { ++ pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n", ++ dev->zone_capacity, dev->zone_size); ++ return -EINVAL; ++ } ++ ++ zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity); ++ dev_capacity_sects = MB_TO_SECTS(dev->size); ++ dev->zone_size_sects = MB_TO_SECTS(dev->zone_size); ++ dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects); ++ if (dev_capacity_sects & (dev->zone_size_sects - 1)) ++ dev->nr_zones++; ++ ++ dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone), ++ GFP_KERNEL | __GFP_ZERO); ++ if (!dev->zones) ++ return -ENOMEM; ++ ++ /* ++ * With memory backing, the zone_lock spinlock needs to be temporarily ++ * released to avoid scheduling in atomic context. To guarantee zone ++ * information protection, use a bitmap to lock zones with ++ * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing ++ * implies that the queue is marked with BLK_MQ_F_BLOCKING. 
++ */ ++ spin_lock_init(&dev->zone_lock); ++ if (dev->memory_backed) { ++ dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); ++ if (!dev->zone_locks) { ++ kvfree(dev->zones); ++ return -ENOMEM; ++ } ++ } ++ ++ if (dev->zone_nr_conv >= dev->nr_zones) { ++ dev->zone_nr_conv = dev->nr_zones - 1; ++ pr_info("changed the number of conventional zones to %u", ++ dev->zone_nr_conv); ++ } ++ ++ /* Max active zones has to be < nbr of seq zones in order to be enforceable */ ++ if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) { ++ dev->zone_max_active = 0; ++ pr_info("zone_max_active limit disabled, limit >= zone count\n"); ++ } ++ ++ /* Max open zones has to be <= max active zones */ ++ if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) { ++ dev->zone_max_open = dev->zone_max_active; ++ pr_info("changed the maximum number of open zones to %u\n", ++ dev->nr_zones); ++ } else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) { ++ dev->zone_max_open = 0; ++ pr_info("zone_max_open limit disabled, limit >= zone count\n"); ++ } ++ ++ for (i = 0; i < dev->zone_nr_conv; i++) { ++ struct blk_zone *zone = &dev->zones[i]; ++ ++ zone->start = sector; ++ zone->len = dev->zone_size_sects; ++ zone->capacity = zone->len; ++ zone->wp = zone->start + zone->len; ++ zone->type = BLK_ZONE_TYPE_CONVENTIONAL; ++ zone->cond = BLK_ZONE_COND_NOT_WP; ++ ++ sector += dev->zone_size_sects; ++ } ++ ++ for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { ++ struct blk_zone *zone = &dev->zones[i]; ++ ++ zone->start = zone->wp = sector; ++ if (zone->start + dev->zone_size_sects > dev_capacity_sects) ++ zone->len = dev_capacity_sects - zone->start; ++ else ++ zone->len = dev->zone_size_sects; ++ zone->capacity = ++ min_t(sector_t, zone->len, zone_capacity_sects); ++ zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; ++ zone->cond = BLK_ZONE_COND_EMPTY; ++ ++ sector += dev->zone_size_sects; ++ } ++ ++ q->limits.zoned = BLK_ZONED_HM; ++ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); ++ blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); ++ ++ return 0; ++} ++ ++int null_register_zoned_dev(struct nullb *nullb) ++{ ++ struct nullb_device *dev = nullb->dev; ++ struct request_queue *q = nullb->q; ++ ++ if (queue_is_mq(q)) { ++ int ret = blk_revalidate_disk_zones(nullb->disk, NULL); ++ ++ if (ret) ++ return ret; ++ } else { ++ blk_queue_chunk_sectors(q, dev->zone_size_sects); ++ q->nr_zones = blkdev_nr_zones(nullb->disk); ++ } ++ ++ blk_queue_max_zone_append_sectors(q, dev->zone_size_sects); ++ blk_queue_max_open_zones(q, dev->zone_max_open); ++ blk_queue_max_active_zones(q, dev->zone_max_active); ++ ++ return 0; ++} ++ ++void null_free_zoned_dev(struct nullb_device *dev) ++{ ++ bitmap_free(dev->zone_locks); ++ kvfree(dev->zones); ++ dev->zones = NULL; ++} ++ ++static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno) ++{ ++ if (dev->memory_backed) ++ wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE); ++ spin_lock_irq(&dev->zone_lock); ++} ++ ++static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno) ++{ ++ spin_unlock_irq(&dev->zone_lock); ++ ++ if (dev->memory_backed) ++ clear_and_wake_up_bit(zno, dev->zone_locks); ++} ++ ++int null_report_zones(struct gendisk *disk, sector_t sector, ++ unsigned int nr_zones, report_zones_cb cb, void *data) ++{ ++ struct nullb *nullb = disk->private_data; ++ struct nullb_device *dev = nullb->dev; ++ unsigned int first_zone, i, zno; ++ struct blk_zone zone; ++ int error; ++ ++ first_zone = 
null_zone_no(dev, sector); ++ if (first_zone >= dev->nr_zones) ++ return 0; ++ ++ nr_zones = min(nr_zones, dev->nr_zones - first_zone); ++ trace_nullb_report_zones(nullb, nr_zones); ++ ++ zno = first_zone; ++ for (i = 0; i < nr_zones; i++, zno++) { ++ /* ++ * Stacked DM target drivers will remap the zone information by ++ * modifying the zone information passed to the report callback. ++ * So use a local copy to avoid corruption of the device zone ++ * array. ++ */ ++ null_lock_zone(dev, zno); ++ memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone)); ++ null_unlock_zone(dev, zno); ++ ++ error = cb(&zone, i, data); ++ if (error) ++ return error; ++ } ++ ++ return nr_zones; ++} ++ ++/* ++ * This is called in the case of memory backing from null_process_cmd() ++ * with the target zone already locked. ++ */ ++size_t null_zone_valid_read_len(struct nullb *nullb, ++ sector_t sector, unsigned int len) ++{ ++ struct nullb_device *dev = nullb->dev; ++ struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)]; ++ unsigned int nr_sectors = len >> SECTOR_SHIFT; ++ ++ /* Read must be below the write pointer position */ ++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL || ++ sector + nr_sectors <= zone->wp) ++ return len; ++ ++ if (sector > zone->wp) ++ return 0; ++ ++ return (zone->wp - sector) << SECTOR_SHIFT; ++} ++ ++static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *zone) ++{ ++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) ++ return BLK_STS_IOERR; ++ ++ switch (zone->cond) { ++ case BLK_ZONE_COND_CLOSED: ++ /* close operation on closed is not an error */ ++ return BLK_STS_OK; ++ case BLK_ZONE_COND_IMP_OPEN: ++ dev->nr_zones_imp_open--; ++ break; ++ case BLK_ZONE_COND_EXP_OPEN: ++ dev->nr_zones_exp_open--; ++ break; ++ case BLK_ZONE_COND_EMPTY: ++ case BLK_ZONE_COND_FULL: ++ default: ++ return BLK_STS_IOERR; ++ } ++ ++ if (zone->wp == zone->start) { ++ zone->cond = BLK_ZONE_COND_EMPTY; ++ } else { ++ zone->cond = BLK_ZONE_COND_CLOSED; ++ dev->nr_zones_closed++; ++ } ++ ++ return BLK_STS_OK; ++} ++ ++static void null_close_first_imp_zone(struct nullb_device *dev) ++{ ++ unsigned int i; ++ ++ for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { ++ if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) { ++ null_close_zone(dev, &dev->zones[i]); ++ return; ++ } ++ } ++} ++ ++static blk_status_t null_check_active(struct nullb_device *dev) ++{ ++ if (!dev->zone_max_active) ++ return BLK_STS_OK; ++ ++ if (dev->nr_zones_exp_open + dev->nr_zones_imp_open + ++ dev->nr_zones_closed < dev->zone_max_active) ++ return BLK_STS_OK; ++ ++ return BLK_STS_ZONE_ACTIVE_RESOURCE; ++} ++ ++static blk_status_t null_check_open(struct nullb_device *dev) ++{ ++ if (!dev->zone_max_open) ++ return BLK_STS_OK; ++ ++ if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open) ++ return BLK_STS_OK; ++ ++ if (dev->nr_zones_imp_open) { ++ if (null_check_active(dev) == BLK_STS_OK) { ++ null_close_first_imp_zone(dev); ++ return BLK_STS_OK; ++ } ++ } ++ ++ return BLK_STS_ZONE_OPEN_RESOURCE; ++} ++ ++/* ++ * This function matches the manage open zone resources function in the ZBC standard, ++ * with the addition of max active zones support (added in the ZNS standard). ++ * ++ * The function determines if a zone can transition to implicit open or explicit open, ++ * while maintaining the max open zone (and max active zone) limit(s). It may close an ++ * implicit open zone in order to make additional zone resources available. 
++ * ++ * ZBC states that an implicit open zone shall be closed only if there is not ++ * room within the open limit. However, with the addition of an active limit, ++ * it is not certain that closing an implicit open zone will allow a new zone ++ * to be opened, since we might already be at the active limit capacity. ++ */ ++static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone) ++{ ++ blk_status_t ret; ++ ++ switch (zone->cond) { ++ case BLK_ZONE_COND_EMPTY: ++ ret = null_check_active(dev); ++ if (ret != BLK_STS_OK) ++ return ret; ++ fallthrough; ++ case BLK_ZONE_COND_CLOSED: ++ return null_check_open(dev); ++ default: ++ /* Should never be called for other states */ ++ WARN_ON(1); ++ return BLK_STS_IOERR; ++ } ++} ++ ++static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, ++ unsigned int nr_sectors, bool append) ++{ ++ struct nullb_device *dev = cmd->nq->dev; ++ unsigned int zno = null_zone_no(dev, sector); ++ struct blk_zone *zone = &dev->zones[zno]; ++ blk_status_t ret; ++ ++ trace_nullb_zone_op(cmd, zno, zone->cond); ++ ++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { ++ if (append) ++ return BLK_STS_IOERR; ++ return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); ++ } ++ ++ null_lock_zone(dev, zno); ++ ++ switch (zone->cond) { ++ case BLK_ZONE_COND_FULL: ++ /* Cannot write to a full zone */ ++ ret = BLK_STS_IOERR; ++ goto unlock; ++ case BLK_ZONE_COND_EMPTY: ++ case BLK_ZONE_COND_CLOSED: ++ ret = null_check_zone_resources(dev, zone); ++ if (ret != BLK_STS_OK) ++ goto unlock; ++ break; ++ case BLK_ZONE_COND_IMP_OPEN: ++ case BLK_ZONE_COND_EXP_OPEN: ++ break; ++ default: ++ /* Invalid zone condition */ ++ ret = BLK_STS_IOERR; ++ goto unlock; ++ } ++ ++ /* ++ * Regular writes must be at the write pointer position. ++ * Zone append writes are automatically issued at the write ++ * pointer and the position returned using the request or BIO ++ * sector. ++ */ ++ if (append) { ++ sector = zone->wp; ++ if (cmd->bio) ++ cmd->bio->bi_iter.bi_sector = sector; ++ else ++ cmd->rq->__sector = sector; ++ } else if (sector != zone->wp) { ++ ret = BLK_STS_IOERR; ++ goto unlock; ++ } ++ ++ if (zone->wp + nr_sectors > zone->start + zone->capacity) { ++ ret = BLK_STS_IOERR; ++ goto unlock; ++ } ++ ++ if (zone->cond == BLK_ZONE_COND_CLOSED) { ++ dev->nr_zones_closed--; ++ dev->nr_zones_imp_open++; ++ } else if (zone->cond == BLK_ZONE_COND_EMPTY) { ++ dev->nr_zones_imp_open++; ++ } ++ if (zone->cond != BLK_ZONE_COND_EXP_OPEN) ++ zone->cond = BLK_ZONE_COND_IMP_OPEN; ++ ++ /* ++ * Memory backing allocation may sleep: release the zone_lock spinlock ++ * to avoid scheduling in atomic context. Zone operation atomicity is ++ * still guaranteed through the zone_locks bitmap. 
++ */ ++ if (dev->memory_backed) ++ spin_unlock_irq(&dev->zone_lock); ++ ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); ++ if (dev->memory_backed) ++ spin_lock_irq(&dev->zone_lock); ++ ++ if (ret != BLK_STS_OK) ++ goto unlock; ++ ++ zone->wp += nr_sectors; ++ if (zone->wp == zone->start + zone->capacity) { ++ if (zone->cond == BLK_ZONE_COND_EXP_OPEN) ++ dev->nr_zones_exp_open--; ++ else if (zone->cond == BLK_ZONE_COND_IMP_OPEN) ++ dev->nr_zones_imp_open--; ++ zone->cond = BLK_ZONE_COND_FULL; ++ } ++ ret = BLK_STS_OK; ++ ++unlock: ++ null_unlock_zone(dev, zno); ++ ++ return ret; ++} ++ ++static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone) ++{ ++ blk_status_t ret; ++ ++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) ++ return BLK_STS_IOERR; ++ ++ switch (zone->cond) { ++ case BLK_ZONE_COND_EXP_OPEN: ++ /* open operation on exp open is not an error */ ++ return BLK_STS_OK; ++ case BLK_ZONE_COND_EMPTY: ++ ret = null_check_zone_resources(dev, zone); ++ if (ret != BLK_STS_OK) ++ return ret; ++ break; ++ case BLK_ZONE_COND_IMP_OPEN: ++ dev->nr_zones_imp_open--; ++ break; ++ case BLK_ZONE_COND_CLOSED: ++ ret = null_check_zone_resources(dev, zone); ++ if (ret != BLK_STS_OK) ++ return ret; ++ dev->nr_zones_closed--; ++ break; ++ case BLK_ZONE_COND_FULL: ++ default: ++ return BLK_STS_IOERR; ++ } ++ ++ zone->cond = BLK_ZONE_COND_EXP_OPEN; ++ dev->nr_zones_exp_open++; ++ ++ return BLK_STS_OK; ++} ++ ++static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone) ++{ ++ blk_status_t ret; ++ ++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) ++ return BLK_STS_IOERR; ++ ++ switch (zone->cond) { ++ case BLK_ZONE_COND_FULL: ++ /* finish operation on full is not an error */ ++ return BLK_STS_OK; ++ case BLK_ZONE_COND_EMPTY: ++ ret = null_check_zone_resources(dev, zone); ++ if (ret != BLK_STS_OK) ++ return ret; ++ break; ++ case BLK_ZONE_COND_IMP_OPEN: ++ dev->nr_zones_imp_open--; ++ break; ++ case BLK_ZONE_COND_EXP_OPEN: ++ dev->nr_zones_exp_open--; ++ break; ++ case BLK_ZONE_COND_CLOSED: ++ ret = null_check_zone_resources(dev, zone); ++ if (ret != BLK_STS_OK) ++ return ret; ++ dev->nr_zones_closed--; ++ break; ++ default: ++ return BLK_STS_IOERR; ++ } ++ ++ zone->cond = BLK_ZONE_COND_FULL; ++ zone->wp = zone->start + zone->len; ++ ++ return BLK_STS_OK; ++} ++ ++static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *zone) ++{ ++ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) ++ return BLK_STS_IOERR; ++ ++ switch (zone->cond) { ++ case BLK_ZONE_COND_EMPTY: ++ /* reset operation on empty is not an error */ ++ return BLK_STS_OK; ++ case BLK_ZONE_COND_IMP_OPEN: ++ dev->nr_zones_imp_open--; ++ break; ++ case BLK_ZONE_COND_EXP_OPEN: ++ dev->nr_zones_exp_open--; ++ break; ++ case BLK_ZONE_COND_CLOSED: ++ dev->nr_zones_closed--; ++ break; ++ case BLK_ZONE_COND_FULL: ++ break; ++ default: ++ return BLK_STS_IOERR; ++ } ++ ++ zone->cond = BLK_ZONE_COND_EMPTY; ++ zone->wp = zone->start; ++ ++ return BLK_STS_OK; ++} ++ ++static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, ++ sector_t sector) ++{ ++ struct nullb_device *dev = cmd->nq->dev; ++ unsigned int zone_no; ++ struct blk_zone *zone; ++ blk_status_t ret; ++ size_t i; ++ ++ if (op == REQ_OP_ZONE_RESET_ALL) { ++ for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { ++ null_lock_zone(dev, i); ++ zone = &dev->zones[i]; ++ if (zone->cond != BLK_ZONE_COND_EMPTY) { ++ null_reset_zone(dev, zone); ++ trace_nullb_zone_op(cmd, i, zone->cond); ++ } ++ 
null_unlock_zone(dev, i); ++ } ++ return BLK_STS_OK; ++ } ++ ++ zone_no = null_zone_no(dev, sector); ++ zone = &dev->zones[zone_no]; ++ ++ null_lock_zone(dev, zone_no); ++ ++ switch (op) { ++ case REQ_OP_ZONE_RESET: ++ ret = null_reset_zone(dev, zone); ++ break; ++ case REQ_OP_ZONE_OPEN: ++ ret = null_open_zone(dev, zone); ++ break; ++ case REQ_OP_ZONE_CLOSE: ++ ret = null_close_zone(dev, zone); ++ break; ++ case REQ_OP_ZONE_FINISH: ++ ret = null_finish_zone(dev, zone); ++ break; ++ default: ++ ret = BLK_STS_NOTSUPP; ++ break; ++ } ++ ++ if (ret == BLK_STS_OK) ++ trace_nullb_zone_op(cmd, zone_no, zone->cond); ++ ++ null_unlock_zone(dev, zone_no); ++ ++ return ret; ++} ++ ++blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op, ++ sector_t sector, sector_t nr_sectors) ++{ ++ struct nullb_device *dev = cmd->nq->dev; ++ unsigned int zno = null_zone_no(dev, sector); ++ blk_status_t sts; ++ ++ switch (op) { ++ case REQ_OP_WRITE: ++ sts = null_zone_write(cmd, sector, nr_sectors, false); ++ break; ++ case REQ_OP_ZONE_APPEND: ++ sts = null_zone_write(cmd, sector, nr_sectors, true); ++ break; ++ case REQ_OP_ZONE_RESET: ++ case REQ_OP_ZONE_RESET_ALL: ++ case REQ_OP_ZONE_OPEN: ++ case REQ_OP_ZONE_CLOSE: ++ case REQ_OP_ZONE_FINISH: ++ sts = null_zone_mgmt(cmd, op, sector); ++ break; ++ default: ++ null_lock_zone(dev, zno); ++ sts = null_process_cmd(cmd, op, sector, nr_sectors); ++ null_unlock_zone(dev, zno); ++ } ++ ++ return sts; ++} +diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c +deleted file mode 100644 +index c6ba8f9f3f311..0000000000000 +--- a/drivers/block/null_blk_main.c ++++ /dev/null +@@ -1,2036 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-only +-/* +- * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and +- * Shaohua Li <shli@fb.com> +- */ +-#include <linux/module.h> +- +-#include <linux/moduleparam.h> +-#include <linux/sched.h> +-#include <linux/fs.h> +-#include <linux/init.h> +-#include "null_blk.h" +- +-#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) +-#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) +-#define SECTOR_MASK (PAGE_SECTORS - 1) +- +-#define FREE_BATCH 16 +- +-#define TICKS_PER_SEC 50ULL +-#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC) +- +-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +-static DECLARE_FAULT_ATTR(null_timeout_attr); +-static DECLARE_FAULT_ATTR(null_requeue_attr); +-static DECLARE_FAULT_ATTR(null_init_hctx_attr); +-#endif +- +-static inline u64 mb_per_tick(int mbps) +-{ +- return (1 << 20) / TICKS_PER_SEC * ((u64) mbps); +-} +- +-/* +- * Status flags for nullb_device. +- * +- * CONFIGURED: Device has been configured and turned on. Cannot reconfigure. +- * UP: Device is currently on and visible in userspace. +- * THROTTLED: Device is being throttled. +- * CACHE: Device is using a write-back cache. +- */ +-enum nullb_device_flags { +- NULLB_DEV_FL_CONFIGURED = 0, +- NULLB_DEV_FL_UP = 1, +- NULLB_DEV_FL_THROTTLED = 2, +- NULLB_DEV_FL_CACHE = 3, +-}; +- +-#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2) +-/* +- * nullb_page is a page in memory for nullb devices. +- * +- * @page: The page holding the data. +- * @bitmap: The bitmap represents which sector in the page has data. +- * Each bit represents one block size. For example, sector 8 +- * will use the 7th bit +- * The highest 2 bits of bitmap are for special purpose. LOCK means the cache +- * page is being flushing to storage. FREE means the cache page is freed and +- * should be skipped from flushing to storage. 
Please see +- * null_make_cache_space +- */ +-struct nullb_page { +- struct page *page; +- DECLARE_BITMAP(bitmap, MAP_SZ); +-}; +-#define NULLB_PAGE_LOCK (MAP_SZ - 1) +-#define NULLB_PAGE_FREE (MAP_SZ - 2) +- +-static LIST_HEAD(nullb_list); +-static struct mutex lock; +-static int null_major; +-static DEFINE_IDA(nullb_indexes); +-static struct blk_mq_tag_set tag_set; +- +-enum { +- NULL_IRQ_NONE = 0, +- NULL_IRQ_SOFTIRQ = 1, +- NULL_IRQ_TIMER = 2, +-}; +- +-enum { +- NULL_Q_BIO = 0, +- NULL_Q_RQ = 1, +- NULL_Q_MQ = 2, +-}; +- +-static int g_no_sched; +-module_param_named(no_sched, g_no_sched, int, 0444); +-MODULE_PARM_DESC(no_sched, "No io scheduler"); +- +-static int g_submit_queues = 1; +-module_param_named(submit_queues, g_submit_queues, int, 0444); +-MODULE_PARM_DESC(submit_queues, "Number of submission queues"); +- +-static int g_home_node = NUMA_NO_NODE; +-module_param_named(home_node, g_home_node, int, 0444); +-MODULE_PARM_DESC(home_node, "Home node for the device"); +- +-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +-/* +- * For more details about fault injection, please refer to +- * Documentation/fault-injection/fault-injection.rst. +- */ +-static char g_timeout_str[80]; +-module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444); +-MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>"); +- +-static char g_requeue_str[80]; +-module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444); +-MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>"); +- +-static char g_init_hctx_str[80]; +-module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444); +-MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>"); +-#endif +- +-static int g_queue_mode = NULL_Q_MQ; +- +-static int null_param_store_val(const char *str, int *val, int min, int max) +-{ +- int ret, new_val; +- +- ret = kstrtoint(str, 10, &new_val); +- if (ret) +- return -EINVAL; +- +- if (new_val < min || new_val > max) +- return -EINVAL; +- +- *val = new_val; +- return 0; +-} +- +-static int null_set_queue_mode(const char *str, const struct kernel_param *kp) +-{ +- return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ); +-} +- +-static const struct kernel_param_ops null_queue_mode_param_ops = { +- .set = null_set_queue_mode, +- .get = param_get_int, +-}; +- +-device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444); +-MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); +- +-static int g_gb = 250; +-module_param_named(gb, g_gb, int, 0444); +-MODULE_PARM_DESC(gb, "Size in GB"); +- +-static int g_bs = 512; +-module_param_named(bs, g_bs, int, 0444); +-MODULE_PARM_DESC(bs, "Block size (in bytes)"); +- +-static unsigned int nr_devices = 1; +-module_param(nr_devices, uint, 0444); +-MODULE_PARM_DESC(nr_devices, "Number of devices to register"); +- +-static bool g_blocking; +-module_param_named(blocking, g_blocking, bool, 0444); +-MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); +- +-static bool shared_tags; +-module_param(shared_tags, bool, 0444); +-MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq"); +- +-static bool g_shared_tag_bitmap; +-module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444); +-MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq"); +- +-static int g_irqmode = 
NULL_IRQ_SOFTIRQ; +- +-static int null_set_irqmode(const char *str, const struct kernel_param *kp) +-{ +- return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE, +- NULL_IRQ_TIMER); +-} +- +-static const struct kernel_param_ops null_irqmode_param_ops = { +- .set = null_set_irqmode, +- .get = param_get_int, +-}; +- +-device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444); +-MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); +- +-static unsigned long g_completion_nsec = 10000; +-module_param_named(completion_nsec, g_completion_nsec, ulong, 0444); +-MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); +- +-static int g_hw_queue_depth = 64; +-module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444); +-MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); +- +-static bool g_use_per_node_hctx; +-module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444); +-MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); +- +-static bool g_zoned; +-module_param_named(zoned, g_zoned, bool, S_IRUGO); +-MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false"); +- +-static unsigned long g_zone_size = 256; +-module_param_named(zone_size, g_zone_size, ulong, S_IRUGO); +-MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256"); +- +-static unsigned long g_zone_capacity; +-module_param_named(zone_capacity, g_zone_capacity, ulong, 0444); +-MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size"); +- +-static unsigned int g_zone_nr_conv; +-module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444); +-MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0"); +- +-static unsigned int g_zone_max_open; +-module_param_named(zone_max_open, g_zone_max_open, uint, 0444); +-MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)"); +- +-static unsigned int g_zone_max_active; +-module_param_named(zone_max_active, g_zone_max_active, uint, 0444); +-MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)"); +- +-static struct nullb_device *null_alloc_dev(void); +-static void null_free_dev(struct nullb_device *dev); +-static void null_del_dev(struct nullb *nullb); +-static int null_add_dev(struct nullb_device *dev); +-static void null_free_device_storage(struct nullb_device *dev, bool is_cache); +- +-static inline struct nullb_device *to_nullb_device(struct config_item *item) +-{ +- return item ? 
container_of(item, struct nullb_device, item) : NULL; +-} +- +-static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page) +-{ +- return snprintf(page, PAGE_SIZE, "%u\n", val); +-} +- +-static inline ssize_t nullb_device_ulong_attr_show(unsigned long val, +- char *page) +-{ +- return snprintf(page, PAGE_SIZE, "%lu\n", val); +-} +- +-static inline ssize_t nullb_device_bool_attr_show(bool val, char *page) +-{ +- return snprintf(page, PAGE_SIZE, "%u\n", val); +-} +- +-static ssize_t nullb_device_uint_attr_store(unsigned int *val, +- const char *page, size_t count) +-{ +- unsigned int tmp; +- int result; +- +- result = kstrtouint(page, 0, &tmp); +- if (result < 0) +- return result; +- +- *val = tmp; +- return count; +-} +- +-static ssize_t nullb_device_ulong_attr_store(unsigned long *val, +- const char *page, size_t count) +-{ +- int result; +- unsigned long tmp; +- +- result = kstrtoul(page, 0, &tmp); +- if (result < 0) +- return result; +- +- *val = tmp; +- return count; +-} +- +-static ssize_t nullb_device_bool_attr_store(bool *val, const char *page, +- size_t count) +-{ +- bool tmp; +- int result; +- +- result = kstrtobool(page, &tmp); +- if (result < 0) +- return result; +- +- *val = tmp; +- return count; +-} +- +-/* The following macro should only be used with TYPE = {uint, ulong, bool}. */ +-#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \ +-static ssize_t \ +-nullb_device_##NAME##_show(struct config_item *item, char *page) \ +-{ \ +- return nullb_device_##TYPE##_attr_show( \ +- to_nullb_device(item)->NAME, page); \ +-} \ +-static ssize_t \ +-nullb_device_##NAME##_store(struct config_item *item, const char *page, \ +- size_t count) \ +-{ \ +- int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\ +- struct nullb_device *dev = to_nullb_device(item); \ +- TYPE new_value = 0; \ +- int ret; \ +- \ +- ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\ +- if (ret < 0) \ +- return ret; \ +- if (apply_fn) \ +- ret = apply_fn(dev, new_value); \ +- else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \ +- ret = -EBUSY; \ +- if (ret < 0) \ +- return ret; \ +- dev->NAME = new_value; \ +- return count; \ +-} \ +-CONFIGFS_ATTR(nullb_device_, NAME); +- +-static int nullb_apply_submit_queues(struct nullb_device *dev, +- unsigned int submit_queues) +-{ +- struct nullb *nullb = dev->nullb; +- struct blk_mq_tag_set *set; +- +- if (!nullb) +- return 0; +- +- /* +- * Make sure that null_init_hctx() does not access nullb->queues[] past +- * the end of that array. +- */ +- if (submit_queues > nr_cpu_ids) +- return -EINVAL; +- set = nullb->tag_set; +- blk_mq_update_nr_hw_queues(set, submit_queues); +- return set->nr_hw_queues == submit_queues ? 
0 : -ENOMEM; +-} +- +-NULLB_DEVICE_ATTR(size, ulong, NULL); +-NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL); +-NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); +-NULLB_DEVICE_ATTR(home_node, uint, NULL); +-NULLB_DEVICE_ATTR(queue_mode, uint, NULL); +-NULLB_DEVICE_ATTR(blocksize, uint, NULL); +-NULLB_DEVICE_ATTR(irqmode, uint, NULL); +-NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL); +-NULLB_DEVICE_ATTR(index, uint, NULL); +-NULLB_DEVICE_ATTR(blocking, bool, NULL); +-NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL); +-NULLB_DEVICE_ATTR(memory_backed, bool, NULL); +-NULLB_DEVICE_ATTR(discard, bool, NULL); +-NULLB_DEVICE_ATTR(mbps, uint, NULL); +-NULLB_DEVICE_ATTR(cache_size, ulong, NULL); +-NULLB_DEVICE_ATTR(zoned, bool, NULL); +-NULLB_DEVICE_ATTR(zone_size, ulong, NULL); +-NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL); +-NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL); +-NULLB_DEVICE_ATTR(zone_max_open, uint, NULL); +-NULLB_DEVICE_ATTR(zone_max_active, uint, NULL); +- +-static ssize_t nullb_device_power_show(struct config_item *item, char *page) +-{ +- return nullb_device_bool_attr_show(to_nullb_device(item)->power, page); +-} +- +-static ssize_t nullb_device_power_store(struct config_item *item, +- const char *page, size_t count) +-{ +- struct nullb_device *dev = to_nullb_device(item); +- bool newp = false; +- ssize_t ret; +- +- ret = nullb_device_bool_attr_store(&newp, page, count); +- if (ret < 0) +- return ret; +- +- if (!dev->power && newp) { +- if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags)) +- return count; +- if (null_add_dev(dev)) { +- clear_bit(NULLB_DEV_FL_UP, &dev->flags); +- return -ENOMEM; +- } +- +- set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); +- dev->power = newp; +- } else if (dev->power && !newp) { +- if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) { +- mutex_lock(&lock); +- dev->power = newp; +- null_del_dev(dev->nullb); +- mutex_unlock(&lock); +- } +- clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); +- } +- +- return count; +-} +- +-CONFIGFS_ATTR(nullb_device_, power); +- +-static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page) +-{ +- struct nullb_device *t_dev = to_nullb_device(item); +- +- return badblocks_show(&t_dev->badblocks, page, 0); +-} +- +-static ssize_t nullb_device_badblocks_store(struct config_item *item, +- const char *page, size_t count) +-{ +- struct nullb_device *t_dev = to_nullb_device(item); +- char *orig, *buf, *tmp; +- u64 start, end; +- int ret; +- +- orig = kstrndup(page, count, GFP_KERNEL); +- if (!orig) +- return -ENOMEM; +- +- buf = strstrip(orig); +- +- ret = -EINVAL; +- if (buf[0] != '+' && buf[0] != '-') +- goto out; +- tmp = strchr(&buf[1], '-'); +- if (!tmp) +- goto out; +- *tmp = '\0'; +- ret = kstrtoull(buf + 1, 0, &start); +- if (ret) +- goto out; +- ret = kstrtoull(tmp + 1, 0, &end); +- if (ret) +- goto out; +- ret = -EINVAL; +- if (start > end) +- goto out; +- /* enable badblocks */ +- cmpxchg(&t_dev->badblocks.shift, -1, 0); +- if (buf[0] == '+') +- ret = badblocks_set(&t_dev->badblocks, start, +- end - start + 1, 1); +- else +- ret = badblocks_clear(&t_dev->badblocks, start, +- end - start + 1); +- if (ret == 0) +- ret = count; +-out: +- kfree(orig); +- return ret; +-} +-CONFIGFS_ATTR(nullb_device_, badblocks); +- +-static struct configfs_attribute *nullb_device_attrs[] = { +- &nullb_device_attr_size, +- &nullb_device_attr_completion_nsec, +- &nullb_device_attr_submit_queues, +- &nullb_device_attr_home_node, +- &nullb_device_attr_queue_mode, +- &nullb_device_attr_blocksize, +- 
&nullb_device_attr_irqmode, +- &nullb_device_attr_hw_queue_depth, +- &nullb_device_attr_index, +- &nullb_device_attr_blocking, +- &nullb_device_attr_use_per_node_hctx, +- &nullb_device_attr_power, +- &nullb_device_attr_memory_backed, +- &nullb_device_attr_discard, +- &nullb_device_attr_mbps, +- &nullb_device_attr_cache_size, +- &nullb_device_attr_badblocks, +- &nullb_device_attr_zoned, +- &nullb_device_attr_zone_size, +- &nullb_device_attr_zone_capacity, +- &nullb_device_attr_zone_nr_conv, +- &nullb_device_attr_zone_max_open, +- &nullb_device_attr_zone_max_active, +- NULL, +-}; +- +-static void nullb_device_release(struct config_item *item) +-{ +- struct nullb_device *dev = to_nullb_device(item); +- +- null_free_device_storage(dev, false); +- null_free_dev(dev); +-} +- +-static struct configfs_item_operations nullb_device_ops = { +- .release = nullb_device_release, +-}; +- +-static const struct config_item_type nullb_device_type = { +- .ct_item_ops = &nullb_device_ops, +- .ct_attrs = nullb_device_attrs, +- .ct_owner = THIS_MODULE, +-}; +- +-static struct +-config_item *nullb_group_make_item(struct config_group *group, const char *name) +-{ +- struct nullb_device *dev; +- +- dev = null_alloc_dev(); +- if (!dev) +- return ERR_PTR(-ENOMEM); +- +- config_item_init_type_name(&dev->item, name, &nullb_device_type); +- +- return &dev->item; +-} +- +-static void +-nullb_group_drop_item(struct config_group *group, struct config_item *item) +-{ +- struct nullb_device *dev = to_nullb_device(item); +- +- if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) { +- mutex_lock(&lock); +- dev->power = false; +- null_del_dev(dev->nullb); +- mutex_unlock(&lock); +- } +- +- config_item_put(item); +-} +- +-static ssize_t memb_group_features_show(struct config_item *item, char *page) +-{ +- return snprintf(page, PAGE_SIZE, +- "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n"); +-} +- +-CONFIGFS_ATTR_RO(memb_group_, features); +- +-static struct configfs_attribute *nullb_group_attrs[] = { +- &memb_group_attr_features, +- NULL, +-}; +- +-static struct configfs_group_operations nullb_group_ops = { +- .make_item = nullb_group_make_item, +- .drop_item = nullb_group_drop_item, +-}; +- +-static const struct config_item_type nullb_group_type = { +- .ct_group_ops = &nullb_group_ops, +- .ct_attrs = nullb_group_attrs, +- .ct_owner = THIS_MODULE, +-}; +- +-static struct configfs_subsystem nullb_subsys = { +- .su_group = { +- .cg_item = { +- .ci_namebuf = "nullb", +- .ci_type = &nullb_group_type, +- }, +- }, +-}; +- +-static inline int null_cache_active(struct nullb *nullb) +-{ +- return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); +-} +- +-static struct nullb_device *null_alloc_dev(void) +-{ +- struct nullb_device *dev; +- +- dev = kzalloc(sizeof(*dev), GFP_KERNEL); +- if (!dev) +- return NULL; +- INIT_RADIX_TREE(&dev->data, GFP_ATOMIC); +- INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC); +- if (badblocks_init(&dev->badblocks, 0)) { +- kfree(dev); +- return NULL; +- } +- +- dev->size = g_gb * 1024; +- dev->completion_nsec = g_completion_nsec; +- dev->submit_queues = g_submit_queues; +- dev->home_node = g_home_node; +- dev->queue_mode = g_queue_mode; +- dev->blocksize = g_bs; +- dev->irqmode = g_irqmode; +- dev->hw_queue_depth = g_hw_queue_depth; +- dev->blocking = g_blocking; +- dev->use_per_node_hctx = g_use_per_node_hctx; +- dev->zoned = g_zoned; +- dev->zone_size = g_zone_size; +- dev->zone_capacity = g_zone_capacity; +- dev->zone_nr_conv = 
g_zone_nr_conv; +- dev->zone_max_open = g_zone_max_open; +- dev->zone_max_active = g_zone_max_active; +- return dev; +-} +- +-static void null_free_dev(struct nullb_device *dev) +-{ +- if (!dev) +- return; +- +- null_free_zoned_dev(dev); +- badblocks_exit(&dev->badblocks); +- kfree(dev); +-} +- +-static void put_tag(struct nullb_queue *nq, unsigned int tag) +-{ +- clear_bit_unlock(tag, nq->tag_map); +- +- if (waitqueue_active(&nq->wait)) +- wake_up(&nq->wait); +-} +- +-static unsigned int get_tag(struct nullb_queue *nq) +-{ +- unsigned int tag; +- +- do { +- tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); +- if (tag >= nq->queue_depth) +- return -1U; +- } while (test_and_set_bit_lock(tag, nq->tag_map)); +- +- return tag; +-} +- +-static void free_cmd(struct nullb_cmd *cmd) +-{ +- put_tag(cmd->nq, cmd->tag); +-} +- +-static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer); +- +-static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) +-{ +- struct nullb_cmd *cmd; +- unsigned int tag; +- +- tag = get_tag(nq); +- if (tag != -1U) { +- cmd = &nq->cmds[tag]; +- cmd->tag = tag; +- cmd->error = BLK_STS_OK; +- cmd->nq = nq; +- if (nq->dev->irqmode == NULL_IRQ_TIMER) { +- hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, +- HRTIMER_MODE_REL); +- cmd->timer.function = null_cmd_timer_expired; +- } +- return cmd; +- } +- +- return NULL; +-} +- +-static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) +-{ +- struct nullb_cmd *cmd; +- DEFINE_WAIT(wait); +- +- cmd = __alloc_cmd(nq); +- if (cmd || !can_wait) +- return cmd; +- +- do { +- prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); +- cmd = __alloc_cmd(nq); +- if (cmd) +- break; +- +- io_schedule(); +- } while (1); +- +- finish_wait(&nq->wait, &wait); +- return cmd; +-} +- +-static void end_cmd(struct nullb_cmd *cmd) +-{ +- int queue_mode = cmd->nq->dev->queue_mode; +- +- switch (queue_mode) { +- case NULL_Q_MQ: +- blk_mq_end_request(cmd->rq, cmd->error); +- return; +- case NULL_Q_BIO: +- cmd->bio->bi_status = cmd->error; +- bio_endio(cmd->bio); +- break; +- } +- +- free_cmd(cmd); +-} +- +-static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) +-{ +- end_cmd(container_of(timer, struct nullb_cmd, timer)); +- +- return HRTIMER_NORESTART; +-} +- +-static void null_cmd_end_timer(struct nullb_cmd *cmd) +-{ +- ktime_t kt = cmd->nq->dev->completion_nsec; +- +- hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL); +-} +- +-static void null_complete_rq(struct request *rq) +-{ +- end_cmd(blk_mq_rq_to_pdu(rq)); +-} +- +-static struct nullb_page *null_alloc_page(gfp_t gfp_flags) +-{ +- struct nullb_page *t_page; +- +- t_page = kmalloc(sizeof(struct nullb_page), gfp_flags); +- if (!t_page) +- goto out; +- +- t_page->page = alloc_pages(gfp_flags, 0); +- if (!t_page->page) +- goto out_freepage; +- +- memset(t_page->bitmap, 0, sizeof(t_page->bitmap)); +- return t_page; +-out_freepage: +- kfree(t_page); +-out: +- return NULL; +-} +- +-static void null_free_page(struct nullb_page *t_page) +-{ +- __set_bit(NULLB_PAGE_FREE, t_page->bitmap); +- if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap)) +- return; +- __free_page(t_page->page); +- kfree(t_page); +-} +- +-static bool null_page_empty(struct nullb_page *page) +-{ +- int size = MAP_SZ - 2; +- +- return find_first_bit(page->bitmap, size) == size; +-} +- +-static void null_free_sector(struct nullb *nullb, sector_t sector, +- bool is_cache) +-{ +- unsigned int sector_bit; +- u64 idx; +- struct nullb_page *t_page, *ret; +- struct radix_tree_root *root; +- +- root = 
is_cache ? &nullb->dev->cache : &nullb->dev->data; +- idx = sector >> PAGE_SECTORS_SHIFT; +- sector_bit = (sector & SECTOR_MASK); +- +- t_page = radix_tree_lookup(root, idx); +- if (t_page) { +- __clear_bit(sector_bit, t_page->bitmap); +- +- if (null_page_empty(t_page)) { +- ret = radix_tree_delete_item(root, idx, t_page); +- WARN_ON(ret != t_page); +- null_free_page(ret); +- if (is_cache) +- nullb->dev->curr_cache -= PAGE_SIZE; +- } +- } +-} +- +-static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx, +- struct nullb_page *t_page, bool is_cache) +-{ +- struct radix_tree_root *root; +- +- root = is_cache ? &nullb->dev->cache : &nullb->dev->data; +- +- if (radix_tree_insert(root, idx, t_page)) { +- null_free_page(t_page); +- t_page = radix_tree_lookup(root, idx); +- WARN_ON(!t_page || t_page->page->index != idx); +- } else if (is_cache) +- nullb->dev->curr_cache += PAGE_SIZE; +- +- return t_page; +-} +- +-static void null_free_device_storage(struct nullb_device *dev, bool is_cache) +-{ +- unsigned long pos = 0; +- int nr_pages; +- struct nullb_page *ret, *t_pages[FREE_BATCH]; +- struct radix_tree_root *root; +- +- root = is_cache ? &dev->cache : &dev->data; +- +- do { +- int i; +- +- nr_pages = radix_tree_gang_lookup(root, +- (void **)t_pages, pos, FREE_BATCH); +- +- for (i = 0; i < nr_pages; i++) { +- pos = t_pages[i]->page->index; +- ret = radix_tree_delete_item(root, pos, t_pages[i]); +- WARN_ON(ret != t_pages[i]); +- null_free_page(ret); +- } +- +- pos++; +- } while (nr_pages == FREE_BATCH); +- +- if (is_cache) +- dev->curr_cache = 0; +-} +- +-static struct nullb_page *__null_lookup_page(struct nullb *nullb, +- sector_t sector, bool for_write, bool is_cache) +-{ +- unsigned int sector_bit; +- u64 idx; +- struct nullb_page *t_page; +- struct radix_tree_root *root; +- +- idx = sector >> PAGE_SECTORS_SHIFT; +- sector_bit = (sector & SECTOR_MASK); +- +- root = is_cache ? 
&nullb->dev->cache : &nullb->dev->data; +- t_page = radix_tree_lookup(root, idx); +- WARN_ON(t_page && t_page->page->index != idx); +- +- if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap))) +- return t_page; +- +- return NULL; +-} +- +-static struct nullb_page *null_lookup_page(struct nullb *nullb, +- sector_t sector, bool for_write, bool ignore_cache) +-{ +- struct nullb_page *page = NULL; +- +- if (!ignore_cache) +- page = __null_lookup_page(nullb, sector, for_write, true); +- if (page) +- return page; +- return __null_lookup_page(nullb, sector, for_write, false); +-} +- +-static struct nullb_page *null_insert_page(struct nullb *nullb, +- sector_t sector, bool ignore_cache) +- __releases(&nullb->lock) +- __acquires(&nullb->lock) +-{ +- u64 idx; +- struct nullb_page *t_page; +- +- t_page = null_lookup_page(nullb, sector, true, ignore_cache); +- if (t_page) +- return t_page; +- +- spin_unlock_irq(&nullb->lock); +- +- t_page = null_alloc_page(GFP_NOIO); +- if (!t_page) +- goto out_lock; +- +- if (radix_tree_preload(GFP_NOIO)) +- goto out_freepage; +- +- spin_lock_irq(&nullb->lock); +- idx = sector >> PAGE_SECTORS_SHIFT; +- t_page->page->index = idx; +- t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache); +- radix_tree_preload_end(); +- +- return t_page; +-out_freepage: +- null_free_page(t_page); +-out_lock: +- spin_lock_irq(&nullb->lock); +- return null_lookup_page(nullb, sector, true, ignore_cache); +-} +- +-static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) +-{ +- int i; +- unsigned int offset; +- u64 idx; +- struct nullb_page *t_page, *ret; +- void *dst, *src; +- +- idx = c_page->page->index; +- +- t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true); +- +- __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap); +- if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) { +- null_free_page(c_page); +- if (t_page && null_page_empty(t_page)) { +- ret = radix_tree_delete_item(&nullb->dev->data, +- idx, t_page); +- null_free_page(t_page); +- } +- return 0; +- } +- +- if (!t_page) +- return -ENOMEM; +- +- src = kmap_atomic(c_page->page); +- dst = kmap_atomic(t_page->page); +- +- for (i = 0; i < PAGE_SECTORS; +- i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { +- if (test_bit(i, c_page->bitmap)) { +- offset = (i << SECTOR_SHIFT); +- memcpy(dst + offset, src + offset, +- nullb->dev->blocksize); +- __set_bit(i, t_page->bitmap); +- } +- } +- +- kunmap_atomic(dst); +- kunmap_atomic(src); +- +- ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page); +- null_free_page(ret); +- nullb->dev->curr_cache -= PAGE_SIZE; +- +- return 0; +-} +- +-static int null_make_cache_space(struct nullb *nullb, unsigned long n) +-{ +- int i, err, nr_pages; +- struct nullb_page *c_pages[FREE_BATCH]; +- unsigned long flushed = 0, one_round; +- +-again: +- if ((nullb->dev->cache_size * 1024 * 1024) > +- nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0) +- return 0; +- +- nr_pages = radix_tree_gang_lookup(&nullb->dev->cache, +- (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH); +- /* +- * nullb_flush_cache_page could unlock before using the c_pages. 
To +- * avoid race, we don't allow page free +- */ +- for (i = 0; i < nr_pages; i++) { +- nullb->cache_flush_pos = c_pages[i]->page->index; +- /* +- * We found the page which is being flushed to disk by other +- * threads +- */ +- if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap)) +- c_pages[i] = NULL; +- else +- __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap); +- } +- +- one_round = 0; +- for (i = 0; i < nr_pages; i++) { +- if (c_pages[i] == NULL) +- continue; +- err = null_flush_cache_page(nullb, c_pages[i]); +- if (err) +- return err; +- one_round++; +- } +- flushed += one_round << PAGE_SHIFT; +- +- if (n > flushed) { +- if (nr_pages == 0) +- nullb->cache_flush_pos = 0; +- if (one_round == 0) { +- /* give other threads a chance */ +- spin_unlock_irq(&nullb->lock); +- spin_lock_irq(&nullb->lock); +- } +- goto again; +- } +- return 0; +-} +- +-static int copy_to_nullb(struct nullb *nullb, struct page *source, +- unsigned int off, sector_t sector, size_t n, bool is_fua) +-{ +- size_t temp, count = 0; +- unsigned int offset; +- struct nullb_page *t_page; +- void *dst, *src; +- +- while (count < n) { +- temp = min_t(size_t, nullb->dev->blocksize, n - count); +- +- if (null_cache_active(nullb) && !is_fua) +- null_make_cache_space(nullb, PAGE_SIZE); +- +- offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; +- t_page = null_insert_page(nullb, sector, +- !null_cache_active(nullb) || is_fua); +- if (!t_page) +- return -ENOSPC; +- +- src = kmap_atomic(source); +- dst = kmap_atomic(t_page->page); +- memcpy(dst + offset, src + off + count, temp); +- kunmap_atomic(dst); +- kunmap_atomic(src); +- +- __set_bit(sector & SECTOR_MASK, t_page->bitmap); +- +- if (is_fua) +- null_free_sector(nullb, sector, true); +- +- count += temp; +- sector += temp >> SECTOR_SHIFT; +- } +- return 0; +-} +- +-static int copy_from_nullb(struct nullb *nullb, struct page *dest, +- unsigned int off, sector_t sector, size_t n) +-{ +- size_t temp, count = 0; +- unsigned int offset; +- struct nullb_page *t_page; +- void *dst, *src; +- +- while (count < n) { +- temp = min_t(size_t, nullb->dev->blocksize, n - count); +- +- offset = (sector & SECTOR_MASK) << SECTOR_SHIFT; +- t_page = null_lookup_page(nullb, sector, false, +- !null_cache_active(nullb)); +- +- dst = kmap_atomic(dest); +- if (!t_page) { +- memset(dst + off + count, 0, temp); +- goto next; +- } +- src = kmap_atomic(t_page->page); +- memcpy(dst + off + count, src + offset, temp); +- kunmap_atomic(src); +-next: +- kunmap_atomic(dst); +- +- count += temp; +- sector += temp >> SECTOR_SHIFT; +- } +- return 0; +-} +- +-static void nullb_fill_pattern(struct nullb *nullb, struct page *page, +- unsigned int len, unsigned int off) +-{ +- void *dst; +- +- dst = kmap_atomic(page); +- memset(dst + off, 0xFF, len); +- kunmap_atomic(dst); +-} +- +-static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) +-{ +- size_t temp; +- +- spin_lock_irq(&nullb->lock); +- while (n > 0) { +- temp = min_t(size_t, n, nullb->dev->blocksize); +- null_free_sector(nullb, sector, false); +- if (null_cache_active(nullb)) +- null_free_sector(nullb, sector, true); +- sector += temp >> SECTOR_SHIFT; +- n -= temp; +- } +- spin_unlock_irq(&nullb->lock); +-} +- +-static int null_handle_flush(struct nullb *nullb) +-{ +- int err; +- +- if (!null_cache_active(nullb)) +- return 0; +- +- spin_lock_irq(&nullb->lock); +- while (true) { +- err = null_make_cache_space(nullb, +- nullb->dev->cache_size * 1024 * 1024); +- if (err || nullb->dev->curr_cache == 0) +- break; +- } +- +- 
WARN_ON(!radix_tree_empty(&nullb->dev->cache)); +- spin_unlock_irq(&nullb->lock); +- return err; +-} +- +-static int null_transfer(struct nullb *nullb, struct page *page, +- unsigned int len, unsigned int off, bool is_write, sector_t sector, +- bool is_fua) +-{ +- struct nullb_device *dev = nullb->dev; +- unsigned int valid_len = len; +- int err = 0; +- +- if (!is_write) { +- if (dev->zoned) +- valid_len = null_zone_valid_read_len(nullb, +- sector, len); +- +- if (valid_len) { +- err = copy_from_nullb(nullb, page, off, +- sector, valid_len); +- off += valid_len; +- len -= valid_len; +- } +- +- if (len) +- nullb_fill_pattern(nullb, page, len, off); +- flush_dcache_page(page); +- } else { +- flush_dcache_page(page); +- err = copy_to_nullb(nullb, page, off, sector, len, is_fua); +- } +- +- return err; +-} +- +-static int null_handle_rq(struct nullb_cmd *cmd) +-{ +- struct request *rq = cmd->rq; +- struct nullb *nullb = cmd->nq->dev->nullb; +- int err; +- unsigned int len; +- sector_t sector; +- struct req_iterator iter; +- struct bio_vec bvec; +- +- sector = blk_rq_pos(rq); +- +- if (req_op(rq) == REQ_OP_DISCARD) { +- null_handle_discard(nullb, sector, blk_rq_bytes(rq)); +- return 0; +- } +- +- spin_lock_irq(&nullb->lock); +- rq_for_each_segment(bvec, rq, iter) { +- len = bvec.bv_len; +- err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, +- op_is_write(req_op(rq)), sector, +- rq->cmd_flags & REQ_FUA); +- if (err) { +- spin_unlock_irq(&nullb->lock); +- return err; +- } +- sector += len >> SECTOR_SHIFT; +- } +- spin_unlock_irq(&nullb->lock); +- +- return 0; +-} +- +-static int null_handle_bio(struct nullb_cmd *cmd) +-{ +- struct bio *bio = cmd->bio; +- struct nullb *nullb = cmd->nq->dev->nullb; +- int err; +- unsigned int len; +- sector_t sector; +- struct bio_vec bvec; +- struct bvec_iter iter; +- +- sector = bio->bi_iter.bi_sector; +- +- if (bio_op(bio) == REQ_OP_DISCARD) { +- null_handle_discard(nullb, sector, +- bio_sectors(bio) << SECTOR_SHIFT); +- return 0; +- } +- +- spin_lock_irq(&nullb->lock); +- bio_for_each_segment(bvec, bio, iter) { +- len = bvec.bv_len; +- err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, +- op_is_write(bio_op(bio)), sector, +- bio->bi_opf & REQ_FUA); +- if (err) { +- spin_unlock_irq(&nullb->lock); +- return err; +- } +- sector += len >> SECTOR_SHIFT; +- } +- spin_unlock_irq(&nullb->lock); +- return 0; +-} +- +-static void null_stop_queue(struct nullb *nullb) +-{ +- struct request_queue *q = nullb->q; +- +- if (nullb->dev->queue_mode == NULL_Q_MQ) +- blk_mq_stop_hw_queues(q); +-} +- +-static void null_restart_queue_async(struct nullb *nullb) +-{ +- struct request_queue *q = nullb->q; +- +- if (nullb->dev->queue_mode == NULL_Q_MQ) +- blk_mq_start_stopped_hw_queues(q, true); +-} +- +-static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd) +-{ +- struct nullb_device *dev = cmd->nq->dev; +- struct nullb *nullb = dev->nullb; +- blk_status_t sts = BLK_STS_OK; +- struct request *rq = cmd->rq; +- +- if (!hrtimer_active(&nullb->bw_timer)) +- hrtimer_restart(&nullb->bw_timer); +- +- if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) { +- null_stop_queue(nullb); +- /* race with timer */ +- if (atomic_long_read(&nullb->cur_bytes) > 0) +- null_restart_queue_async(nullb); +- /* requeue request */ +- sts = BLK_STS_DEV_RESOURCE; +- } +- return sts; +-} +- +-static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, +- sector_t sector, +- sector_t nr_sectors) +-{ +- struct badblocks *bb = 
&cmd->nq->dev->badblocks; +- sector_t first_bad; +- int bad_sectors; +- +- if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors)) +- return BLK_STS_IOERR; +- +- return BLK_STS_OK; +-} +- +-static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, +- enum req_opf op) +-{ +- struct nullb_device *dev = cmd->nq->dev; +- int err; +- +- if (dev->queue_mode == NULL_Q_BIO) +- err = null_handle_bio(cmd); +- else +- err = null_handle_rq(cmd); +- +- return errno_to_blk_status(err); +-} +- +-static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd) +-{ +- struct nullb_device *dev = cmd->nq->dev; +- struct bio *bio; +- +- if (dev->memory_backed) +- return; +- +- if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) { +- zero_fill_bio(cmd->bio); +- } else if (req_op(cmd->rq) == REQ_OP_READ) { +- __rq_for_each_bio(bio, cmd->rq) +- zero_fill_bio(bio); +- } +-} +- +-static inline void nullb_complete_cmd(struct nullb_cmd *cmd) +-{ +- /* +- * Since root privileges are required to configure the null_blk +- * driver, it is fine that this driver does not initialize the +- * data buffers of read commands. Zero-initialize these buffers +- * anyway if KMSAN is enabled to prevent that KMSAN complains +- * about null_blk not initializing read data buffers. +- */ +- if (IS_ENABLED(CONFIG_KMSAN)) +- nullb_zero_read_cmd_buffer(cmd); +- +- /* Complete IO by inline, softirq or timer */ +- switch (cmd->nq->dev->irqmode) { +- case NULL_IRQ_SOFTIRQ: +- switch (cmd->nq->dev->queue_mode) { +- case NULL_Q_MQ: +- if (likely(!blk_should_fake_timeout(cmd->rq->q))) +- blk_mq_complete_request(cmd->rq); +- break; +- case NULL_Q_BIO: +- /* +- * XXX: no proper submitting cpu information available. +- */ +- end_cmd(cmd); +- break; +- } +- break; +- case NULL_IRQ_NONE: +- end_cmd(cmd); +- break; +- case NULL_IRQ_TIMER: +- null_cmd_end_timer(cmd); +- break; +- } +-} +- +-blk_status_t null_process_cmd(struct nullb_cmd *cmd, +- enum req_opf op, sector_t sector, +- unsigned int nr_sectors) +-{ +- struct nullb_device *dev = cmd->nq->dev; +- blk_status_t ret; +- +- if (dev->badblocks.shift != -1) { +- ret = null_handle_badblocks(cmd, sector, nr_sectors); +- if (ret != BLK_STS_OK) +- return ret; +- } +- +- if (dev->memory_backed) +- return null_handle_memory_backed(cmd, op); +- +- return BLK_STS_OK; +-} +- +-static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, +- sector_t nr_sectors, enum req_opf op) +-{ +- struct nullb_device *dev = cmd->nq->dev; +- struct nullb *nullb = dev->nullb; +- blk_status_t sts; +- +- if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) { +- sts = null_handle_throttled(cmd); +- if (sts != BLK_STS_OK) +- return sts; +- } +- +- if (op == REQ_OP_FLUSH) { +- cmd->error = errno_to_blk_status(null_handle_flush(nullb)); +- goto out; +- } +- +- if (dev->zoned) +- sts = null_process_zoned_cmd(cmd, op, sector, nr_sectors); +- else +- sts = null_process_cmd(cmd, op, sector, nr_sectors); +- +- /* Do not overwrite errors (e.g. 
timeout errors) */ +- if (cmd->error == BLK_STS_OK) +- cmd->error = sts; +- +-out: +- nullb_complete_cmd(cmd); +- return BLK_STS_OK; +-} +- +-static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer) +-{ +- struct nullb *nullb = container_of(timer, struct nullb, bw_timer); +- ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL); +- unsigned int mbps = nullb->dev->mbps; +- +- if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps)) +- return HRTIMER_NORESTART; +- +- atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps)); +- null_restart_queue_async(nullb); +- +- hrtimer_forward_now(&nullb->bw_timer, timer_interval); +- +- return HRTIMER_RESTART; +-} +- +-static void nullb_setup_bwtimer(struct nullb *nullb) +-{ +- ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL); +- +- hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); +- nullb->bw_timer.function = nullb_bwtimer_fn; +- atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps)); +- hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL); +-} +- +-static struct nullb_queue *nullb_to_queue(struct nullb *nullb) +-{ +- int index = 0; +- +- if (nullb->nr_queues != 1) +- index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); +- +- return &nullb->queues[index]; +-} +- +-static blk_qc_t null_submit_bio(struct bio *bio) +-{ +- sector_t sector = bio->bi_iter.bi_sector; +- sector_t nr_sectors = bio_sectors(bio); +- struct nullb *nullb = bio->bi_disk->private_data; +- struct nullb_queue *nq = nullb_to_queue(nullb); +- struct nullb_cmd *cmd; +- +- cmd = alloc_cmd(nq, 1); +- cmd->bio = bio; +- +- null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio)); +- return BLK_QC_T_NONE; +-} +- +-static bool should_timeout_request(struct request *rq) +-{ +-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +- if (g_timeout_str[0]) +- return should_fail(&null_timeout_attr, 1); +-#endif +- return false; +-} +- +-static bool should_requeue_request(struct request *rq) +-{ +-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +- if (g_requeue_str[0]) +- return should_fail(&null_requeue_attr, 1); +-#endif +- return false; +-} +- +-static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) +-{ +- struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); +- +- pr_info("rq %p timed out\n", rq); +- +- /* +- * If the device is marked as blocking (i.e. memory backed or zoned +- * device), the submission path may be blocked waiting for resources +- * and cause real timeouts. For these real timeouts, the submission +- * path will complete the request using blk_mq_complete_request(). +- * Only fake timeouts need to execute blk_mq_complete_request() here. 
+- */ +- cmd->error = BLK_STS_TIMEOUT; +- if (cmd->fake_timeout) +- blk_mq_complete_request(rq); +- return BLK_EH_DONE; +-} +- +-static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, +- const struct blk_mq_queue_data *bd) +-{ +- struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); +- struct nullb_queue *nq = hctx->driver_data; +- sector_t nr_sectors = blk_rq_sectors(bd->rq); +- sector_t sector = blk_rq_pos(bd->rq); +- +- might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); +- +- if (nq->dev->irqmode == NULL_IRQ_TIMER) { +- hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); +- cmd->timer.function = null_cmd_timer_expired; +- } +- cmd->rq = bd->rq; +- cmd->error = BLK_STS_OK; +- cmd->nq = nq; +- cmd->fake_timeout = should_timeout_request(bd->rq); +- +- blk_mq_start_request(bd->rq); +- +- if (should_requeue_request(bd->rq)) { +- /* +- * Alternate between hitting the core BUSY path, and the +- * driver driven requeue path +- */ +- nq->requeue_selection++; +- if (nq->requeue_selection & 1) +- return BLK_STS_RESOURCE; +- else { +- blk_mq_requeue_request(bd->rq, true); +- return BLK_STS_OK; +- } +- } +- if (cmd->fake_timeout) +- return BLK_STS_OK; +- +- return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq)); +-} +- +-static void cleanup_queue(struct nullb_queue *nq) +-{ +- kfree(nq->tag_map); +- kfree(nq->cmds); +-} +- +-static void cleanup_queues(struct nullb *nullb) +-{ +- int i; +- +- for (i = 0; i < nullb->nr_queues; i++) +- cleanup_queue(&nullb->queues[i]); +- +- kfree(nullb->queues); +-} +- +-static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) +-{ +- struct nullb_queue *nq = hctx->driver_data; +- struct nullb *nullb = nq->dev->nullb; +- +- nullb->nr_queues--; +-} +- +-static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) +-{ +- init_waitqueue_head(&nq->wait); +- nq->queue_depth = nullb->queue_depth; +- nq->dev = nullb->dev; +-} +- +-static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, +- unsigned int hctx_idx) +-{ +- struct nullb *nullb = hctx->queue->queuedata; +- struct nullb_queue *nq; +- +-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +- if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1)) +- return -EFAULT; +-#endif +- +- nq = &nullb->queues[hctx_idx]; +- hctx->driver_data = nq; +- null_init_queue(nullb, nq); +- nullb->nr_queues++; +- +- return 0; +-} +- +-static const struct blk_mq_ops null_mq_ops = { +- .queue_rq = null_queue_rq, +- .complete = null_complete_rq, +- .timeout = null_timeout_rq, +- .init_hctx = null_init_hctx, +- .exit_hctx = null_exit_hctx, +-}; +- +-static void null_del_dev(struct nullb *nullb) +-{ +- struct nullb_device *dev; +- +- if (!nullb) +- return; +- +- dev = nullb->dev; +- +- ida_simple_remove(&nullb_indexes, nullb->index); +- +- list_del_init(&nullb->list); +- +- del_gendisk(nullb->disk); +- +- if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) { +- hrtimer_cancel(&nullb->bw_timer); +- atomic_long_set(&nullb->cur_bytes, LONG_MAX); +- null_restart_queue_async(nullb); +- } +- +- blk_cleanup_queue(nullb->q); +- if (dev->queue_mode == NULL_Q_MQ && +- nullb->tag_set == &nullb->__tag_set) +- blk_mq_free_tag_set(nullb->tag_set); +- put_disk(nullb->disk); +- cleanup_queues(nullb); +- if (null_cache_active(nullb)) +- null_free_device_storage(nullb->dev, true); +- kfree(nullb); +- dev->nullb = NULL; +-} +- +-static void null_config_discard(struct nullb *nullb) +-{ +- if (nullb->dev->discard == false) +- return; +- +- if (nullb->dev->zoned) { +- nullb->dev->discard 
= false; +- pr_info("discard option is ignored in zoned mode\n"); +- return; +- } +- +- nullb->q->limits.discard_granularity = nullb->dev->blocksize; +- nullb->q->limits.discard_alignment = nullb->dev->blocksize; +- blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); +- blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q); +-} +- +-static const struct block_device_operations null_bio_ops = { +- .owner = THIS_MODULE, +- .submit_bio = null_submit_bio, +- .report_zones = null_report_zones, +-}; +- +-static const struct block_device_operations null_rq_ops = { +- .owner = THIS_MODULE, +- .report_zones = null_report_zones, +-}; +- +-static int setup_commands(struct nullb_queue *nq) +-{ +- struct nullb_cmd *cmd; +- int i, tag_size; +- +- nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL); +- if (!nq->cmds) +- return -ENOMEM; +- +- tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG; +- nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL); +- if (!nq->tag_map) { +- kfree(nq->cmds); +- return -ENOMEM; +- } +- +- for (i = 0; i < nq->queue_depth; i++) { +- cmd = &nq->cmds[i]; +- cmd->tag = -1U; +- } +- +- return 0; +-} +- +-static int setup_queues(struct nullb *nullb) +-{ +- nullb->queues = kcalloc(nr_cpu_ids, sizeof(struct nullb_queue), +- GFP_KERNEL); +- if (!nullb->queues) +- return -ENOMEM; +- +- nullb->queue_depth = nullb->dev->hw_queue_depth; +- +- return 0; +-} +- +-static int init_driver_queues(struct nullb *nullb) +-{ +- struct nullb_queue *nq; +- int i, ret = 0; +- +- for (i = 0; i < nullb->dev->submit_queues; i++) { +- nq = &nullb->queues[i]; +- +- null_init_queue(nullb, nq); +- +- ret = setup_commands(nq); +- if (ret) +- return ret; +- nullb->nr_queues++; +- } +- return 0; +-} +- +-static int null_gendisk_register(struct nullb *nullb) +-{ +- sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT; +- struct gendisk *disk; +- +- disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node); +- if (!disk) +- return -ENOMEM; +- set_capacity(disk, size); +- +- disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; +- disk->major = null_major; +- disk->first_minor = nullb->index; +- if (queue_is_mq(nullb->q)) +- disk->fops = &null_rq_ops; +- else +- disk->fops = &null_bio_ops; +- disk->private_data = nullb; +- disk->queue = nullb->q; +- strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); +- +- if (nullb->dev->zoned) { +- int ret = null_register_zoned_dev(nullb); +- +- if (ret) +- return ret; +- } +- +- add_disk(disk); +- return 0; +-} +- +-static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) +-{ +- set->ops = &null_mq_ops; +- set->nr_hw_queues = nullb ? nullb->dev->submit_queues : +- g_submit_queues; +- set->queue_depth = nullb ? nullb->dev->hw_queue_depth : +- g_hw_queue_depth; +- set->numa_node = nullb ? 
nullb->dev->home_node : g_home_node; +- set->cmd_size = sizeof(struct nullb_cmd); +- set->flags = BLK_MQ_F_SHOULD_MERGE; +- if (g_no_sched) +- set->flags |= BLK_MQ_F_NO_SCHED; +- if (g_shared_tag_bitmap) +- set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; +- set->driver_data = NULL; +- +- if ((nullb && nullb->dev->blocking) || g_blocking) +- set->flags |= BLK_MQ_F_BLOCKING; +- +- return blk_mq_alloc_tag_set(set); +-} +- +-static int null_validate_conf(struct nullb_device *dev) +-{ +- dev->blocksize = round_down(dev->blocksize, 512); +- dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); +- +- if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) { +- if (dev->submit_queues != nr_online_nodes) +- dev->submit_queues = nr_online_nodes; +- } else if (dev->submit_queues > nr_cpu_ids) +- dev->submit_queues = nr_cpu_ids; +- else if (dev->submit_queues == 0) +- dev->submit_queues = 1; +- +- dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); +- dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); +- +- /* Do memory allocation, so set blocking */ +- if (dev->memory_backed) +- dev->blocking = true; +- else /* cache is meaningless */ +- dev->cache_size = 0; +- dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024, +- dev->cache_size); +- dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps); +- /* can not stop a queue */ +- if (dev->queue_mode == NULL_Q_BIO) +- dev->mbps = 0; +- +- if (dev->zoned && +- (!dev->zone_size || !is_power_of_2(dev->zone_size))) { +- pr_err("zone_size must be power-of-two\n"); +- return -EINVAL; +- } +- +- return 0; +-} +- +-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +-static bool __null_setup_fault(struct fault_attr *attr, char *str) +-{ +- if (!str[0]) +- return true; +- +- if (!setup_fault_attr(attr, str)) +- return false; +- +- attr->verbose = 0; +- return true; +-} +-#endif +- +-static bool null_setup_fault(void) +-{ +-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +- if (!__null_setup_fault(&null_timeout_attr, g_timeout_str)) +- return false; +- if (!__null_setup_fault(&null_requeue_attr, g_requeue_str)) +- return false; +- if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str)) +- return false; +-#endif +- return true; +-} +- +-static int null_add_dev(struct nullb_device *dev) +-{ +- struct nullb *nullb; +- int rv; +- +- rv = null_validate_conf(dev); +- if (rv) +- return rv; +- +- nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node); +- if (!nullb) { +- rv = -ENOMEM; +- goto out; +- } +- nullb->dev = dev; +- dev->nullb = nullb; +- +- spin_lock_init(&nullb->lock); +- +- rv = setup_queues(nullb); +- if (rv) +- goto out_free_nullb; +- +- if (dev->queue_mode == NULL_Q_MQ) { +- if (shared_tags) { +- nullb->tag_set = &tag_set; +- rv = 0; +- } else { +- nullb->tag_set = &nullb->__tag_set; +- rv = null_init_tag_set(nullb, nullb->tag_set); +- } +- +- if (rv) +- goto out_cleanup_queues; +- +- if (!null_setup_fault()) +- goto out_cleanup_queues; +- +- nullb->tag_set->timeout = 5 * HZ; +- nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb); +- if (IS_ERR(nullb->q)) { +- rv = -ENOMEM; +- goto out_cleanup_tags; +- } +- } else if (dev->queue_mode == NULL_Q_BIO) { +- nullb->q = blk_alloc_queue(dev->home_node); +- if (!nullb->q) { +- rv = -ENOMEM; +- goto out_cleanup_queues; +- } +- rv = init_driver_queues(nullb); +- if (rv) +- goto out_cleanup_blk_queue; +- } +- +- if (dev->mbps) { +- set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags); +- nullb_setup_bwtimer(nullb); +- } +- +- if (dev->cache_size > 0) { +- 
set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); +- blk_queue_write_cache(nullb->q, true, true); +- } +- +- if (dev->zoned) { +- rv = null_init_zoned_dev(dev, nullb->q); +- if (rv) +- goto out_cleanup_blk_queue; +- } +- +- nullb->q->queuedata = nullb; +- blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); +- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); +- +- mutex_lock(&lock); +- rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); +- if (rv < 0) { +- mutex_unlock(&lock); +- goto out_cleanup_zone; +- } +- nullb->index = rv; +- dev->index = rv; +- mutex_unlock(&lock); +- +- blk_queue_logical_block_size(nullb->q, dev->blocksize); +- blk_queue_physical_block_size(nullb->q, dev->blocksize); +- +- null_config_discard(nullb); +- +- sprintf(nullb->disk_name, "nullb%d", nullb->index); +- +- rv = null_gendisk_register(nullb); +- if (rv) +- goto out_ida_free; +- +- mutex_lock(&lock); +- list_add_tail(&nullb->list, &nullb_list); +- mutex_unlock(&lock); +- +- return 0; +- +-out_ida_free: +- ida_free(&nullb_indexes, nullb->index); +-out_cleanup_zone: +- null_free_zoned_dev(dev); +-out_cleanup_blk_queue: +- blk_cleanup_queue(nullb->q); +-out_cleanup_tags: +- if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) +- blk_mq_free_tag_set(nullb->tag_set); +-out_cleanup_queues: +- cleanup_queues(nullb); +-out_free_nullb: +- kfree(nullb); +- dev->nullb = NULL; +-out: +- return rv; +-} +- +-static int __init null_init(void) +-{ +- int ret = 0; +- unsigned int i; +- struct nullb *nullb; +- struct nullb_device *dev; +- +- if (g_bs > PAGE_SIZE) { +- pr_warn("invalid block size\n"); +- pr_warn("defaults block size to %lu\n", PAGE_SIZE); +- g_bs = PAGE_SIZE; +- } +- +- if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) { +- pr_err("invalid home_node value\n"); +- g_home_node = NUMA_NO_NODE; +- } +- +- if (g_queue_mode == NULL_Q_RQ) { +- pr_err("legacy IO path no longer available\n"); +- return -EINVAL; +- } +- if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { +- if (g_submit_queues != nr_online_nodes) { +- pr_warn("submit_queues param is set to %u.\n", +- nr_online_nodes); +- g_submit_queues = nr_online_nodes; +- } +- } else if (g_submit_queues > nr_cpu_ids) +- g_submit_queues = nr_cpu_ids; +- else if (g_submit_queues <= 0) +- g_submit_queues = 1; +- +- if (g_queue_mode == NULL_Q_MQ && shared_tags) { +- ret = null_init_tag_set(NULL, &tag_set); +- if (ret) +- return ret; +- } +- +- config_group_init(&nullb_subsys.su_group); +- mutex_init(&nullb_subsys.su_mutex); +- +- ret = configfs_register_subsystem(&nullb_subsys); +- if (ret) +- goto err_tagset; +- +- mutex_init(&lock); +- +- null_major = register_blkdev(0, "nullb"); +- if (null_major < 0) { +- ret = null_major; +- goto err_conf; +- } +- +- for (i = 0; i < nr_devices; i++) { +- dev = null_alloc_dev(); +- if (!dev) { +- ret = -ENOMEM; +- goto err_dev; +- } +- ret = null_add_dev(dev); +- if (ret) { +- null_free_dev(dev); +- goto err_dev; +- } +- } +- +- pr_info("module loaded\n"); +- return 0; +- +-err_dev: +- while (!list_empty(&nullb_list)) { +- nullb = list_entry(nullb_list.next, struct nullb, list); +- dev = nullb->dev; +- null_del_dev(nullb); +- null_free_dev(dev); +- } +- unregister_blkdev(null_major, "nullb"); +-err_conf: +- configfs_unregister_subsystem(&nullb_subsys); +-err_tagset: +- if (g_queue_mode == NULL_Q_MQ && shared_tags) +- blk_mq_free_tag_set(&tag_set); +- return ret; +-} +- +-static void __exit null_exit(void) +-{ +- struct nullb *nullb; +- +- configfs_unregister_subsystem(&nullb_subsys); +- +- 
unregister_blkdev(null_major, "nullb"); +- +- mutex_lock(&lock); +- while (!list_empty(&nullb_list)) { +- struct nullb_device *dev; +- +- nullb = list_entry(nullb_list.next, struct nullb, list); +- dev = nullb->dev; +- null_del_dev(nullb); +- null_free_dev(dev); +- } +- mutex_unlock(&lock); +- +- if (g_queue_mode == NULL_Q_MQ && shared_tags) +- blk_mq_free_tag_set(&tag_set); +-} +- +-module_init(null_init); +-module_exit(null_exit); +- +-MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>"); +-MODULE_LICENSE("GPL"); +diff --git a/drivers/block/null_blk_trace.c b/drivers/block/null_blk_trace.c +deleted file mode 100644 +index f246e7bff6982..0000000000000 +--- a/drivers/block/null_blk_trace.c ++++ /dev/null +@@ -1,21 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0 +-/* +- * null_blk trace related helpers. +- * +- * Copyright (C) 2020 Western Digital Corporation or its affiliates. +- */ +-#include "null_blk_trace.h" +- +-/* +- * Helper to use for all null_blk traces to extract disk name. +- */ +-const char *nullb_trace_disk_name(struct trace_seq *p, char *name) +-{ +- const char *ret = trace_seq_buffer_ptr(p); +- +- if (name && *name) +- trace_seq_printf(p, "disk=%s, ", name); +- trace_seq_putc(p, 0); +- +- return ret; +-} +diff --git a/drivers/block/null_blk_trace.h b/drivers/block/null_blk_trace.h +deleted file mode 100644 +index 4f83032eb5441..0000000000000 +--- a/drivers/block/null_blk_trace.h ++++ /dev/null +@@ -1,79 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * null_blk device driver tracepoints. +- * +- * Copyright (C) 2020 Western Digital Corporation or its affiliates. +- */ +- +-#undef TRACE_SYSTEM +-#define TRACE_SYSTEM nullb +- +-#if !defined(_TRACE_NULLB_H) || defined(TRACE_HEADER_MULTI_READ) +-#define _TRACE_NULLB_H +- +-#include <linux/tracepoint.h> +-#include <linux/trace_seq.h> +- +-#include "null_blk.h" +- +-const char *nullb_trace_disk_name(struct trace_seq *p, char *name); +- +-#define __print_disk_name(name) nullb_trace_disk_name(p, name) +- +-#ifndef TRACE_HEADER_MULTI_READ +-static inline void __assign_disk_name(char *name, struct gendisk *disk) +-{ +- if (disk) +- memcpy(name, disk->disk_name, DISK_NAME_LEN); +- else +- memset(name, 0, DISK_NAME_LEN); +-} +-#endif +- +-TRACE_EVENT(nullb_zone_op, +- TP_PROTO(struct nullb_cmd *cmd, unsigned int zone_no, +- unsigned int zone_cond), +- TP_ARGS(cmd, zone_no, zone_cond), +- TP_STRUCT__entry( +- __array(char, disk, DISK_NAME_LEN) +- __field(enum req_opf, op) +- __field(unsigned int, zone_no) +- __field(unsigned int, zone_cond) +- ), +- TP_fast_assign( +- __entry->op = req_op(cmd->rq); +- __entry->zone_no = zone_no; +- __entry->zone_cond = zone_cond; +- __assign_disk_name(__entry->disk, cmd->rq->rq_disk); +- ), +- TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s", +- __print_disk_name(__entry->disk), +- blk_op_str(__entry->op), +- __entry->zone_no, +- blk_zone_cond_str(__entry->zone_cond)) +-); +- +-TRACE_EVENT(nullb_report_zones, +- TP_PROTO(struct nullb *nullb, unsigned int nr_zones), +- TP_ARGS(nullb, nr_zones), +- TP_STRUCT__entry( +- __array(char, disk, DISK_NAME_LEN) +- __field(unsigned int, nr_zones) +- ), +- TP_fast_assign( +- __entry->nr_zones = nr_zones; +- __assign_disk_name(__entry->disk, nullb->disk); +- ), +- TP_printk("%s nr_zones=%u", +- __print_disk_name(__entry->disk), __entry->nr_zones) +-); +- +-#endif /* _TRACE_NULLB_H */ +- +-#undef TRACE_INCLUDE_PATH +-#define TRACE_INCLUDE_PATH . 
+-#undef TRACE_INCLUDE_FILE +-#define TRACE_INCLUDE_FILE null_blk_trace +- +-/* This part must be outside protection */ +-#include <trace/define_trace.h> +diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c +deleted file mode 100644 +index f5df82c26c16f..0000000000000 +--- a/drivers/block/null_blk_zoned.c ++++ /dev/null +@@ -1,617 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0 +-#include <linux/vmalloc.h> +-#include <linux/bitmap.h> +-#include "null_blk.h" +- +-#define CREATE_TRACE_POINTS +-#include "null_blk_trace.h" +- +-#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT) +- +-static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) +-{ +- return sect >> ilog2(dev->zone_size_sects); +-} +- +-int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) +-{ +- sector_t dev_capacity_sects, zone_capacity_sects; +- sector_t sector = 0; +- unsigned int i; +- +- if (!is_power_of_2(dev->zone_size)) { +- pr_err("zone_size must be power-of-two\n"); +- return -EINVAL; +- } +- if (dev->zone_size > dev->size) { +- pr_err("Zone size larger than device capacity\n"); +- return -EINVAL; +- } +- +- if (!dev->zone_capacity) +- dev->zone_capacity = dev->zone_size; +- +- if (dev->zone_capacity > dev->zone_size) { +- pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n", +- dev->zone_capacity, dev->zone_size); +- return -EINVAL; +- } +- +- zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity); +- dev_capacity_sects = MB_TO_SECTS(dev->size); +- dev->zone_size_sects = MB_TO_SECTS(dev->zone_size); +- dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects); +- if (dev_capacity_sects & (dev->zone_size_sects - 1)) +- dev->nr_zones++; +- +- dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone), +- GFP_KERNEL | __GFP_ZERO); +- if (!dev->zones) +- return -ENOMEM; +- +- /* +- * With memory backing, the zone_lock spinlock needs to be temporarily +- * released to avoid scheduling in atomic context. To guarantee zone +- * information protection, use a bitmap to lock zones with +- * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing +- * implies that the queue is marked with BLK_MQ_F_BLOCKING. 
+- */ +- spin_lock_init(&dev->zone_lock); +- if (dev->memory_backed) { +- dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); +- if (!dev->zone_locks) { +- kvfree(dev->zones); +- return -ENOMEM; +- } +- } +- +- if (dev->zone_nr_conv >= dev->nr_zones) { +- dev->zone_nr_conv = dev->nr_zones - 1; +- pr_info("changed the number of conventional zones to %u", +- dev->zone_nr_conv); +- } +- +- /* Max active zones has to be < nbr of seq zones in order to be enforceable */ +- if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) { +- dev->zone_max_active = 0; +- pr_info("zone_max_active limit disabled, limit >= zone count\n"); +- } +- +- /* Max open zones has to be <= max active zones */ +- if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) { +- dev->zone_max_open = dev->zone_max_active; +- pr_info("changed the maximum number of open zones to %u\n", +- dev->nr_zones); +- } else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) { +- dev->zone_max_open = 0; +- pr_info("zone_max_open limit disabled, limit >= zone count\n"); +- } +- +- for (i = 0; i < dev->zone_nr_conv; i++) { +- struct blk_zone *zone = &dev->zones[i]; +- +- zone->start = sector; +- zone->len = dev->zone_size_sects; +- zone->capacity = zone->len; +- zone->wp = zone->start + zone->len; +- zone->type = BLK_ZONE_TYPE_CONVENTIONAL; +- zone->cond = BLK_ZONE_COND_NOT_WP; +- +- sector += dev->zone_size_sects; +- } +- +- for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { +- struct blk_zone *zone = &dev->zones[i]; +- +- zone->start = zone->wp = sector; +- if (zone->start + dev->zone_size_sects > dev_capacity_sects) +- zone->len = dev_capacity_sects - zone->start; +- else +- zone->len = dev->zone_size_sects; +- zone->capacity = +- min_t(sector_t, zone->len, zone_capacity_sects); +- zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; +- zone->cond = BLK_ZONE_COND_EMPTY; +- +- sector += dev->zone_size_sects; +- } +- +- q->limits.zoned = BLK_ZONED_HM; +- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); +- blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); +- +- return 0; +-} +- +-int null_register_zoned_dev(struct nullb *nullb) +-{ +- struct nullb_device *dev = nullb->dev; +- struct request_queue *q = nullb->q; +- +- if (queue_is_mq(q)) { +- int ret = blk_revalidate_disk_zones(nullb->disk, NULL); +- +- if (ret) +- return ret; +- } else { +- blk_queue_chunk_sectors(q, dev->zone_size_sects); +- q->nr_zones = blkdev_nr_zones(nullb->disk); +- } +- +- blk_queue_max_zone_append_sectors(q, dev->zone_size_sects); +- blk_queue_max_open_zones(q, dev->zone_max_open); +- blk_queue_max_active_zones(q, dev->zone_max_active); +- +- return 0; +-} +- +-void null_free_zoned_dev(struct nullb_device *dev) +-{ +- bitmap_free(dev->zone_locks); +- kvfree(dev->zones); +- dev->zones = NULL; +-} +- +-static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno) +-{ +- if (dev->memory_backed) +- wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE); +- spin_lock_irq(&dev->zone_lock); +-} +- +-static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno) +-{ +- spin_unlock_irq(&dev->zone_lock); +- +- if (dev->memory_backed) +- clear_and_wake_up_bit(zno, dev->zone_locks); +-} +- +-int null_report_zones(struct gendisk *disk, sector_t sector, +- unsigned int nr_zones, report_zones_cb cb, void *data) +-{ +- struct nullb *nullb = disk->private_data; +- struct nullb_device *dev = nullb->dev; +- unsigned int first_zone, i, zno; +- struct blk_zone zone; +- int error; +- +- first_zone = 
null_zone_no(dev, sector); +- if (first_zone >= dev->nr_zones) +- return 0; +- +- nr_zones = min(nr_zones, dev->nr_zones - first_zone); +- trace_nullb_report_zones(nullb, nr_zones); +- +- zno = first_zone; +- for (i = 0; i < nr_zones; i++, zno++) { +- /* +- * Stacked DM target drivers will remap the zone information by +- * modifying the zone information passed to the report callback. +- * So use a local copy to avoid corruption of the device zone +- * array. +- */ +- null_lock_zone(dev, zno); +- memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone)); +- null_unlock_zone(dev, zno); +- +- error = cb(&zone, i, data); +- if (error) +- return error; +- } +- +- return nr_zones; +-} +- +-/* +- * This is called in the case of memory backing from null_process_cmd() +- * with the target zone already locked. +- */ +-size_t null_zone_valid_read_len(struct nullb *nullb, +- sector_t sector, unsigned int len) +-{ +- struct nullb_device *dev = nullb->dev; +- struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)]; +- unsigned int nr_sectors = len >> SECTOR_SHIFT; +- +- /* Read must be below the write pointer position */ +- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL || +- sector + nr_sectors <= zone->wp) +- return len; +- +- if (sector > zone->wp) +- return 0; +- +- return (zone->wp - sector) << SECTOR_SHIFT; +-} +- +-static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *zone) +-{ +- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) +- return BLK_STS_IOERR; +- +- switch (zone->cond) { +- case BLK_ZONE_COND_CLOSED: +- /* close operation on closed is not an error */ +- return BLK_STS_OK; +- case BLK_ZONE_COND_IMP_OPEN: +- dev->nr_zones_imp_open--; +- break; +- case BLK_ZONE_COND_EXP_OPEN: +- dev->nr_zones_exp_open--; +- break; +- case BLK_ZONE_COND_EMPTY: +- case BLK_ZONE_COND_FULL: +- default: +- return BLK_STS_IOERR; +- } +- +- if (zone->wp == zone->start) { +- zone->cond = BLK_ZONE_COND_EMPTY; +- } else { +- zone->cond = BLK_ZONE_COND_CLOSED; +- dev->nr_zones_closed++; +- } +- +- return BLK_STS_OK; +-} +- +-static void null_close_first_imp_zone(struct nullb_device *dev) +-{ +- unsigned int i; +- +- for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { +- if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) { +- null_close_zone(dev, &dev->zones[i]); +- return; +- } +- } +-} +- +-static blk_status_t null_check_active(struct nullb_device *dev) +-{ +- if (!dev->zone_max_active) +- return BLK_STS_OK; +- +- if (dev->nr_zones_exp_open + dev->nr_zones_imp_open + +- dev->nr_zones_closed < dev->zone_max_active) +- return BLK_STS_OK; +- +- return BLK_STS_ZONE_ACTIVE_RESOURCE; +-} +- +-static blk_status_t null_check_open(struct nullb_device *dev) +-{ +- if (!dev->zone_max_open) +- return BLK_STS_OK; +- +- if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open) +- return BLK_STS_OK; +- +- if (dev->nr_zones_imp_open) { +- if (null_check_active(dev) == BLK_STS_OK) { +- null_close_first_imp_zone(dev); +- return BLK_STS_OK; +- } +- } +- +- return BLK_STS_ZONE_OPEN_RESOURCE; +-} +- +-/* +- * This function matches the manage open zone resources function in the ZBC standard, +- * with the addition of max active zones support (added in the ZNS standard). +- * +- * The function determines if a zone can transition to implicit open or explicit open, +- * while maintaining the max open zone (and max active zone) limit(s). It may close an +- * implicit open zone in order to make additional zone resources available. 
+- * +- * ZBC states that an implicit open zone shall be closed only if there is not +- * room within the open limit. However, with the addition of an active limit, +- * it is not certain that closing an implicit open zone will allow a new zone +- * to be opened, since we might already be at the active limit capacity. +- */ +-static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone) +-{ +- blk_status_t ret; +- +- switch (zone->cond) { +- case BLK_ZONE_COND_EMPTY: +- ret = null_check_active(dev); +- if (ret != BLK_STS_OK) +- return ret; +- fallthrough; +- case BLK_ZONE_COND_CLOSED: +- return null_check_open(dev); +- default: +- /* Should never be called for other states */ +- WARN_ON(1); +- return BLK_STS_IOERR; +- } +-} +- +-static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, +- unsigned int nr_sectors, bool append) +-{ +- struct nullb_device *dev = cmd->nq->dev; +- unsigned int zno = null_zone_no(dev, sector); +- struct blk_zone *zone = &dev->zones[zno]; +- blk_status_t ret; +- +- trace_nullb_zone_op(cmd, zno, zone->cond); +- +- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { +- if (append) +- return BLK_STS_IOERR; +- return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); +- } +- +- null_lock_zone(dev, zno); +- +- switch (zone->cond) { +- case BLK_ZONE_COND_FULL: +- /* Cannot write to a full zone */ +- ret = BLK_STS_IOERR; +- goto unlock; +- case BLK_ZONE_COND_EMPTY: +- case BLK_ZONE_COND_CLOSED: +- ret = null_check_zone_resources(dev, zone); +- if (ret != BLK_STS_OK) +- goto unlock; +- break; +- case BLK_ZONE_COND_IMP_OPEN: +- case BLK_ZONE_COND_EXP_OPEN: +- break; +- default: +- /* Invalid zone condition */ +- ret = BLK_STS_IOERR; +- goto unlock; +- } +- +- /* +- * Regular writes must be at the write pointer position. +- * Zone append writes are automatically issued at the write +- * pointer and the position returned using the request or BIO +- * sector. +- */ +- if (append) { +- sector = zone->wp; +- if (cmd->bio) +- cmd->bio->bi_iter.bi_sector = sector; +- else +- cmd->rq->__sector = sector; +- } else if (sector != zone->wp) { +- ret = BLK_STS_IOERR; +- goto unlock; +- } +- +- if (zone->wp + nr_sectors > zone->start + zone->capacity) { +- ret = BLK_STS_IOERR; +- goto unlock; +- } +- +- if (zone->cond == BLK_ZONE_COND_CLOSED) { +- dev->nr_zones_closed--; +- dev->nr_zones_imp_open++; +- } else if (zone->cond == BLK_ZONE_COND_EMPTY) { +- dev->nr_zones_imp_open++; +- } +- if (zone->cond != BLK_ZONE_COND_EXP_OPEN) +- zone->cond = BLK_ZONE_COND_IMP_OPEN; +- +- /* +- * Memory backing allocation may sleep: release the zone_lock spinlock +- * to avoid scheduling in atomic context. Zone operation atomicity is +- * still guaranteed through the zone_locks bitmap. 
+- */ +- if (dev->memory_backed) +- spin_unlock_irq(&dev->zone_lock); +- ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); +- if (dev->memory_backed) +- spin_lock_irq(&dev->zone_lock); +- +- if (ret != BLK_STS_OK) +- goto unlock; +- +- zone->wp += nr_sectors; +- if (zone->wp == zone->start + zone->capacity) { +- if (zone->cond == BLK_ZONE_COND_EXP_OPEN) +- dev->nr_zones_exp_open--; +- else if (zone->cond == BLK_ZONE_COND_IMP_OPEN) +- dev->nr_zones_imp_open--; +- zone->cond = BLK_ZONE_COND_FULL; +- } +- ret = BLK_STS_OK; +- +-unlock: +- null_unlock_zone(dev, zno); +- +- return ret; +-} +- +-static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone) +-{ +- blk_status_t ret; +- +- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) +- return BLK_STS_IOERR; +- +- switch (zone->cond) { +- case BLK_ZONE_COND_EXP_OPEN: +- /* open operation on exp open is not an error */ +- return BLK_STS_OK; +- case BLK_ZONE_COND_EMPTY: +- ret = null_check_zone_resources(dev, zone); +- if (ret != BLK_STS_OK) +- return ret; +- break; +- case BLK_ZONE_COND_IMP_OPEN: +- dev->nr_zones_imp_open--; +- break; +- case BLK_ZONE_COND_CLOSED: +- ret = null_check_zone_resources(dev, zone); +- if (ret != BLK_STS_OK) +- return ret; +- dev->nr_zones_closed--; +- break; +- case BLK_ZONE_COND_FULL: +- default: +- return BLK_STS_IOERR; +- } +- +- zone->cond = BLK_ZONE_COND_EXP_OPEN; +- dev->nr_zones_exp_open++; +- +- return BLK_STS_OK; +-} +- +-static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone) +-{ +- blk_status_t ret; +- +- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) +- return BLK_STS_IOERR; +- +- switch (zone->cond) { +- case BLK_ZONE_COND_FULL: +- /* finish operation on full is not an error */ +- return BLK_STS_OK; +- case BLK_ZONE_COND_EMPTY: +- ret = null_check_zone_resources(dev, zone); +- if (ret != BLK_STS_OK) +- return ret; +- break; +- case BLK_ZONE_COND_IMP_OPEN: +- dev->nr_zones_imp_open--; +- break; +- case BLK_ZONE_COND_EXP_OPEN: +- dev->nr_zones_exp_open--; +- break; +- case BLK_ZONE_COND_CLOSED: +- ret = null_check_zone_resources(dev, zone); +- if (ret != BLK_STS_OK) +- return ret; +- dev->nr_zones_closed--; +- break; +- default: +- return BLK_STS_IOERR; +- } +- +- zone->cond = BLK_ZONE_COND_FULL; +- zone->wp = zone->start + zone->len; +- +- return BLK_STS_OK; +-} +- +-static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *zone) +-{ +- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) +- return BLK_STS_IOERR; +- +- switch (zone->cond) { +- case BLK_ZONE_COND_EMPTY: +- /* reset operation on empty is not an error */ +- return BLK_STS_OK; +- case BLK_ZONE_COND_IMP_OPEN: +- dev->nr_zones_imp_open--; +- break; +- case BLK_ZONE_COND_EXP_OPEN: +- dev->nr_zones_exp_open--; +- break; +- case BLK_ZONE_COND_CLOSED: +- dev->nr_zones_closed--; +- break; +- case BLK_ZONE_COND_FULL: +- break; +- default: +- return BLK_STS_IOERR; +- } +- +- zone->cond = BLK_ZONE_COND_EMPTY; +- zone->wp = zone->start; +- +- return BLK_STS_OK; +-} +- +-static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, +- sector_t sector) +-{ +- struct nullb_device *dev = cmd->nq->dev; +- unsigned int zone_no; +- struct blk_zone *zone; +- blk_status_t ret; +- size_t i; +- +- if (op == REQ_OP_ZONE_RESET_ALL) { +- for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { +- null_lock_zone(dev, i); +- zone = &dev->zones[i]; +- if (zone->cond != BLK_ZONE_COND_EMPTY) { +- null_reset_zone(dev, zone); +- trace_nullb_zone_op(cmd, i, zone->cond); +- } +- 
null_unlock_zone(dev, i); +- } +- return BLK_STS_OK; +- } +- +- zone_no = null_zone_no(dev, sector); +- zone = &dev->zones[zone_no]; +- +- null_lock_zone(dev, zone_no); +- +- switch (op) { +- case REQ_OP_ZONE_RESET: +- ret = null_reset_zone(dev, zone); +- break; +- case REQ_OP_ZONE_OPEN: +- ret = null_open_zone(dev, zone); +- break; +- case REQ_OP_ZONE_CLOSE: +- ret = null_close_zone(dev, zone); +- break; +- case REQ_OP_ZONE_FINISH: +- ret = null_finish_zone(dev, zone); +- break; +- default: +- ret = BLK_STS_NOTSUPP; +- break; +- } +- +- if (ret == BLK_STS_OK) +- trace_nullb_zone_op(cmd, zone_no, zone->cond); +- +- null_unlock_zone(dev, zone_no); +- +- return ret; +-} +- +-blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op, +- sector_t sector, sector_t nr_sectors) +-{ +- struct nullb_device *dev = cmd->nq->dev; +- unsigned int zno = null_zone_no(dev, sector); +- blk_status_t sts; +- +- switch (op) { +- case REQ_OP_WRITE: +- sts = null_zone_write(cmd, sector, nr_sectors, false); +- break; +- case REQ_OP_ZONE_APPEND: +- sts = null_zone_write(cmd, sector, nr_sectors, true); +- break; +- case REQ_OP_ZONE_RESET: +- case REQ_OP_ZONE_RESET_ALL: +- case REQ_OP_ZONE_OPEN: +- case REQ_OP_ZONE_CLOSE: +- case REQ_OP_ZONE_FINISH: +- sts = null_zone_mgmt(cmd, op, sector); +- break; +- default: +- null_lock_zone(dev, zno); +- sts = null_process_cmd(cmd, op, sector, nr_sectors); +- null_unlock_zone(dev, zno); +- } +- +- return sts; +-} +diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c +index 39aeebc6837da..d9e41d3bbe717 100644 +--- a/drivers/block/sunvdc.c ++++ b/drivers/block/sunvdc.c +@@ -984,6 +984,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) + print_version(); + + hp = mdesc_grab(); ++ if (!hp) ++ return -ENODEV; + + err = -ENODEV; + if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) { +diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig +index c715d4681a0b8..4ae49eae45869 100644 +--- a/drivers/clk/Kconfig ++++ b/drivers/clk/Kconfig +@@ -79,7 +79,7 @@ config COMMON_CLK_RK808 + config COMMON_CLK_HI655X + tristate "Clock driver for Hi655x" if EXPERT + depends on (MFD_HI655X_PMIC || COMPILE_TEST) +- depends on REGMAP ++ select REGMAP + default MFD_HI655X_PMIC + help + This driver supports the hi655x PMIC clock. 
This +diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c +index 4a031c62f92a1..5098639d41f12 100644 +--- a/drivers/cpuidle/cpuidle-psci-domain.c ++++ b/drivers/cpuidle/cpuidle-psci-domain.c +@@ -182,7 +182,8 @@ static void psci_pd_remove(void) + struct psci_pd_provider *pd_provider, *it; + struct generic_pm_domain *genpd; + +- list_for_each_entry_safe(pd_provider, it, &psci_pd_providers, link) { ++ list_for_each_entry_safe_reverse(pd_provider, it, ++ &psci_pd_providers, link) { + of_genpd_del_provider(pd_provider->node); + + genpd = of_genpd_remove_last(pd_provider->node); +diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c +index 9e6504592646e..300ba2991936b 100644 +--- a/drivers/firmware/xilinx/zynqmp.c ++++ b/drivers/firmware/xilinx/zynqmp.c +@@ -171,7 +171,7 @@ static int zynqmp_pm_feature(u32 api_id) + } + + /* Add new entry if not present */ +- feature_data = kmalloc(sizeof(*feature_data), GFP_KERNEL); ++ feature_data = kmalloc(sizeof(*feature_data), GFP_ATOMIC); + if (!feature_data) + return -ENOMEM; + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +index 159be13ef20bb..2c19b3775179b 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +@@ -528,16 +528,13 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) + struct kfd_event_waiter *event_waiters; + uint32_t i; + +- event_waiters = kmalloc_array(num_events, +- sizeof(struct kfd_event_waiter), +- GFP_KERNEL); ++ event_waiters = kcalloc(num_events, sizeof(struct kfd_event_waiter), ++ GFP_KERNEL); + if (!event_waiters) + return NULL; + +- for (i = 0; (event_waiters) && (i < num_events) ; i++) { ++ for (i = 0; i < num_events; i++) + init_wait(&event_waiters[i].wait); +- event_waiters[i].activated = false; +- } + + return event_waiters; + } +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +index e427f4ffa0807..e5b1002d7f3f0 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +@@ -1868,7 +1868,10 @@ static unsigned int CalculateVMAndRowBytes( + } + + if (SurfaceTiling == dm_sw_linear) { +- *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); ++ if (PTEBufferSizeInRequests == 0) ++ *dpte_row_height = 1; ++ else ++ *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); + *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; + *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; + } else if (ScanDirection != dm_vert) { +diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c +index c56656a95cf99..b7bb5610dfe21 100644 +--- a/drivers/gpu/drm/drm_gem_shmem_helper.c ++++ b/drivers/gpu/drm/drm_gem_shmem_helper.c +@@ -614,11 +614,14 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) + int ret; + + if (obj->import_attach) { +- /* Drop the reference drm_gem_mmap_obj() acquired.*/ +- drm_gem_object_put(obj); + vma->vm_private_data = NULL; ++ ret = dma_buf_mmap(obj->dma_buf, vma, 0); ++ ++ /* Drop the reference drm_gem_mmap_obj() acquired.*/ ++ if (!ret) ++ drm_gem_object_put(obj); + 
+- return dma_buf_mmap(obj->dma_buf, vma, 0); ++ return ret; + } + + shmem = to_drm_gem_shmem_obj(obj); +diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c +index 69b2e5509d678..de67b2745258f 100644 +--- a/drivers/gpu/drm/i915/gt/intel_ring.c ++++ b/drivers/gpu/drm/i915/gt/intel_ring.c +@@ -108,7 +108,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) + struct i915_vma *vma; + + obj = ERR_PTR(-ENODEV); +- if (i915_ggtt_has_aperture(ggtt)) ++ if (i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915)) + obj = i915_gem_object_create_stolen(i915, size); + if (IS_ERR(obj)) + obj = i915_gem_object_create_internal(i915, size); +diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c +index c4c2d24dc5094..0532a5069c04b 100644 +--- a/drivers/gpu/drm/i915/i915_active.c ++++ b/drivers/gpu/drm/i915/i915_active.c +@@ -432,8 +432,7 @@ replace_barrier(struct i915_active *ref, struct i915_active_fence *active) + * we can use it to substitute for the pending idle-barrer + * request that we want to emit on the kernel_context. + */ +- __active_del_barrier(ref, node_from_active(active)); +- return true; ++ return __active_del_barrier(ref, node_from_active(active)); + } + + int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) +@@ -446,16 +445,19 @@ int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) + if (err) + return err; + +- active = active_instance(ref, idx); +- if (!active) { +- err = -ENOMEM; +- goto out; +- } ++ do { ++ active = active_instance(ref, idx); ++ if (!active) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ if (replace_barrier(ref, active)) { ++ RCU_INIT_POINTER(active->fence, NULL); ++ atomic_dec(&ref->count); ++ } ++ } while (unlikely(is_barrier(active))); + +- if (replace_barrier(ref, active)) { +- RCU_INIT_POINTER(active->fence, NULL); +- atomic_dec(&ref->count); +- } + if (!__i915_active_fence_set(active, fence)) + __i915_active_acquire(ref); + +diff --git a/drivers/gpu/drm/meson/meson_vpp.c b/drivers/gpu/drm/meson/meson_vpp.c +index 154837688ab0d..5df1957c8e41f 100644 +--- a/drivers/gpu/drm/meson/meson_vpp.c ++++ b/drivers/gpu/drm/meson/meson_vpp.c +@@ -100,6 +100,8 @@ void meson_vpp_init(struct meson_drm *priv) + priv->io_base + _REG(VPP_DOLBY_CTRL)); + writel_relaxed(0x1020080, + priv->io_base + _REG(VPP_DUMMY_DATA1)); ++ writel_relaxed(0x42020, ++ priv->io_base + _REG(VPP_DUMMY_DATA)); + } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) + writel_relaxed(0xf, priv->io_base + _REG(DOLBY_PATH_CTRL)); + +diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c +index 13596961ae17f..5ff856ef7d88c 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c ++++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c +@@ -236,7 +236,7 @@ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev, + if (pm_runtime_active(pfdev->dev)) + mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT); + +- pm_runtime_put_sync_autosuspend(pfdev->dev); ++ pm_runtime_put_autosuspend(pfdev->dev); + } + + static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index 5f9ec1d1464a2..524d6d712e724 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -258,6 +258,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign + { + struct hid_report *report; + struct hid_field *field; ++ unsigned int 
max_buffer_size = HID_MAX_BUFFER_SIZE; + unsigned int usages; + unsigned int offset; + unsigned int i; +@@ -288,8 +289,11 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign + offset = report->size; + report->size += parser->global.report_size * parser->global.report_count; + ++ if (parser->device->ll_driver->max_buffer_size) ++ max_buffer_size = parser->device->ll_driver->max_buffer_size; ++ + /* Total size check: Allow for possible report index byte */ +- if (report->size > (HID_MAX_BUFFER_SIZE - 1) << 3) { ++ if (report->size > (max_buffer_size - 1) << 3) { + hid_err(parser->device, "report is too long\n"); + return -1; + } +@@ -1752,6 +1756,7 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, + struct hid_report_enum *report_enum = hid->report_enum + type; + struct hid_report *report; + struct hid_driver *hdrv; ++ int max_buffer_size = HID_MAX_BUFFER_SIZE; + unsigned int a; + u32 rsize, csize = size; + u8 *cdata = data; +@@ -1768,10 +1773,13 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, + + rsize = hid_compute_report_size(report); + +- if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE) +- rsize = HID_MAX_BUFFER_SIZE - 1; +- else if (rsize > HID_MAX_BUFFER_SIZE) +- rsize = HID_MAX_BUFFER_SIZE; ++ if (hid->ll_driver->max_buffer_size) ++ max_buffer_size = hid->ll_driver->max_buffer_size; ++ ++ if (report_enum->numbered && rsize >= max_buffer_size) ++ rsize = max_buffer_size - 1; ++ else if (rsize > max_buffer_size) ++ rsize = max_buffer_size; + + if (csize < rsize) { + dbg_hid("report %d is too short, (%d < %d)\n", report->id, +diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c +index fc06d8bb42e0f..ba0ca652b9dab 100644 +--- a/drivers/hid/uhid.c ++++ b/drivers/hid/uhid.c +@@ -395,6 +395,7 @@ struct hid_ll_driver uhid_hid_driver = { + .parse = uhid_hid_parse, + .raw_request = uhid_hid_raw_request, + .output_report = uhid_hid_output_report, ++ .max_buffer_size = UHID_DATA_MAX, + }; + EXPORT_SYMBOL_GPL(uhid_hid_driver); + +diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c +index 9d5b019651f2d..6b84822e7d93b 100644 +--- a/drivers/hwmon/adt7475.c ++++ b/drivers/hwmon/adt7475.c +@@ -486,10 +486,10 @@ static ssize_t temp_store(struct device *dev, struct device_attribute *attr, + val = (temp - val) / 1000; + + if (sattr->index != 1) { +- data->temp[HYSTERSIS][sattr->index] &= 0xF0; ++ data->temp[HYSTERSIS][sattr->index] &= 0x0F; + data->temp[HYSTERSIS][sattr->index] |= (val & 0xF) << 4; + } else { +- data->temp[HYSTERSIS][sattr->index] &= 0x0F; ++ data->temp[HYSTERSIS][sattr->index] &= 0xF0; + data->temp[HYSTERSIS][sattr->index] |= (val & 0xF); + } + +@@ -554,11 +554,11 @@ static ssize_t temp_st_show(struct device *dev, struct device_attribute *attr, + val = data->enh_acoustics[0] & 0xf; + break; + case 1: +- val = (data->enh_acoustics[1] >> 4) & 0xf; ++ val = data->enh_acoustics[1] & 0xf; + break; + case 2: + default: +- val = data->enh_acoustics[1] & 0xf; ++ val = (data->enh_acoustics[1] >> 4) & 0xf; + break; + } + +diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c +index d3c98115042b5..836e7579e166a 100644 +--- a/drivers/hwmon/ina3221.c ++++ b/drivers/hwmon/ina3221.c +@@ -772,7 +772,7 @@ static int ina3221_probe_child_from_dt(struct device *dev, + return ret; + } else if (val > INA3221_CHANNEL3) { + dev_err(dev, "invalid reg %d of %pOFn\n", val, child); +- return ret; ++ return -EINVAL; + } + + input = &ina->inputs[val]; +diff --git 
a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c +index c7b373ba92f21..d1b2e936546fd 100644 +--- a/drivers/hwmon/pmbus/adm1266.c ++++ b/drivers/hwmon/pmbus/adm1266.c +@@ -301,6 +301,7 @@ static int adm1266_config_gpio(struct adm1266_data *data) + data->gc.label = name; + data->gc.parent = &data->client->dev; + data->gc.owner = THIS_MODULE; ++ data->gc.can_sleep = true; + data->gc.base = -1; + data->gc.names = data->gpio_names; + data->gc.ngpio = ARRAY_SIZE(data->gpio_names); +diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c +index f8017993e2b4d..9e26cc084a176 100644 +--- a/drivers/hwmon/pmbus/ucd9000.c ++++ b/drivers/hwmon/pmbus/ucd9000.c +@@ -7,6 +7,7 @@ + */ + + #include <linux/debugfs.h> ++#include <linux/delay.h> + #include <linux/kernel.h> + #include <linux/module.h> + #include <linux/of_device.h> +@@ -16,6 +17,7 @@ + #include <linux/i2c.h> + #include <linux/pmbus.h> + #include <linux/gpio/driver.h> ++#include <linux/timekeeping.h> + #include "pmbus.h" + + enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd90320, ucd9090, +@@ -65,6 +67,7 @@ struct ucd9000_data { + struct gpio_chip gpio; + #endif + struct dentry *debugfs; ++ ktime_t write_time; + }; + #define to_ucd9000_data(_info) container_of(_info, struct ucd9000_data, info) + +@@ -73,6 +76,73 @@ struct ucd9000_debugfs_entry { + u8 index; + }; + ++/* ++ * It has been observed that the UCD90320 randomly fails register access when ++ * doing another access right on the back of a register write. To mitigate this ++ * make sure that there is a minimum delay between a write access and the ++ * following access. The 250us is based on experimental data. At a delay of ++ * 200us the issue seems to go away. Add a bit of extra margin to allow for ++ * system to system differences. 
++ */ ++#define UCD90320_WAIT_DELAY_US 250 ++ ++static inline void ucd90320_wait(const struct ucd9000_data *data) ++{ ++ s64 delta = ktime_us_delta(ktime_get(), data->write_time); ++ ++ if (delta < UCD90320_WAIT_DELAY_US) ++ udelay(UCD90320_WAIT_DELAY_US - delta); ++} ++ ++static int ucd90320_read_word_data(struct i2c_client *client, int page, ++ int phase, int reg) ++{ ++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client); ++ struct ucd9000_data *data = to_ucd9000_data(info); ++ ++ if (reg >= PMBUS_VIRT_BASE) ++ return -ENXIO; ++ ++ ucd90320_wait(data); ++ return pmbus_read_word_data(client, page, phase, reg); ++} ++ ++static int ucd90320_read_byte_data(struct i2c_client *client, int page, int reg) ++{ ++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client); ++ struct ucd9000_data *data = to_ucd9000_data(info); ++ ++ ucd90320_wait(data); ++ return pmbus_read_byte_data(client, page, reg); ++} ++ ++static int ucd90320_write_word_data(struct i2c_client *client, int page, ++ int reg, u16 word) ++{ ++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client); ++ struct ucd9000_data *data = to_ucd9000_data(info); ++ int ret; ++ ++ ucd90320_wait(data); ++ ret = pmbus_write_word_data(client, page, reg, word); ++ data->write_time = ktime_get(); ++ ++ return ret; ++} ++ ++static int ucd90320_write_byte(struct i2c_client *client, int page, u8 value) ++{ ++ const struct pmbus_driver_info *info = pmbus_get_driver_info(client); ++ struct ucd9000_data *data = to_ucd9000_data(info); ++ int ret; ++ ++ ucd90320_wait(data); ++ ret = pmbus_write_byte(client, page, value); ++ data->write_time = ktime_get(); ++ ++ return ret; ++} ++ + static int ucd9000_get_fan_config(struct i2c_client *client, int fan) + { + int fan_config = 0; +@@ -598,6 +668,11 @@ static int ucd9000_probe(struct i2c_client *client) + info->read_byte_data = ucd9000_read_byte_data; + info->func[0] |= PMBUS_HAVE_FAN12 | PMBUS_HAVE_STATUS_FAN12 + | PMBUS_HAVE_FAN34 | PMBUS_HAVE_STATUS_FAN34; ++ } else if (mid->driver_data == ucd90320) { ++ info->read_byte_data = ucd90320_read_byte_data; ++ info->read_word_data = ucd90320_read_word_data; ++ info->write_byte = ucd90320_write_byte; ++ info->write_word_data = ucd90320_write_word_data; + } + + ucd9000_probe_gpio(client, mid, data); +diff --git a/drivers/hwmon/tmp513.c b/drivers/hwmon/tmp513.c +index 47bbe47e062fd..7d5f7441aceb1 100644 +--- a/drivers/hwmon/tmp513.c ++++ b/drivers/hwmon/tmp513.c +@@ -758,7 +758,7 @@ static int tmp51x_probe(struct i2c_client *client) + static struct i2c_driver tmp51x_driver = { + .driver = { + .name = "tmp51x", +- .of_match_table = of_match_ptr(tmp51x_of_match), ++ .of_match_table = tmp51x_of_match, + }, + .probe_new = tmp51x_probe, + .id_table = tmp51x_id, +diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c +index f2a5af239c956..f5d3cf86753f7 100644 +--- a/drivers/hwmon/xgene-hwmon.c ++++ b/drivers/hwmon/xgene-hwmon.c +@@ -768,6 +768,7 @@ static int xgene_hwmon_remove(struct platform_device *pdev) + { + struct xgene_hwmon_dev *ctx = platform_get_drvdata(pdev); + ++ cancel_work_sync(&ctx->workq); + hwmon_device_unregister(ctx->hwmon_dev); + kfifo_free(&ctx->async_msg_fifo); + if (acpi_disabled) +diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c +index ceb6cdc20484e..7db6d0fc6ec2e 100644 +--- a/drivers/interconnect/core.c ++++ b/drivers/interconnect/core.c +@@ -850,6 +850,10 @@ void icc_node_destroy(int id) + + mutex_unlock(&icc_lock); + ++ if (!node) ++ return; ++ ++ 
kfree(node->links); + kfree(node); + } + EXPORT_SYMBOL_GPL(icc_node_destroy); +diff --git a/drivers/media/i2c/m5mols/m5mols_core.c b/drivers/media/i2c/m5mols/m5mols_core.c +index 21666d705e372..dcf9e4d4ee6b8 100644 +--- a/drivers/media/i2c/m5mols/m5mols_core.c ++++ b/drivers/media/i2c/m5mols/m5mols_core.c +@@ -488,7 +488,7 @@ static enum m5mols_restype __find_restype(u32 code) + do { + if (code == m5mols_default_ffmt[type].code) + return type; +- } while (type++ != SIZE_DEFAULT_FFMT); ++ } while (++type != SIZE_DEFAULT_FFMT); + + return 0; + } +diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c +index af85b32c6c1c8..c468f9a02ef6b 100644 +--- a/drivers/mmc/host/atmel-mci.c ++++ b/drivers/mmc/host/atmel-mci.c +@@ -1818,7 +1818,6 @@ static void atmci_tasklet_func(unsigned long priv) + atmci_writel(host, ATMCI_IER, ATMCI_NOTBUSY); + state = STATE_WAITING_NOTBUSY; + } else if (host->mrq->stop) { +- atmci_writel(host, ATMCI_IER, ATMCI_CMDRDY); + atmci_send_stop_cmd(host, data); + state = STATE_SENDING_STOP; + } else { +@@ -1851,8 +1850,6 @@ static void atmci_tasklet_func(unsigned long priv) + * command to send. + */ + if (host->mrq->stop) { +- atmci_writel(host, ATMCI_IER, +- ATMCI_CMDRDY); + atmci_send_stop_cmd(host, data); + state = STATE_SENDING_STOP; + } else { +diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c +index 24cd6d3dc6477..bf2592774165b 100644 +--- a/drivers/mmc/host/sdhci_am654.c ++++ b/drivers/mmc/host/sdhci_am654.c +@@ -369,7 +369,7 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg) + MAX_POWER_ON_TIMEOUT, false, host, val, + reg); + if (ret) +- dev_warn(mmc_dev(host->mmc), "Power on failed\n"); ++ dev_info(mmc_dev(host->mmc), "Power on failed\n"); + } + } + +diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index 371b345635e62..a253476a52b01 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -2734,7 +2734,7 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port) + return 10240 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; + else if (chip->info->ops->set_max_frame_size) + return 1632 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; +- return 1522 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; ++ return ETH_DATA_LEN; + } + + static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) +@@ -2742,6 +2742,17 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) + struct mv88e6xxx_chip *chip = ds->priv; + int ret = 0; + ++ /* For families where we don't know how to alter the MTU, ++ * just accept any value up to ETH_DATA_LEN ++ */ ++ if (!chip->info->ops->port_set_jumbo_size && ++ !chip->info->ops->set_max_frame_size) { ++ if (new_mtu > ETH_DATA_LEN) ++ return -EINVAL; ++ ++ return 0; ++ } ++ + if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) + new_mtu += EDSA_HLEN; + +@@ -2750,9 +2761,6 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) + ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu); + else if (chip->info->ops->set_max_frame_size) + ret = chip->info->ops->set_max_frame_size(chip, new_mtu); +- else +- if (new_mtu > 1522) +- ret = -EINVAL; + mv88e6xxx_reg_unlock(chip); + + return ret; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index 9e8a20a94862f..76481ff7074ba 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ 
-14851,6 +14851,7 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw) + int err; + int v_idx; + ++ pci_set_drvdata(pf->pdev, pf); + pci_save_state(pf->pdev); + + /* set up periodic task facility */ +diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c +index 59963b901be0f..e0790df700e2c 100644 +--- a/drivers/net/ethernet/intel/ice/ice_xsk.c ++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c +@@ -169,8 +169,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) + } + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + +- ice_qvec_dis_irq(vsi, rx_ring, q_vector); +- + ice_fill_txq_meta(vsi, tx_ring, &txq_meta); + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); + if (err) +@@ -185,6 +183,8 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) + if (err) + return err; + } ++ ice_qvec_dis_irq(vsi, rx_ring, q_vector); ++ + err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); + if (err) + return err; +diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c +index d2f5855b2ea79..895b6f0a39841 100644 +--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c +@@ -4986,6 +4986,11 @@ static int qed_init_wfq_param(struct qed_hwfn *p_hwfn, + + num_vports = p_hwfn->qm_info.num_vports; + ++ if (num_vports < 2) { ++ DP_NOTICE(p_hwfn, "Unexpected num_vports: %d\n", num_vports); ++ return -EINVAL; ++ } ++ + /* Accounting for the vports which are configured for WFQ explicitly */ + for (i = 0; i < num_vports; i++) { + u32 tmp_speed; +diff --git a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c +index 3e3192a3ad9b7..fdbd5f07a1857 100644 +--- a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c +@@ -422,7 +422,7 @@ qed_mfw_get_tlv_time_value(struct qed_mfw_tlv_time *p_time, + if (p_time->hour > 23) + p_time->hour = 0; + if (p_time->min > 59) +- p_time->hour = 0; ++ p_time->min = 0; + if (p_time->msec > 999) + p_time->msec = 0; + if (p_time->usec > 999) +diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c +index 01ea0d6f88193..934a4b54784b8 100644 +--- a/drivers/net/ethernet/sun/ldmvsw.c ++++ b/drivers/net/ethernet/sun/ldmvsw.c +@@ -290,6 +290,9 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) + + hp = mdesc_grab(); + ++ if (!hp) ++ return -ENODEV; ++ + rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len); + err = -ENODEV; + if (!rmac) { +diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c +index 96b883f965f63..b6c03adf1e762 100644 +--- a/drivers/net/ethernet/sun/sunvnet.c ++++ b/drivers/net/ethernet/sun/sunvnet.c +@@ -431,6 +431,9 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) + + hp = mdesc_grab(); + ++ if (!hp) ++ return -ENODEV; ++ + vp = vnet_find_parent(hp, vdev->mp, vdev); + if (IS_ERR(vp)) { + pr_err("Cannot find port parent vnet\n"); +diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c +index 943d26cbf39f5..71712ea25403d 100644 +--- a/drivers/net/ipvlan/ipvlan_l3s.c ++++ b/drivers/net/ipvlan/ipvlan_l3s.c +@@ -101,6 +101,7 @@ static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + goto out; + + skb->dev = addr->master->dev; ++ skb->skb_iif = skb->dev->ifindex; + len = skb->len + ETH_HLEN; + ipvlan_count_rx(addr->master, len, true, 
false); + out: +diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c +index caf7291ffaf83..b67de3f9ef186 100644 +--- a/drivers/net/phy/smsc.c ++++ b/drivers/net/phy/smsc.c +@@ -181,8 +181,11 @@ static int lan95xx_config_aneg_ext(struct phy_device *phydev) + static int lan87xx_read_status(struct phy_device *phydev) + { + struct smsc_phy_priv *priv = phydev->priv; ++ int err; + +- int err = genphy_read_status(phydev); ++ err = genphy_read_status(phydev); ++ if (err) ++ return err; + + if (!phydev->link && priv->energy_enable) { + /* Disable EDPD to wake up PHY */ +diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c +index 378a12ae2d957..fb1389bd09392 100644 +--- a/drivers/net/usb/smsc75xx.c ++++ b/drivers/net/usb/smsc75xx.c +@@ -2199,6 +2199,13 @@ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) + size = (rx_cmd_a & RX_CMD_A_LEN) - RXW_PADDING; + align_count = (4 - ((size + RXW_PADDING) % 4)) % 4; + ++ if (unlikely(size > skb->len)) { ++ netif_dbg(dev, rx_err, dev->net, ++ "size err rx_cmd_a=0x%08x\n", ++ rx_cmd_a); ++ return 0; ++ } ++ + if (unlikely(rx_cmd_a & RX_CMD_A_RED)) { + netif_dbg(dev, rx_err, dev->net, + "Error rx_cmd_a=0x%08x\n", rx_cmd_a); +diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c +index 57b07446bb768..68eb1253f888f 100644 +--- a/drivers/nfc/pn533/usb.c ++++ b/drivers/nfc/pn533/usb.c +@@ -175,6 +175,7 @@ static int pn533_usb_send_frame(struct pn533 *dev, + print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, + out->data, out->len, false); + ++ arg.phy = phy; + init_completion(&arg.done); + cntx = phy->out_urb->context; + phy->out_urb->context = &arg; +diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c +index 5d74c674368a5..8ccf5a86ad1bb 100644 +--- a/drivers/nfc/st-nci/ndlc.c ++++ b/drivers/nfc/st-nci/ndlc.c +@@ -286,13 +286,15 @@ EXPORT_SYMBOL(ndlc_probe); + + void ndlc_remove(struct llt_ndlc *ndlc) + { +- st_nci_remove(ndlc->ndev); +- + /* cancel timers */ + del_timer_sync(&ndlc->t1_timer); + del_timer_sync(&ndlc->t2_timer); + ndlc->t2_active = false; + ndlc->t1_active = false; ++ /* cancel work */ ++ cancel_work_sync(&ndlc->sm_work); ++ ++ st_nci_remove(ndlc->ndev); + + skb_queue_purge(&ndlc->rcv_q); + skb_queue_purge(&ndlc->send_q); +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index e162f1dfbafe9..a4b6aa932a8fe 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -723,16 +723,26 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, + range = page_address(ns->ctrl->discard_page); + } + +- __rq_for_each_bio(bio, req) { +- u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); +- u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; +- +- if (n < segments) { +- range[n].cattr = cpu_to_le32(0); +- range[n].nlb = cpu_to_le32(nlb); +- range[n].slba = cpu_to_le64(slba); ++ if (queue_max_discard_segments(req->q) == 1) { ++ u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req)); ++ u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9); ++ ++ range[0].cattr = cpu_to_le32(0); ++ range[0].nlb = cpu_to_le32(nlb); ++ range[0].slba = cpu_to_le64(slba); ++ n = 1; ++ } else { ++ __rq_for_each_bio(bio, req) { ++ u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); ++ u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; ++ ++ if (n < segments) { ++ range[n].cattr = cpu_to_le32(0); ++ range[n].nlb = cpu_to_le32(nlb); ++ range[n].slba = cpu_to_le64(slba); ++ } ++ n++; + } +- n++; + } + + if (WARN_ON_ONCE(n != segments)) { +diff --git 
a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c +index bc88ff2912f56..a82a0796a6148 100644 +--- a/drivers/nvme/target/core.c ++++ b/drivers/nvme/target/core.c +@@ -749,8 +749,10 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status) + + void nvmet_req_complete(struct nvmet_req *req, u16 status) + { ++ struct nvmet_sq *sq = req->sq; ++ + __nvmet_req_complete(req, status); +- percpu_ref_put(&req->sq->ref); ++ percpu_ref_put(&sq->ref); + } + EXPORT_SYMBOL_GPL(nvmet_req_complete); + +diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c +index 8b587fc97f7bc..c22cc20db1a74 100644 +--- a/drivers/pci/pci-driver.c ++++ b/drivers/pci/pci-driver.c +@@ -911,7 +911,7 @@ static int pci_pm_resume_noirq(struct device *dev) + pcie_pme_root_status_cleanup(pci_dev); + + if (!skip_bus_pm && prev_state == PCI_D3cold) +- pci_bridge_wait_for_secondary_bus(pci_dev); ++ pci_bridge_wait_for_secondary_bus(pci_dev, "resume", PCI_RESET_WAIT); + + if (pci_has_legacy_pm_support(pci_dev)) + return 0; +@@ -1298,7 +1298,7 @@ static int pci_pm_runtime_resume(struct device *dev) + pci_pm_default_resume(pci_dev); + + if (prev_state == PCI_D3cold) +- pci_bridge_wait_for_secondary_bus(pci_dev); ++ pci_bridge_wait_for_secondary_bus(pci_dev, "resume", PCI_RESET_WAIT); + + if (pm && pm->runtime_resume) + error = pm->runtime_resume(dev); +diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c +index 744a2e05635b9..d37013d007b6e 100644 +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -157,9 +157,6 @@ static int __init pcie_port_pm_setup(char *str) + } + __setup("pcie_port_pm=", pcie_port_pm_setup); + +-/* Time to wait after a reset for device to become responsive */ +-#define PCIE_RESET_READY_POLL_MS 60000 +- + /** + * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children + * @bus: pointer to PCI bus structure to search +@@ -1221,7 +1218,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout) + return -ENOTTY; + } + +- if (delay > 1000) ++ if (delay > PCI_RESET_WAIT) + pci_info(dev, "not ready %dms after %s; waiting\n", + delay - 1, reset_type); + +@@ -1230,7 +1227,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout) + pci_read_config_dword(dev, PCI_COMMAND, &id); + } + +- if (delay > 1000) ++ if (delay > PCI_RESET_WAIT) + pci_info(dev, "ready %dms after %s\n", delay - 1, + reset_type); + +@@ -4792,24 +4789,31 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus) + /** + * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible + * @dev: PCI bridge ++ * @reset_type: reset type in human-readable form ++ * @timeout: maximum time to wait for devices on secondary bus (milliseconds) + * + * Handle necessary delays before access to the devices on the secondary +- * side of the bridge are permitted after D3cold to D0 transition. ++ * side of the bridge are permitted after D3cold to D0 transition ++ * or Conventional Reset. + * + * For PCIe this means the delays in PCIe 5.0 section 6.6.1. For + * conventional PCI it means Tpvrh + Trhfa specified in PCI 3.0 section + * 4.3.2. ++ * ++ * Return 0 on success or -ENOTTY if the first device on the secondary bus ++ * failed to become accessible. 
+ */ +-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev) ++int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type, ++ int timeout) + { + struct pci_dev *child; + int delay; + + if (pci_dev_is_disconnected(dev)) +- return; ++ return 0; + + if (!pci_is_bridge(dev)) +- return; ++ return 0; + + down_read(&pci_bus_sem); + +@@ -4821,14 +4825,14 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev) + */ + if (!dev->subordinate || list_empty(&dev->subordinate->devices)) { + up_read(&pci_bus_sem); +- return; ++ return 0; + } + + /* Take d3cold_delay requirements into account */ + delay = pci_bus_max_d3cold_delay(dev->subordinate); + if (!delay) { + up_read(&pci_bus_sem); +- return; ++ return 0; + } + + child = list_first_entry(&dev->subordinate->devices, struct pci_dev, +@@ -4837,14 +4841,12 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev) + + /* + * Conventional PCI and PCI-X we need to wait Tpvrh + Trhfa before +- * accessing the device after reset (that is 1000 ms + 100 ms). In +- * practice this should not be needed because we don't do power +- * management for them (see pci_bridge_d3_possible()). ++ * accessing the device after reset (that is 1000 ms + 100 ms). + */ + if (!pci_is_pcie(dev)) { + pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay); + msleep(1000 + delay); +- return; ++ return 0; + } + + /* +@@ -4861,11 +4863,11 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev) + * configuration requests if we only wait for 100 ms (see + * https://bugzilla.kernel.org/show_bug.cgi?id=203885). + * +- * Therefore we wait for 100 ms and check for the device presence. +- * If it is still not present give it an additional 100 ms. ++ * Therefore we wait for 100 ms and check for the device presence ++ * until the timeout expires. + */ + if (!pcie_downstream_port(dev)) +- return; ++ return 0; + + if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) { + pci_dbg(dev, "waiting %d ms for downstream link\n", delay); +@@ -4876,14 +4878,11 @@ void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev) + if (!pcie_wait_for_link_delay(dev, true, delay)) { + /* Did not train, no need to wait any further */ + pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n"); +- return; ++ return -ENOTTY; + } + } + +- if (!pci_device_is_present(child)) { +- pci_dbg(child, "waiting additional %d ms to become accessible\n", delay); +- msleep(delay); +- } ++ return pci_dev_wait(child, reset_type, timeout - delay); + } + + void pci_reset_secondary_bus(struct pci_dev *dev) +@@ -4902,15 +4901,6 @@ void pci_reset_secondary_bus(struct pci_dev *dev) + + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); +- +- /* +- * Trhfa for conventional PCI is 2^25 clock cycles. +- * Assuming a minimum 33MHz clock this results in a 1s +- * delay before we can consider subordinate devices to +- * be re-initialized. PCIe has some ways to shorten this, +- * but we don't make use of them yet. 
+- */ +- ssleep(1); + } + + void __weak pcibios_reset_secondary_bus(struct pci_dev *dev) +@@ -4929,7 +4919,8 @@ int pci_bridge_secondary_bus_reset(struct pci_dev *dev) + { + pcibios_reset_secondary_bus(dev); + +- return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS); ++ return pci_bridge_wait_for_secondary_bus(dev, "bus reset", ++ PCIE_RESET_READY_POLL_MS); + } + EXPORT_SYMBOL_GPL(pci_bridge_secondary_bus_reset); + +diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h +index 9197d7362731e..72436000ff252 100644 +--- a/drivers/pci/pci.h ++++ b/drivers/pci/pci.h +@@ -47,6 +47,19 @@ int pci_bus_error_reset(struct pci_dev *dev); + #define PCI_PM_D3HOT_WAIT 10 /* msec */ + #define PCI_PM_D3COLD_WAIT 100 /* msec */ + ++/* ++ * Following exit from Conventional Reset, devices must be ready within 1 sec ++ * (PCIe r6.0 sec 6.6.1). A D3cold to D0 transition implies a Conventional ++ * Reset (PCIe r6.0 sec 5.8). ++ */ ++#define PCI_RESET_WAIT 1000 /* msec */ ++/* ++ * Devices may extend the 1 sec period through Request Retry Status completions ++ * (PCIe r6.0 sec 2.3.1). The spec does not provide an upper limit, but 60 sec ++ * ought to be enough for any device to become responsive. ++ */ ++#define PCIE_RESET_READY_POLL_MS 60000 /* msec */ ++ + /** + * struct pci_platform_pm_ops - Firmware PM callbacks + * +@@ -108,7 +121,8 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev); + void pci_free_cap_save_buffers(struct pci_dev *dev); + bool pci_bridge_d3_possible(struct pci_dev *dev); + void pci_bridge_d3_update(struct pci_dev *dev); +-void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev); ++int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type, ++ int timeout); + + static inline void pci_wakeup_event(struct pci_dev *dev) + { +diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c +index c556e7beafe38..f21d64ae4ffcc 100644 +--- a/drivers/pci/pcie/dpc.c ++++ b/drivers/pci/pcie/dpc.c +@@ -170,8 +170,8 @@ pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) + pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_TRIGGER); + +- if (!pcie_wait_for_link(pdev, true)) { +- pci_info(pdev, "Data Link Layer Link Active not set in 1000 msec\n"); ++ if (pci_bridge_wait_for_secondary_bus(pdev, "DPC", ++ PCIE_RESET_READY_POLL_MS)) { + clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + ret = PCI_ERS_RESULT_DISCONNECT; + } else { +diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c +index fae0323242103..18321cf9db5d6 100644 +--- a/drivers/scsi/hosts.c ++++ b/drivers/scsi/hosts.c +@@ -322,10 +322,7 @@ static void scsi_host_dev_release(struct device *dev) + struct Scsi_Host *shost = dev_to_shost(dev); + struct device *parent = dev->parent; + +- /* In case scsi_remove_host() has not been called. */ +- scsi_proc_hostdir_rm(shost->hostt); +- +- /* Wait for functions invoked through call_rcu(&shost->rcu, ...) */ ++ /* Wait for functions invoked through call_rcu(&scmd->rcu, ...) 
*/ + rcu_barrier(); + + if (shost->tmf_work_q) +diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c +index b58f4d9c296a3..326265fd7f91a 100644 +--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c +@@ -670,7 +670,7 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, + goto out_fail; + } + port = sas_port_alloc_num(sas_node->parent_dev); +- if ((sas_port_add(port))) { ++ if (!port || (sas_port_add(port))) { + ioc_err(ioc, "failure at %s:%d/%s()!\n", + __FILE__, __LINE__, __func__); + goto out_fail; +@@ -695,6 +695,12 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, + rphy = sas_expander_alloc(port, + mpt3sas_port->remote_identify.device_type); + ++ if (!rphy) { ++ ioc_err(ioc, "failure at %s:%d/%s()!\n", ++ __FILE__, __LINE__, __func__); ++ goto out_delete_port; ++ } ++ + rphy->identify = mpt3sas_port->remote_identify; + + if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) { +@@ -714,6 +720,7 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, + __FILE__, __LINE__, __func__); + sas_rphy_free(rphy); + rphy = NULL; ++ goto out_delete_port; + } + + if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) { +@@ -740,7 +747,10 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, + rphy_to_expander_device(rphy)); + return mpt3sas_port; + +- out_fail: ++out_delete_port: ++ sas_port_delete(port); ++ ++out_fail: + list_for_each_entry_safe(mpt3sas_phy, next, &mpt3sas_port->phy_list, + port_siblings) + list_del(&mpt3sas_phy->port_siblings); +diff --git a/drivers/tty/serial/8250/8250_em.c b/drivers/tty/serial/8250/8250_em.c +index f8e99995eee91..d94c3811a8f7a 100644 +--- a/drivers/tty/serial/8250/8250_em.c ++++ b/drivers/tty/serial/8250/8250_em.c +@@ -106,8 +106,8 @@ static int serial8250_em_probe(struct platform_device *pdev) + memset(&up, 0, sizeof(up)); + up.port.mapbase = regs->start; + up.port.irq = irq; +- up.port.type = PORT_UNKNOWN; +- up.port.flags = UPF_BOOT_AUTOCONF | UPF_FIXED_PORT | UPF_IOREMAP; ++ up.port.type = PORT_16750; ++ up.port.flags = UPF_FIXED_PORT | UPF_IOREMAP | UPF_FIXED_TYPE; + up.port.dev = &pdev->dev; + up.port.private_data = priv; + +diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c +index 9cb0e8673f826..32cce52800a73 100644 +--- a/drivers/tty/serial/fsl_lpuart.c ++++ b/drivers/tty/serial/fsl_lpuart.c +@@ -2159,9 +2159,15 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, + /* update the per-port timeout */ + uart_update_timeout(port, termios->c_cflag, baud); + +- /* wait transmit engin complete */ +- lpuart32_write(&sport->port, 0, UARTMODIR); +- lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC); ++ /* ++ * LPUART Transmission Complete Flag may never be set while queuing a break ++ * character, so skip waiting for transmission complete when UARTCTRL_SBK is ++ * asserted. 
++ */ ++ if (!(old_ctrl & UARTCTRL_SBK)) { ++ lpuart32_write(&sport->port, 0, UARTMODIR); ++ lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC); ++ } + + /* disable transmit and receive */ + lpuart32_write(&sport->port, old_ctrl & ~(UARTCTRL_TE | UARTCTRL_RE), +diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c +index 3feb6e40d56d8..ef8a4c5fc6875 100644 +--- a/drivers/video/fbdev/stifb.c ++++ b/drivers/video/fbdev/stifb.c +@@ -921,6 +921,28 @@ SETUP_HCRX(struct stifb_info *fb) + + /* ------------------- driver specific functions --------------------------- */ + ++static int ++stifb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) ++{ ++ struct stifb_info *fb = container_of(info, struct stifb_info, info); ++ ++ if (var->xres != fb->info.var.xres || ++ var->yres != fb->info.var.yres || ++ var->bits_per_pixel != fb->info.var.bits_per_pixel) ++ return -EINVAL; ++ ++ var->xres_virtual = var->xres; ++ var->yres_virtual = var->yres; ++ var->xoffset = 0; ++ var->yoffset = 0; ++ var->grayscale = fb->info.var.grayscale; ++ var->red.length = fb->info.var.red.length; ++ var->green.length = fb->info.var.green.length; ++ var->blue.length = fb->info.var.blue.length; ++ ++ return 0; ++} ++ + static int + stifb_setcolreg(u_int regno, u_int red, u_int green, + u_int blue, u_int transp, struct fb_info *info) +@@ -1145,6 +1167,7 @@ stifb_init_display(struct stifb_info *fb) + + static const struct fb_ops stifb_ops = { + .owner = THIS_MODULE, ++ .fb_check_var = stifb_check_var, + .fb_setcolreg = stifb_setcolreg, + .fb_blank = stifb_blank, + .fb_fillrect = stifb_fillrect, +@@ -1164,6 +1187,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) + struct stifb_info *fb; + struct fb_info *info; + unsigned long sti_rom_address; ++ char modestr[32]; + char *dev_name; + int bpp, xres, yres; + +@@ -1342,6 +1366,9 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) + info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT; + info->pseudo_palette = &fb->pseudo_palette; + ++ scnprintf(modestr, sizeof(modestr), "%dx%d-%d", xres, yres, bpp); ++ fb_find_mode(&info->var, info, modestr, NULL, 0, NULL, bpp); ++ + /* This has to be done !!! */ + if (fb_alloc_cmap(&info->cmap, NR_PALETTE, 0)) + goto out_err1; +diff --git a/fs/attr.c b/fs/attr.c +index 848ffe6e3c24b..326a0db3296d7 100644 +--- a/fs/attr.c ++++ b/fs/attr.c +@@ -18,6 +18,65 @@ + #include <linux/evm.h> + #include <linux/ima.h> + ++#include "internal.h" ++ ++/** ++ * setattr_should_drop_sgid - determine whether the setgid bit needs to be ++ * removed ++ * @inode: inode to check ++ * ++ * This function determines whether the setgid bit needs to be removed. ++ * We retain backwards compatibility and require setgid bit to be removed ++ * unconditionally if S_IXGRP is set. Otherwise we have the exact same ++ * requirements as setattr_prepare() and setattr_copy(). ++ * ++ * Return: ATTR_KILL_SGID if setgid bit needs to be removed, 0 otherwise. ++ */ ++int setattr_should_drop_sgid(const struct inode *inode) ++{ ++ umode_t mode = inode->i_mode; ++ ++ if (!(mode & S_ISGID)) ++ return 0; ++ if (mode & S_IXGRP) ++ return ATTR_KILL_SGID; ++ if (!in_group_or_capable(inode, inode->i_gid)) ++ return ATTR_KILL_SGID; ++ return 0; ++} ++ ++/** ++ * setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to ++ * be dropped ++ * @inode: inode to check ++ * ++ * This function determines whether the set{g,u}id bits need to be removed. 
++ * If the setuid bit needs to be removed ATTR_KILL_SUID is returned. If the ++ * setgid bit needs to be removed ATTR_KILL_SGID is returned. If both ++ * set{g,u}id bits need to be removed the corresponding mask of both flags is ++ * returned. ++ * ++ * Return: A mask of ATTR_KILL_S{G,U}ID indicating which - if any - setid bits ++ * to remove, 0 otherwise. ++ */ ++int setattr_should_drop_suidgid(struct inode *inode) ++{ ++ umode_t mode = inode->i_mode; ++ int kill = 0; ++ ++ /* suid always must be killed */ ++ if (unlikely(mode & S_ISUID)) ++ kill = ATTR_KILL_SUID; ++ ++ kill |= setattr_should_drop_sgid(inode); ++ ++ if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) ++ return kill; ++ ++ return 0; ++} ++EXPORT_SYMBOL(setattr_should_drop_suidgid); ++ + static bool chown_ok(const struct inode *inode, kuid_t uid) + { + if (uid_eq(current_fsuid(), inode->i_uid) && +@@ -90,9 +149,8 @@ int setattr_prepare(struct dentry *dentry, struct iattr *attr) + if (!inode_owner_or_capable(inode)) + return -EPERM; + /* Also check the setgid bit! */ +- if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : +- inode->i_gid) && +- !capable_wrt_inode_uidgid(inode, CAP_FSETID)) ++ if (!in_group_or_capable(inode, (ia_valid & ATTR_GID) ? ++ attr->ia_gid : inode->i_gid)) + attr->ia_mode &= ~S_ISGID; + } + +@@ -193,9 +251,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) + inode->i_ctime = attr->ia_ctime; + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; +- +- if (!in_group_p(inode->i_gid) && +- !capable_wrt_inode_uidgid(inode, CAP_FSETID)) ++ if (!in_group_or_capable(inode, inode->i_gid)) + mode &= ~S_ISGID; + inode->i_mode = mode; + } +@@ -297,7 +353,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de + } + } + if (ia_valid & ATTR_KILL_SGID) { +- if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { ++ if (mode & S_ISGID) { + if (!(ia_valid & ATTR_MODE)) { + ia_valid = attr->ia_valid |= ATTR_MODE; + attr->ia_mode = inode->i_mode; +diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c +index 97cd4df040608..e11818801148a 100644 +--- a/fs/cifs/smb2inode.c ++++ b/fs/cifs/smb2inode.c +@@ -236,15 +236,32 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, + size[0] = 8; /* sizeof __le64 */ + data[0] = ptr; + +- rc = SMB2_set_info_init(tcon, server, +- &rqst[num_rqst], COMPOUND_FID, +- COMPOUND_FID, current->tgid, +- FILE_END_OF_FILE_INFORMATION, +- SMB2_O_INFO_FILE, 0, data, size); ++ if (cfile) { ++ rc = SMB2_set_info_init(tcon, server, ++ &rqst[num_rqst], ++ cfile->fid.persistent_fid, ++ cfile->fid.volatile_fid, ++ current->tgid, ++ FILE_END_OF_FILE_INFORMATION, ++ SMB2_O_INFO_FILE, 0, ++ data, size); ++ } else { ++ rc = SMB2_set_info_init(tcon, server, ++ &rqst[num_rqst], ++ COMPOUND_FID, ++ COMPOUND_FID, ++ current->tgid, ++ FILE_END_OF_FILE_INFORMATION, ++ SMB2_O_INFO_FILE, 0, ++ data, size); ++ if (!rc) { ++ smb2_set_next_command(tcon, &rqst[num_rqst]); ++ smb2_set_related(&rqst[num_rqst]); ++ } ++ } + if (rc) + goto finished; +- smb2_set_next_command(tcon, &rqst[num_rqst]); +- smb2_set_related(&rqst[num_rqst++]); ++ num_rqst++; + trace_smb3_set_eof_enter(xid, ses->Suid, tcon->tid, full_path); + break; + case SMB2_OP_SET_INFO: +diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c +index b137006f0fd25..4409f56fc37e6 100644 +--- a/fs/cifs/transport.c ++++ b/fs/cifs/transport.c +@@ -312,7 +312,7 @@ static int + __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *rqst) + { +- int rc = 0; ++ 
int rc; + struct kvec *iov; + int n_vec; + unsigned int send_length = 0; +@@ -323,6 +323,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + struct msghdr smb_msg = {}; + __be32 rfc1002_marker; + ++ cifs_in_send_inc(server); + if (cifs_rdma_enabled(server)) { + /* return -EAGAIN when connecting or reconnecting */ + rc = -EAGAIN; +@@ -331,14 +332,17 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + goto smbd_done; + } + ++ rc = -EAGAIN; + if (ssocket == NULL) +- return -EAGAIN; ++ goto out; + ++ rc = -ERESTARTSYS; + if (fatal_signal_pending(current)) { + cifs_dbg(FYI, "signal pending before send request\n"); +- return -ERESTARTSYS; ++ goto out; + } + ++ rc = 0; + /* cork the socket */ + tcp_sock_set_cork(ssocket->sk, true); + +@@ -449,7 +453,8 @@ smbd_done: + rc); + else if (rc > 0) + rc = 0; +- ++out: ++ cifs_in_send_dec(server); + return rc; + } + +@@ -826,9 +831,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, + * I/O response may come back and free the mid entry on another thread. + */ + cifs_save_when_sent(mid); +- cifs_in_send_inc(server); + rc = smb_send_rqst(server, 1, rqst, flags); +- cifs_in_send_dec(server); + + if (rc < 0) { + revert_current_mid(server, mid->credits); +@@ -1117,9 +1120,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, + else + midQ[i]->callback = cifs_compound_last_callback; + } +- cifs_in_send_inc(server); + rc = smb_send_rqst(server, num_rqst, rqst, flags); +- cifs_in_send_dec(server); + + for (i = 0; i < num_rqst; i++) + cifs_save_when_sent(midQ[i]); +@@ -1356,9 +1357,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, + + midQ->mid_state = MID_REQUEST_SUBMITTED; + +- cifs_in_send_inc(server); + rc = smb_send(server, in_buf, len); +- cifs_in_send_dec(server); + cifs_save_when_sent(midQ); + + if (rc < 0) +@@ -1495,9 +1494,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, + } + + midQ->mid_state = MID_REQUEST_SUBMITTED; +- cifs_in_send_inc(server); + rc = smb_send(server, in_buf, len); +- cifs_in_send_dec(server); + cifs_save_when_sent(midQ); + + if (rc < 0) +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 1a654a1f3f46b..6ba185b46ba39 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4721,13 +4721,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, + goto bad_inode; + raw_inode = ext4_raw_inode(&iloc); + +- if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) { +- ext4_error_inode(inode, function, line, 0, +- "iget: root inode unallocated"); +- ret = -EFSCORRUPTED; +- goto bad_inode; +- } +- + if ((flags & EXT4_IGET_HANDLE) && + (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) { + ret = -ESTALE; +@@ -4800,11 +4793,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, + * NeilBrown 1999oct15 + */ + if (inode->i_nlink == 0) { +- if ((inode->i_mode == 0 || ++ if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL || + !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) && + ino != EXT4_BOOT_LOADER_INO) { +- /* this inode is deleted */ +- ret = -ESTALE; ++ /* this inode is deleted or unallocated */ ++ if (flags & EXT4_IGET_SPECIAL) { ++ ext4_error_inode(inode, function, line, 0, ++ "iget: special inode unallocated"); ++ ret = -EFSCORRUPTED; ++ } else ++ ret = -ESTALE; + goto bad_inode; + } + /* The only unlinked inodes we let through here have +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index 1f47aeca71422..45f719c1e0023 100644 +--- a/fs/ext4/namei.c ++++ 
b/fs/ext4/namei.c +@@ -3934,10 +3934,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, + goto end_rename; + } + retval = ext4_rename_dir_prepare(handle, &old); +- if (retval) { +- inode_unlock(old.inode); ++ if (retval) + goto end_rename; +- } + } + /* + * If we're renaming a file within an inline_data dir and adding or +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 60e122761352c..f3da1f2d4cb93 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -386,6 +386,17 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, + struct inode *inode; + int err; + ++ /* ++ * We have to check for this corruption early as otherwise ++ * iget_locked() could wait indefinitely for the state of our ++ * parent inode. ++ */ ++ if (parent->i_ino == ea_ino) { ++ ext4_error(parent->i_sb, ++ "Parent and EA inode have the same ino %lu", ea_ino); ++ return -EFSCORRUPTED; ++ } ++ + inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); +diff --git a/fs/inode.c b/fs/inode.c +index 9f49e0bdc2f77..7ec90788d8be9 100644 +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -1854,35 +1854,6 @@ skip_update: + } + EXPORT_SYMBOL(touch_atime); + +-/* +- * The logic we want is +- * +- * if suid or (sgid and xgrp) +- * remove privs +- */ +-int should_remove_suid(struct dentry *dentry) +-{ +- umode_t mode = d_inode(dentry)->i_mode; +- int kill = 0; +- +- /* suid always must be killed */ +- if (unlikely(mode & S_ISUID)) +- kill = ATTR_KILL_SUID; +- +- /* +- * sgid without any exec bits is just a mandatory locking mark; leave +- * it alone. If some exec bits are set, it's a real sgid; kill it. +- */ +- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) +- kill |= ATTR_KILL_SGID; +- +- if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) +- return kill; +- +- return 0; +-} +-EXPORT_SYMBOL(should_remove_suid); +- + /* + * Return mask of changes for notify_change() that need to be done as a + * response to write or truncate. Return 0 if nothing has to be changed. +@@ -1897,7 +1868,7 @@ int dentry_needs_remove_privs(struct dentry *dentry) + if (IS_NOSEC(inode)) + return 0; + +- mask = should_remove_suid(dentry); ++ mask = setattr_should_drop_suidgid(inode); + ret = security_inode_need_killpriv(dentry); + if (ret < 0) + return ret; +@@ -2147,10 +2118,6 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, + /* Directories are special, and always inherit S_ISGID */ + if (S_ISDIR(mode)) + mode |= S_ISGID; +- else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && +- !in_group_p(inode->i_gid) && +- !capable_wrt_inode_uidgid(dir, CAP_FSETID)) +- mode &= ~S_ISGID; + } else + inode->i_gid = current_fsgid(); + inode->i_mode = mode; +@@ -2382,3 +2349,48 @@ int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa, + return 0; + } + EXPORT_SYMBOL(vfs_ioc_fssetxattr_check); ++ ++/** ++ * in_group_or_capable - check whether caller is CAP_FSETID privileged ++ * @inode: inode to check ++ * @gid: the new/current gid of @inode ++ * ++ * Check wether @gid is in the caller's group list or if the caller is ++ * privileged with CAP_FSETID over @inode. This can be used to determine ++ * whether the setgid bit can be kept or must be dropped. ++ * ++ * Return: true if the caller is sufficiently privileged, false if not. 
++ */ ++bool in_group_or_capable(const struct inode *inode, kgid_t gid) ++{ ++ if (in_group_p(gid)) ++ return true; ++ if (capable_wrt_inode_uidgid(inode, CAP_FSETID)) ++ return true; ++ return false; ++} ++ ++/** ++ * mode_strip_sgid - handle the sgid bit for non-directories ++ * @dir: parent directory inode ++ * @mode: mode of the file to be created in @dir ++ * ++ * If the @mode of the new file has both the S_ISGID and S_IXGRP bit ++ * raised and @dir has the S_ISGID bit raised ensure that the caller is ++ * either in the group of the parent directory or they have CAP_FSETID ++ * in their user namespace and are privileged over the parent directory. ++ * In all other cases, strip the S_ISGID bit from @mode. ++ * ++ * Return: the new mode to use for the file ++ */ ++umode_t mode_strip_sgid(const struct inode *dir, umode_t mode) ++{ ++ if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP)) ++ return mode; ++ if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID)) ++ return mode; ++ if (in_group_or_capable(dir, dir->i_gid)) ++ return mode; ++ return mode & ~S_ISGID; ++} ++EXPORT_SYMBOL(mode_strip_sgid); +diff --git a/fs/internal.h b/fs/internal.h +index 06d313b9beecb..d5d9fcdae10c4 100644 +--- a/fs/internal.h ++++ b/fs/internal.h +@@ -149,6 +149,7 @@ extern int vfs_open(const struct path *, struct file *); + extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); + extern void inode_add_lru(struct inode *inode); + extern int dentry_needs_remove_privs(struct dentry *dentry); ++bool in_group_or_capable(const struct inode *inode, kgid_t gid); + + /* + * fs-writeback.c +@@ -196,3 +197,8 @@ int sb_init_dio_done_wq(struct super_block *sb); + */ + int do_statx(int dfd, const char __user *filename, unsigned flags, + unsigned int mask, struct statx __user *buffer); ++ ++/* ++ * fs/attr.c ++ */ ++int setattr_should_drop_sgid(const struct inode *inode); +diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c +index bd7d58d27bfc6..97a3c09fd96b6 100644 +--- a/fs/jffs2/file.c ++++ b/fs/jffs2/file.c +@@ -138,19 +138,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + pgoff_t index = pos >> PAGE_SHIFT; +- uint32_t pageofs = index << PAGE_SHIFT; + int ret = 0; + + jffs2_dbg(1, "%s()\n", __func__); + +- if (pageofs > inode->i_size) { +- /* Make new hole frag from old EOF to new page */ ++ if (pos > inode->i_size) { ++ /* Make new hole frag from old EOF to new position */ + struct jffs2_raw_inode ri; + struct jffs2_full_dnode *fn; + uint32_t alloc_len; + +- jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", +- (unsigned int)inode->i_size, pageofs); ++ jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new position\n", ++ (unsigned int)inode->i_size, (uint32_t)pos); + + ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); +@@ -170,10 +169,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + ri.mode = cpu_to_jemode(inode->i_mode); + ri.uid = cpu_to_je16(i_uid_read(inode)); + ri.gid = cpu_to_je16(i_gid_read(inode)); +- ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs)); ++ ri.isize = cpu_to_je32((uint32_t)pos); + ri.atime = ri.ctime = ri.mtime = cpu_to_je32(JFFS2_NOW()); + ri.offset = cpu_to_je32(inode->i_size); +- ri.dsize = cpu_to_je32(pageofs - inode->i_size); ++ ri.dsize = cpu_to_je32((uint32_t)pos - 
inode->i_size); + ri.csize = cpu_to_je32(0); + ri.compr = JFFS2_COMPR_ZERO; + ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); +@@ -203,7 +202,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + goto out_err; + } + jffs2_complete_reservation(c); +- inode->i_size = pageofs; ++ inode->i_size = pos; + mutex_unlock(&f->sem); + } + +diff --git a/fs/namei.c b/fs/namei.c +index 4159c140fa473..3d98db9802a77 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2798,6 +2798,63 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) + } + EXPORT_SYMBOL(unlock_rename); + ++/** ++ * mode_strip_umask - handle vfs umask stripping ++ * @dir: parent directory of the new inode ++ * @mode: mode of the new inode to be created in @dir ++ * ++ * Umask stripping depends on whether or not the filesystem supports POSIX ++ * ACLs. If the filesystem doesn't support it umask stripping is done directly ++ * in here. If the filesystem does support POSIX ACLs umask stripping is ++ * deferred until the filesystem calls posix_acl_create(). ++ * ++ * Returns: mode ++ */ ++static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode) ++{ ++ if (!IS_POSIXACL(dir)) ++ mode &= ~current_umask(); ++ return mode; ++} ++ ++/** ++ * vfs_prepare_mode - prepare the mode to be used for a new inode ++ * @dir: parent directory of the new inode ++ * @mode: mode of the new inode ++ * @mask_perms: allowed permission by the vfs ++ * @type: type of file to be created ++ * ++ * This helper consolidates and enforces vfs restrictions on the @mode of a new ++ * object to be created. ++ * ++ * Umask stripping depends on whether the filesystem supports POSIX ACLs (see ++ * the kernel documentation for mode_strip_umask()). Moving umask stripping ++ * after setgid stripping allows the same ordering for both non-POSIX ACL and ++ * POSIX ACL supporting filesystems. ++ * ++ * Note that it's currently valid for @type to be 0 if a directory is created. ++ * Filesystems raise that flag individually and we need to check whether each ++ * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a ++ * non-zero type. ++ * ++ * Returns: mode to be passed to the filesystem ++ */ ++static inline umode_t vfs_prepare_mode(const struct inode *dir, umode_t mode, ++ umode_t mask_perms, umode_t type) ++{ ++ mode = mode_strip_sgid(dir, mode); ++ mode = mode_strip_umask(dir, mode); ++ ++ /* ++ * Apply the vfs mandated allowed permission mask and set the type of ++ * file to be created before we call into the filesystem. ++ */ ++ mode &= (mask_perms & ~S_IFMT); ++ mode |= (type & S_IFMT); ++ ++ return mode; ++} ++ + int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool want_excl) + { +@@ -2807,8 +2864,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + + if (!dir->i_op->create) + return -EACCES; /* shouldn't it be ENOSYS? 
*/ +- mode &= S_IALLUGO; +- mode |= S_IFREG; ++ ++ mode = vfs_prepare_mode(dir, mode, S_IALLUGO, S_IFREG); + error = security_inode_create(dir, dentry, mode); + if (error) + return error; +@@ -3072,8 +3129,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, + if (open_flag & O_CREAT) { + if (open_flag & O_EXCL) + open_flag &= ~O_TRUNC; +- if (!IS_POSIXACL(dir->d_inode)) +- mode &= ~current_umask(); ++ mode = vfs_prepare_mode(dir->d_inode, mode, mode, mode); + if (likely(got_write)) + create_error = may_o_create(&nd->path, dentry, mode); + else +@@ -3286,8 +3342,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag) + child = d_alloc(dentry, &slash_name); + if (unlikely(!child)) + goto out_err; +- if (!IS_POSIXACL(dir)) +- mode &= ~current_umask(); ++ mode = vfs_prepare_mode(dir, mode, mode, mode); + error = dir->i_op->tmpfile(dir, child, mode); + if (error) + goto out_err; +@@ -3548,6 +3603,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) + if (!dir->i_op->mknod) + return -EPERM; + ++ mode = vfs_prepare_mode(dir, mode, mode, mode); + error = devcgroup_inode_mknod(mode, dev); + if (error) + return error; +@@ -3596,9 +3652,8 @@ retry: + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + +- if (!IS_POSIXACL(path.dentry->d_inode)) +- mode &= ~current_umask(); +- error = security_path_mknod(&path, dentry, mode, dev); ++ error = security_path_mknod(&path, dentry, ++ mode_strip_umask(path.dentry->d_inode, mode), dev); + if (error) + goto out; + switch (mode & S_IFMT) { +@@ -3646,7 +3701,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) + if (!dir->i_op->mkdir) + return -EPERM; + +- mode &= (S_IRWXUGO|S_ISVTX); ++ mode = vfs_prepare_mode(dir, mode, S_IRWXUGO | S_ISVTX, 0); + error = security_inode_mkdir(dir, dentry, mode); + if (error) + return error; +@@ -3673,9 +3728,8 @@ retry: + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + +- if (!IS_POSIXACL(path.dentry->d_inode)) +- mode &= ~current_umask(); +- error = security_path_mkdir(&path, dentry, mode); ++ error = security_path_mkdir(&path, dentry, ++ mode_strip_umask(path.dentry->d_inode, mode)); + if (!error) + error = vfs_mkdir(path.dentry->d_inode, dentry, mode); + done_path_create(&path, dentry); +diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c +index 1470b49adb2db..ca00cac5a12f7 100644 +--- a/fs/ocfs2/file.c ++++ b/fs/ocfs2/file.c +@@ -1994,7 +1994,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, + } + } + +- if (file && should_remove_suid(file->f_path.dentry)) { ++ if (file && setattr_should_drop_suidgid(file_inode(file))) { + ret = __ocfs2_write_remove_suid(inode, di_bh); + if (ret) { + mlog_errno(ret); +@@ -2282,7 +2282,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, + * inode. There's also the dinode i_size state which + * can be lost via setattr during extending writes (we + * set inode->i_size at the end of a write. */ +- if (should_remove_suid(dentry)) { ++ if (setattr_should_drop_suidgid(inode)) { + if (meta_level == 0) { + ocfs2_inode_unlock_for_extent_tree(inode, + &di_bh, +diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c +index 856474b0a1ae7..df1f6b7aa7979 100644 +--- a/fs/ocfs2/namei.c ++++ b/fs/ocfs2/namei.c +@@ -198,6 +198,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode) + * callers. 
*/ + if (S_ISDIR(mode)) + set_nlink(inode, 2); ++ mode = mode_strip_sgid(dir, mode); + inode_init_owner(inode, dir, mode); + status = dquot_initialize(inode); + if (status) +diff --git a/fs/open.c b/fs/open.c +index b3fbb4300fc96..1ca4b236fdbe0 100644 +--- a/fs/open.c ++++ b/fs/open.c +@@ -665,10 +665,10 @@ retry_deleg: + newattrs.ia_valid |= ATTR_GID; + newattrs.ia_gid = gid; + } +- if (!S_ISDIR(inode->i_mode)) +- newattrs.ia_valid |= +- ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; + inode_lock(inode); ++ if (!S_ISDIR(inode->i_mode)) ++ newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV | ++ setattr_should_drop_sgid(inode); + error = security_path_chown(path, uid, gid); + if (!error) + error = notify_change(path->dentry, &newattrs, &delegated_inode); +diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c +index 24c7d30e41dfe..0926363179a76 100644 +--- a/fs/xfs/libxfs/xfs_btree.c ++++ b/fs/xfs/libxfs/xfs_btree.c +@@ -3190,7 +3190,7 @@ xfs_btree_insrec( + struct xfs_btree_block *block; /* btree block */ + struct xfs_buf *bp; /* buffer for block */ + union xfs_btree_ptr nptr; /* new block ptr */ +- struct xfs_btree_cur *ncur; /* new btree cursor */ ++ struct xfs_btree_cur *ncur = NULL; /* new btree cursor */ + union xfs_btree_key nkey; /* new block key */ + union xfs_btree_key *lkey; + int optr; /* old key/record index */ +@@ -3270,7 +3270,7 @@ xfs_btree_insrec( + #ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) +- return error; ++ goto error0; + #endif + + /* +@@ -3290,7 +3290,7 @@ xfs_btree_insrec( + for (i = numrecs - ptr; i >= 0; i--) { + error = xfs_btree_debug_check_ptr(cur, pp, i, level); + if (error) +- return error; ++ goto error0; + } + + xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); +@@ -3375,6 +3375,8 @@ xfs_btree_insrec( + return 0; + + error0: ++ if (ncur) ++ xfs_btree_del_cursor(ncur, error); + return error; + } + +diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c +index 7371a7f7c6529..fbab1042bc90b 100644 +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -800,9 +800,6 @@ xfs_alloc_file_space( + quota_flag = XFS_QMOPT_RES_REGBLKS; + } + +- /* +- * Allocate and setup the transaction. 
+- */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, + resrtextents, 0, &tp); + +@@ -830,9 +827,9 @@ xfs_alloc_file_space( + if (error) + goto error0; + +- /* +- * Complete the transaction +- */ ++ ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; ++ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); ++ + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (error) +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c +index 4d6bf8d4974fe..9b6c5ba5fdfb6 100644 +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -94,8 +94,6 @@ xfs_update_prealloc_flags( + ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +- if (flags & XFS_PREALLOC_SYNC) +- xfs_trans_set_sync(tp); + return xfs_trans_commit(tp); + } + +@@ -852,7 +850,6 @@ xfs_file_fallocate( + struct inode *inode = file_inode(file); + struct xfs_inode *ip = XFS_I(inode); + long error; +- enum xfs_prealloc_flags flags = 0; + uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; + loff_t new_size = 0; + bool do_file_insert = false; +@@ -897,6 +894,10 @@ xfs_file_fallocate( + goto out_unlock; + } + ++ error = file_modified(file); ++ if (error) ++ goto out_unlock; ++ + if (mode & FALLOC_FL_PUNCH_HOLE) { + error = xfs_free_file_space(ip, offset, len); + if (error) +@@ -946,8 +947,6 @@ xfs_file_fallocate( + } + do_file_insert = true; + } else { +- flags |= XFS_PREALLOC_SET; +- + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) { + new_size = offset + len; +@@ -1000,13 +999,6 @@ xfs_file_fallocate( + } + } + +- if (file->f_flags & O_DSYNC) +- flags |= XFS_PREALLOC_SYNC; +- +- error = xfs_update_prealloc_flags(ip, flags); +- if (error) +- goto out_unlock; +- + /* Change file size if needed */ + if (new_size) { + struct iattr iattr; +@@ -1024,8 +1016,14 @@ xfs_file_fallocate( + * leave shifted extents past EOF and hence losing access to + * the data that is contained within them. 
+ */ +- if (do_file_insert) ++ if (do_file_insert) { + error = xfs_insert_file_space(ip, offset, len); ++ if (error) ++ goto out_unlock; ++ } ++ ++ if (file->f_flags & O_DSYNC) ++ error = xfs_log_force_inode(ip); + + out_unlock: + xfs_iunlock(ip, iolock); +diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c +index 6a3026e78a9bb..69fef29df4284 100644 +--- a/fs/xfs/xfs_iops.c ++++ b/fs/xfs/xfs_iops.c +@@ -595,37 +595,6 @@ xfs_vn_getattr( + return 0; + } + +-static void +-xfs_setattr_mode( +- struct xfs_inode *ip, +- struct iattr *iattr) +-{ +- struct inode *inode = VFS_I(ip); +- umode_t mode = iattr->ia_mode; +- +- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); +- +- inode->i_mode &= S_IFMT; +- inode->i_mode |= mode & ~S_IFMT; +-} +- +-void +-xfs_setattr_time( +- struct xfs_inode *ip, +- struct iattr *iattr) +-{ +- struct inode *inode = VFS_I(ip); +- +- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); +- +- if (iattr->ia_valid & ATTR_ATIME) +- inode->i_atime = iattr->ia_atime; +- if (iattr->ia_valid & ATTR_CTIME) +- inode->i_ctime = iattr->ia_ctime; +- if (iattr->ia_valid & ATTR_MTIME) +- inode->i_mtime = iattr->ia_mtime; +-} +- + static int + xfs_vn_change_ok( + struct dentry *dentry, +@@ -740,16 +709,6 @@ xfs_setattr_nonsize( + goto out_cancel; + } + +- /* +- * CAP_FSETID overrides the following restrictions: +- * +- * The set-user-ID and set-group-ID bits of a file will be +- * cleared upon successful return from chown() +- */ +- if ((inode->i_mode & (S_ISUID|S_ISGID)) && +- !capable(CAP_FSETID)) +- inode->i_mode &= ~(S_ISUID|S_ISGID); +- + /* + * Change the ownerships and register quota modifications + * in the transaction. +@@ -761,7 +720,6 @@ xfs_setattr_nonsize( + olddquot1 = xfs_qm_vop_chown(tp, ip, + &ip->i_udquot, udqp); + } +- inode->i_uid = uid; + } + if (!gid_eq(igid, gid)) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { +@@ -772,15 +730,10 @@ xfs_setattr_nonsize( + olddquot2 = xfs_qm_vop_chown(tp, ip, + &ip->i_gdquot, gdqp); + } +- inode->i_gid = gid; + } + } + +- if (mask & ATTR_MODE) +- xfs_setattr_mode(ip, iattr); +- if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) +- xfs_setattr_time(ip, iattr); +- ++ setattr_copy(inode, iattr); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(mp, xs_ig_attrchg); +@@ -1025,11 +978,8 @@ xfs_setattr_size( + xfs_inode_clear_eofblocks_tag(ip); + } + +- if (iattr->ia_valid & ATTR_MODE) +- xfs_setattr_mode(ip, iattr); +- if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) +- xfs_setattr_time(ip, iattr); +- ++ ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); ++ setattr_copy(inode, iattr); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(mp, xs_ig_attrchg); +diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h +index 4d24ff309f593..dd1bd0332f8e3 100644 +--- a/fs/xfs/xfs_iops.h ++++ b/fs/xfs/xfs_iops.h +@@ -18,7 +18,6 @@ extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); + */ + #define XFS_ATTR_NOACL 0x01 /* Don't call posix_acl_chmod */ + +-extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr); + extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, + int flags); + extern int xfs_vn_setattr_nonsize(struct dentry *dentry, struct iattr *vap); +diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c +index a2a5a0fd92334..402cf828cc919 100644 +--- a/fs/xfs/xfs_mount.c ++++ b/fs/xfs/xfs_mount.c +@@ -126,7 +126,6 @@ __xfs_free_perag( + { + struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); + +- ASSERT(atomic_read(&pag->pag_ref) == 0); + 
kmem_free(pag); + } + +@@ -145,7 +144,7 @@ xfs_free_perag( + pag = radix_tree_delete(&mp->m_perag_tree, agno); + spin_unlock(&mp->m_perag_lock); + ASSERT(pag); +- ASSERT(atomic_read(&pag->pag_ref) == 0); ++ XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); + xfs_iunlink_destroy(pag); + xfs_buf_hash_destroy(pag); + call_rcu(&pag->rcu_head, __xfs_free_perag); +diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c +index f3082a957d5e1..053b99929f835 100644 +--- a/fs/xfs/xfs_pnfs.c ++++ b/fs/xfs/xfs_pnfs.c +@@ -164,10 +164,12 @@ xfs_fs_map_blocks( + * that the blocks allocated and handed out to the client are + * guaranteed to be present even after a server crash. + */ +- error = xfs_update_prealloc_flags(ip, +- XFS_PREALLOC_SET | XFS_PREALLOC_SYNC); ++ error = xfs_update_prealloc_flags(ip, XFS_PREALLOC_SET); ++ if (!error) ++ error = xfs_log_force_inode(ip); + if (error) + goto out_unlock; ++ + } else { + xfs_iunlock(ip, lock_flags); + } +@@ -283,7 +285,8 @@ xfs_fs_commit_blocks( + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + +- xfs_setattr_time(ip, iattr); ++ ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); ++ setattr_copy(inode, iattr); + if (update_isize) { + i_size_write(inode, iattr->ia_size); + ip->i_d.di_size = iattr->ia_size; +diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c +index 64e5da33733b9..3c17e0c0f8169 100644 +--- a/fs/xfs/xfs_qm.c ++++ b/fs/xfs/xfs_qm.c +@@ -1318,8 +1318,15 @@ xfs_qm_quotacheck( + + error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true, + NULL); +- if (error) ++ if (error) { ++ /* ++ * The inode walk may have partially populated the dquot ++ * caches. We must purge them before disabling quota and ++ * tearing down the quotainfo, or else the dquots will leak. ++ */ ++ xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); + goto error_return; ++ } + + /* + * We've made all the changes that we need to make incore. Flush them +diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h +index 2195daa289d27..055486e35e68f 100644 +--- a/include/drm/drm_bridge.h ++++ b/include/drm/drm_bridge.h +@@ -427,11 +427,11 @@ struct drm_bridge_funcs { + * + * The returned array must be allocated with kmalloc() and will be + * freed by the caller. If the allocation fails, NULL should be +- * returned. num_output_fmts must be set to the returned array size. ++ * returned. num_input_fmts must be set to the returned array size. + * Formats listed in the returned array should be listed in decreasing + * preference order (the core will try all formats until it finds one + * that works). When the format is not supported NULL should be +- * returned and num_output_fmts should be set to 0. ++ * returned and num_input_fmts should be set to 0. 
+ * + * This method is called on all elements of the bridge chain as part of + * the bus format negotiation process that happens in +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 74e19bccbf738..8ce9e5c61ede8 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1768,6 +1768,7 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, + extern void inode_init_owner(struct inode *inode, const struct inode *dir, + umode_t mode); + extern bool may_open_dev(const struct path *path); ++umode_t mode_strip_sgid(const struct inode *dir, umode_t mode); + + /* + * This is the "filldir" function type, used by readdir() to let +@@ -2959,7 +2960,7 @@ extern void __destroy_inode(struct inode *); + extern struct inode *new_inode_pseudo(struct super_block *sb); + extern struct inode *new_inode(struct super_block *sb); + extern void free_inode_nonrcu(struct inode *inode); +-extern int should_remove_suid(struct dentry *); ++extern int setattr_should_drop_suidgid(struct inode *); + extern int file_remove_privs(struct file *); + + extern void __insert_inode_hash(struct inode *, unsigned long hashval); +@@ -3407,7 +3408,7 @@ int __init get_filesystem_list(char *buf); + + static inline bool is_sxid(umode_t mode) + { +- return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); ++ return mode & (S_ISUID | S_ISGID); + } + + static inline int check_sticky(struct inode *dir, struct inode *inode) +diff --git a/include/linux/hid.h b/include/linux/hid.h +index 2ba33d708942c..256f34f49167c 100644 +--- a/include/linux/hid.h ++++ b/include/linux/hid.h +@@ -798,6 +798,7 @@ struct hid_driver { + * @raw_request: send raw report request to device (e.g. feature report) + * @output_report: send output report to device + * @idle: send idle request to device ++ * @max_buffer_size: over-ride maximum data buffer size (default: HID_MAX_BUFFER_SIZE) + */ + struct hid_ll_driver { + int (*start)(struct hid_device *hdev); +@@ -822,6 +823,8 @@ struct hid_ll_driver { + int (*output_report) (struct hid_device *hdev, __u8 *buf, size_t len); + + int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype); ++ ++ unsigned int max_buffer_size; + }; + + extern struct hid_ll_driver i2c_hid_ll_driver; +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index b478a16ef284d..9ef63bc14b002 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -270,9 +270,11 @@ struct hh_cache { + * relationship HH alignment <= LL alignment. + */ + #define LL_RESERVED_SPACE(dev) \ +- ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) ++ ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom)) \ ++ & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) + #define LL_RESERVED_SPACE_EXTRA(dev,extra) \ +- ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) ++ ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom) + (extra)) \ ++ & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) + + struct header_ops { + int (*create) (struct sk_buff *skb, struct net_device *dev, +diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h +index c255273b02810..37ad81058d6ae 100644 +--- a/include/linux/sh_intc.h ++++ b/include/linux/sh_intc.h +@@ -97,7 +97,10 @@ struct intc_hw_desc { + unsigned int nr_subgroups; + }; + +-#define _INTC_ARRAY(a) a, __same_type(a, NULL) ? 
0 : sizeof(a)/sizeof(*a) ++#define _INTC_SIZEOF_OR_ZERO(a) (_Generic(a, \ ++ typeof(NULL): 0, \ ++ default: sizeof(a))) ++#define _INTC_ARRAY(a) a, _INTC_SIZEOF_OR_ZERO(a)/sizeof(*a) + + #define INTC_HW_DESC(vectors, groups, mask_regs, \ + prio_regs, sense_regs, ack_regs) \ +diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h +index e4c5df71f0e74..4e1356c35fe62 100644 +--- a/include/linux/tracepoint.h ++++ b/include/linux/tracepoint.h +@@ -234,12 +234,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) + * not add unwanted padding between the beginning of the section and the + * structure. Force alignment to the same alignment as the section start. + * +- * When lockdep is enabled, we make sure to always do the RCU portions of +- * the tracepoint code, regardless of whether tracing is on. However, +- * don't check if the condition is false, due to interaction with idle +- * instrumentation. This lets us find RCU issues triggered with tracepoints +- * even when this tracepoint is off. This code has no purpose other than +- * poking RCU a bit. ++ * When lockdep is enabled, we make sure to always test if RCU is ++ * "watching" regardless if the tracepoint is enabled or not. Tracepoints ++ * require RCU to be active, and it should always warn at the tracepoint ++ * site if it is not watching, as it will need to be active when the ++ * tracepoint is enabled. + */ + #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ + extern int __traceiter_##name(data_proto); \ +@@ -253,9 +252,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) + TP_ARGS(data_args), \ + TP_CONDITION(cond), 0); \ + if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ +- rcu_read_lock_sched_notrace(); \ +- rcu_dereference_sched(__tracepoint_##name.funcs);\ +- rcu_read_unlock_sched_notrace(); \ ++ WARN_ON_ONCE(!rcu_is_watching()); \ + } \ + } \ + __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 445afda927f47..fd799567fc23a 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -5792,10 +5792,10 @@ static int io_arm_poll_handler(struct io_kiocb *req) + } + } else { + apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); ++ if (unlikely(!apoll)) ++ return IO_APOLL_ABORTED; + apoll->poll.retries = APOLL_MAX_RETRY; + } +- if (unlikely(!apoll)) +- return IO_APOLL_ABORTED; + apoll->double_poll = NULL; + req->apoll = apoll; + req->flags |= REQ_F_POLLED; +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index d97c189695cbb..67829b6e07bdc 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -1538,7 +1538,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end) + key.flags = end; /* overload flags, as it is unsigned long */ + + for (pg = ftrace_pages_start; pg; pg = pg->next) { +- if (end < pg->records[0].ip || ++ if (pg->index == 0 || ++ end < pg->records[0].ip || + start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) + continue; + rec = bsearch(&key, pg->records, pg->index, +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 8637eab2986ee..ce45bdd9077db 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -4705,6 +4705,8 @@ loff_t tracing_lseek(struct file *file, loff_t offset, int whence) + static const struct file_operations tracing_fops = { + .open = tracing_open, + .read = seq_read, ++ .read_iter = seq_read_iter, ++ .splice_read = generic_file_splice_read, + .write = tracing_write_stub, + 
.llseek = tracing_lseek, + .release = tracing_release, +diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c +index ccc99cd23f3c4..9ed65191888ef 100644 +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -1087,6 +1087,9 @@ static const char *hist_field_name(struct hist_field *field, + { + const char *field_name = ""; + ++ if (WARN_ON_ONCE(!field)) ++ return field_name; ++ + if (level > 1) + return field_name; + +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 9b15760e0541a..e4c690c21fc9c 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1994,7 +1994,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, + { + struct mm_struct *mm = vma->vm_mm; + pgtable_t pgtable; +- pmd_t _pmd; ++ pmd_t _pmd, old_pmd; + int i; + + /* +@@ -2005,7 +2005,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, + * + * See Documentation/vm/mmu_notifier.rst + */ +- pmdp_huge_clear_flush(vma, haddr, pmd); ++ old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd); + + pgtable = pgtable_trans_huge_withdraw(mm, pmd); + pmd_populate(mm, &_pmd, pgtable); +@@ -2014,6 +2014,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, + pte_t *pte, entry; + entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot); + entry = pte_mkspecial(entry); ++ if (pmd_uffd_wp(old_pmd)) ++ entry = pte_mkuffd_wp(entry); + pte = pte_offset_map(&_pmd, haddr); + VM_BUG_ON(!pte_none(*pte)); + set_pte_at(mm, haddr, pte, entry); +diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c +index 5f786ef662ead..41f890bf9d4c4 100644 +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -573,6 +573,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, + cfg->fc_scope = RT_SCOPE_UNIVERSE; + } + ++ if (!cfg->fc_table) ++ cfg->fc_table = RT_TABLE_MAIN; ++ + if (cmd == SIOCDELRT) + return 0; + +diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c +index be75b409445c2..99f70b990eb13 100644 +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -613,10 +613,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + } + + headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; +- if (headroom > dev->needed_headroom) +- dev->needed_headroom = headroom; ++ if (headroom > READ_ONCE(dev->needed_headroom)) ++ WRITE_ONCE(dev->needed_headroom, headroom); + +- if (skb_cow_head(skb, dev->needed_headroom)) { ++ if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + ip_rt_put(rt); + goto tx_dropped; + } +@@ -797,10 +797,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + + max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); +- if (max_headroom > dev->needed_headroom) +- dev->needed_headroom = max_headroom; ++ if (max_headroom > READ_ONCE(dev->needed_headroom)) ++ WRITE_ONCE(dev->needed_headroom, max_headroom); + +- if (skb_cow_head(skb, dev->needed_headroom)) { ++ if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + ip_rt_put(rt); + dev->stats.tx_dropped++; + kfree_skb(skb); +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index eefd032bc6dbd..e4ad274ec7a30 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -3609,7 +3609,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, + th->window = htons(min(req->rsk_rcv_wnd, 65535U)); + tcp_options_write((__be32 *)(th + 1), NULL, &opts); + th->doff = (tcp_header_size 
>> 2); +- __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); ++ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + + #ifdef CONFIG_TCP_MD5SIG + /* Okay, we have all we need - do the md5 hash if needed */ +diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c +index 0d4cab94c5dd2..a03a322e0cc1c 100644 +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1267,8 +1267,8 @@ route_lookup: + */ + max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + + dst->header_len + t->hlen; +- if (max_headroom > dev->needed_headroom) +- dev->needed_headroom = max_headroom; ++ if (max_headroom > READ_ONCE(dev->needed_headroom)) ++ WRITE_ONCE(dev->needed_headroom, max_headroom); + + err = ip6_tnl_encap(skb, t, &proto, fl6); + if (err) +diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c +index 349c6ac3313f7..6f84978a77265 100644 +--- a/net/iucv/iucv.c ++++ b/net/iucv/iucv.c +@@ -83,7 +83,7 @@ struct iucv_irq_data { + u16 ippathid; + u8 ipflags1; + u8 iptype; +- u32 res2[8]; ++ u32 res2[9]; + }; + + struct iucv_irq_list { +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index 3b154ad4945c4..607519246bf28 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -275,7 +275,6 @@ void mptcp_subflow_reset(struct sock *ssk) + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = subflow->conn; + +- tcp_set_state(ssk, TCP_CLOSE); + tcp_send_active_reset(ssk, GFP_ATOMIC); + tcp_done(ssk); + if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) && +diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c +index 9953e80537536..1818dbf089cad 100644 +--- a/net/netfilter/nft_masq.c ++++ b/net/netfilter/nft_masq.c +@@ -43,7 +43,7 @@ static int nft_masq_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) + { +- u32 plen = sizeof_field(struct nf_nat_range, min_addr.all); ++ u32 plen = sizeof_field(struct nf_nat_range, min_proto.all); + struct nft_masq *priv = nft_expr_priv(expr); + int err; + +diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c +index db8f9116eeb43..cd4eb4996aff3 100644 +--- a/net/netfilter/nft_nat.c ++++ b/net/netfilter/nft_nat.c +@@ -226,7 +226,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + priv->flags |= NF_NAT_RANGE_MAP_IPS; + } + +- plen = sizeof_field(struct nf_nat_range, min_addr.all); ++ plen = sizeof_field(struct nf_nat_range, min_proto.all); + if (tb[NFTA_NAT_REG_PROTO_MIN]) { + err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); +diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c +index ba09890dddb50..e64f531d66cfc 100644 +--- a/net/netfilter/nft_redir.c ++++ b/net/netfilter/nft_redir.c +@@ -48,7 +48,7 @@ static int nft_redir_init(const struct nft_ctx *ctx, + unsigned int plen; + int err; + +- plen = sizeof_field(struct nf_nat_range, min_addr.all); ++ plen = sizeof_field(struct nf_nat_range, min_proto.all); + if (tb[NFTA_REDIR_REG_PROTO_MIN]) { + err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); +@@ -232,7 +232,7 @@ static struct nft_expr_type nft_redir_inet_type __read_mostly = { + .name = "redir", + .ops = &nft_redir_inet_ops, + .policy = nft_redir_policy, +- .maxattr = NFTA_MASQ_MAX, ++ .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, + }; + +diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c +index 94503f36b9a61..9125d28d9ff5d 100644 +--- a/net/smc/smc_cdc.c ++++ b/net/smc/smc_cdc.c +@@ -104,6 +104,9 @@ int 
smc_cdc_msg_send(struct smc_connection *conn, + union smc_host_cursor cfed; + int rc; + ++ if (unlikely(!READ_ONCE(conn->sndbuf_desc))) ++ return -ENOBUFS; ++ + smc_cdc_add_pending_send(conn, pend); + + conn->tx_cdc_seq++; +diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c +index bf485a2017a4e..e84241ff4ac4f 100644 +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -912,7 +912,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) + if (lgr->terminating) + return; /* lgr already terminating */ + /* cancel free_work sync, will terminate when lgr->freeing is set */ +- cancel_delayed_work_sync(&lgr->free_work); ++ cancel_delayed_work(&lgr->free_work); + lgr->terminating = 1; + + /* kill remaining link group connections */ +diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c +index fdbd56ed4bd52..ba73014805a4f 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -2611,9 +2611,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) + if (inner_mode == NULL) + goto error; + +- if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) +- goto error; +- + x->inner_mode = *inner_mode; + + if (x->props.family == AF_INET) +diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c +index 2a5ba9dca6b08..f96e70c85f84a 100644 +--- a/sound/hda/intel-dsp-config.c ++++ b/sound/hda/intel-dsp-config.c +@@ -359,6 +359,15 @@ static const struct config_entry config_table[] = { + }, + #endif + ++/* Meteor Lake */ ++#if IS_ENABLED(CONFIG_SND_SOC_SOF_METEORLAKE) ++ /* Meteorlake-P */ ++ { ++ .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, ++ .device = 0x7e28, ++ }, ++#endif ++ + }; + + static const struct config_entry *snd_intel_dsp_find_config +diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c +index 494bfd2135a9e..de1fe604905f3 100644 +--- a/sound/pci/hda/hda_intel.c ++++ b/sound/pci/hda/hda_intel.c +@@ -365,14 +365,15 @@ enum { + #define needs_eld_notify_link(chip) false + #endif + +-#define CONTROLLER_IN_GPU(pci) (((pci)->device == 0x0a0c) || \ ++#define CONTROLLER_IN_GPU(pci) (((pci)->vendor == 0x8086) && \ ++ (((pci)->device == 0x0a0c) || \ + ((pci)->device == 0x0c0c) || \ + ((pci)->device == 0x0d0c) || \ + ((pci)->device == 0x160c) || \ + ((pci)->device == 0x490d) || \ + ((pci)->device == 0x4f90) || \ + ((pci)->device == 0x4f91) || \ +- ((pci)->device == 0x4f92)) ++ ((pci)->device == 0x4f92))) + + #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index f2ef75c8de427..2cf6600c9ca83 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -9091,6 +9091,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP), ++ SND_PCI_QUIRK(0x144d, 0xc868, "Samsung Galaxy Book2 Pro (NP930XED)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), +diff --git a/tools/testing/selftests/net/devlink_port_split.py 
b/tools/testing/selftests/net/devlink_port_split.py +index 834066d465fc1..f0fbd7367f4f6 100755 +--- a/tools/testing/selftests/net/devlink_port_split.py ++++ b/tools/testing/selftests/net/devlink_port_split.py +@@ -57,6 +57,8 @@ class devlink_ports(object): + assert stderr == "" + ports = json.loads(stdout)['port'] + ++ validate_devlink_output(ports, 'flavour') ++ + for port in ports: + if dev in port: + if ports[port]['flavour'] == 'physical': +@@ -218,6 +220,27 @@ def split_splittable_port(port, k, lanes, dev): + unsplit(port.bus_info) + + ++def validate_devlink_output(devlink_data, target_property=None): ++ """ ++ Determine if test should be skipped by checking: ++ 1. devlink_data contains values ++ 2. The target_property exist in devlink_data ++ """ ++ skip_reason = None ++ if any(devlink_data.values()): ++ if target_property: ++ skip_reason = "{} not found in devlink output, test skipped".format(target_property) ++ for key in devlink_data: ++ if target_property in devlink_data[key]: ++ skip_reason = None ++ else: ++ skip_reason = 'devlink output is empty, test skipped' ++ ++ if skip_reason: ++ print(skip_reason) ++ sys.exit(KSFT_SKIP) ++ ++ + def make_parser(): + parser = argparse.ArgumentParser(description='A test for port splitting.') + parser.add_argument('--dev', +@@ -238,6 +261,7 @@ def main(cmdline=None): + stdout, stderr = run_command(cmd) + assert stderr == "" + ++ validate_devlink_output(json.loads(stdout)) + devs = json.loads(stdout)['dev'] + dev = list(devs.keys())[0] + +@@ -249,6 +273,7 @@ def main(cmdline=None): + + ports = devlink_ports(dev) + ++ found_max_lanes = False + for port in ports.if_names: + max_lanes = get_max_lanes(port.name) + +@@ -271,6 +296,11 @@ def main(cmdline=None): + split_splittable_port(port, lane, max_lanes, dev) + + lane //= 2 ++ found_max_lanes = True ++ ++ if not found_max_lanes: ++ print(f"Test not started, no port of device {dev} reports max_lanes") ++ sys.exit(KSFT_SKIP) + + + if __name__ == "__main__": |