public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Alice Ferrazzi" <alicef@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/linux-patches:6.2 commit in: /
Date: Thu,  6 Apr 2023 10:40:54 +0000 (UTC)	[thread overview]
Message-ID: <1680777626.552322bbd8665a864a089b06ed41c97e413562b9.alicef@gentoo> (raw)

commit:     552322bbd8665a864a089b06ed41c97e413562b9
Author:     Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
AuthorDate: Thu Apr  6 10:40:26 2023 +0000
Commit:     Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
CommitDate: Thu Apr  6 10:40:26 2023 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=552322bb

Linux patch 6.2.10

Signed-off-by: Alice Ferrazzi <alicef <AT> gentoo.org>

 0000_README             |     4 +
 1009_linux-6.2.10.patch | 10600 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 10604 insertions(+)

diff --git a/0000_README b/0000_README
index 47edd2da..93bcb21e 100644
--- a/0000_README
+++ b/0000_README
@@ -79,6 +79,10 @@ Patch:  1008_linux-6.2.9.patch
 From:   https://www.kernel.org
 Desc:   Linux 6.2.9
 
+Patch:  1009_linux-6.2.10.patch
+From:   https://www.kernel.org
+Desc:   Linux 6.2.10
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.

diff --git a/1009_linux-6.2.10.patch b/1009_linux-6.2.10.patch
new file mode 100644
index 00000000..980ca4df
--- /dev/null
+++ b/1009_linux-6.2.10.patch
@@ -0,0 +1,10600 @@
+diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
+index 3fe981b14e2cb..54736362378eb 100644
+--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
++++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
+@@ -76,6 +76,13 @@ properties:
+       If "broken-flash-reset" is present then having this property does not
+       make any difference.
+ 
++  spi-cpol: true
++  spi-cpha: true
++
++dependencies:
++  spi-cpol: [ spi-cpha ]
++  spi-cpha: [ spi-cpol ]
++
+ unevaluatedProperties: false
+ 
+ examples:
+diff --git a/Makefile b/Makefile
+index 8732f7208d59b..6ec0ec452e465 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 6
+ PATCHLEVEL = 2
+-SUBLEVEL = 9
++SUBLEVEL = 10
+ EXTRAVERSION =
+ NAME = Hurr durr I'ma ninja sloth
+ 
+diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
+index a3ee3b605c9b8..3c24178bd4935 100644
+--- a/arch/arm64/kvm/mmu.c
++++ b/arch/arm64/kvm/mmu.c
+@@ -665,14 +665,33 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
+ 				   CONFIG_PGTABLE_LEVELS),
+ 		.mm_ops		= &kvm_user_mm_ops,
+ 	};
++	unsigned long flags;
+ 	kvm_pte_t pte = 0;	/* Keep GCC quiet... */
+ 	u32 level = ~0;
+ 	int ret;
+ 
++	/*
++	 * Disable IRQs so that we hazard against a concurrent
++	 * teardown of the userspace page tables (which relies on
++	 * IPI-ing threads).
++	 */
++	local_irq_save(flags);
+ 	ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
+-	VM_BUG_ON(ret);
+-	VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
+-	VM_BUG_ON(!(pte & PTE_VALID));
++	local_irq_restore(flags);
++
++	if (ret)
++		return ret;
++
++	/*
++	 * Not seeing an error, but not updating level? Something went
++	 * deeply wrong...
++	 */
++	if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
++		return -EFAULT;
++
++	/* Oops, the userspace PTs are gone... Replay the fault */
++	if (!kvm_pte_valid(pte))
++		return -EAGAIN;
+ 
+ 	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
+ }
+@@ -1079,7 +1098,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
+  *
+  * Returns the size of the mapping.
+  */
+-static unsigned long
++static long
+ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ 			    unsigned long hva, kvm_pfn_t *pfnp,
+ 			    phys_addr_t *ipap)
+@@ -1091,8 +1110,15 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ 	 * sure that the HVA and IPA are sufficiently aligned and that the
+ 	 * block map is contained within the memslot.
+ 	 */
+-	if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
+-	    get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
++	if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
++		int sz = get_user_mapping_size(kvm, hva);
++
++		if (sz < 0)
++			return sz;
++
++		if (sz < PMD_SIZE)
++			return PAGE_SIZE;
++
+ 		/*
+ 		 * The address we faulted on is backed by a transparent huge
+ 		 * page.  However, because we map the compound huge page and
+@@ -1192,7 +1218,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ {
+ 	int ret = 0;
+ 	bool write_fault, writable, force_pte = false;
+-	bool exec_fault;
++	bool exec_fault, mte_allowed;
+ 	bool device = false;
+ 	unsigned long mmu_seq;
+ 	struct kvm *kvm = vcpu->kvm;
+@@ -1203,7 +1229,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ 	kvm_pfn_t pfn;
+ 	bool logging_active = memslot_is_logging(memslot);
+ 	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
+-	unsigned long vma_pagesize, fault_granule;
++	long vma_pagesize, fault_granule;
+ 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
+ 	struct kvm_pgtable *pgt;
+ 
+@@ -1217,6 +1243,20 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ 		return -EFAULT;
+ 	}
+ 
++	/*
++	 * Permission faults just need to update the existing leaf entry,
++	 * and so normally don't require allocations from the memcache. The
++	 * only exception to this is when dirty logging is enabled at runtime
++	 * and a write fault needs to collapse a block entry into a table.
++	 */
++	if (fault_status != ESR_ELx_FSC_PERM ||
++	    (logging_active && write_fault)) {
++		ret = kvm_mmu_topup_memory_cache(memcache,
++						 kvm_mmu_cache_min_pages(kvm));
++		if (ret)
++			return ret;
++	}
++
+ 	/*
+ 	 * Let's check if we will get back a huge page backed by hugetlbfs, or
+ 	 * get block mapping for device MMIO region.
+@@ -1269,37 +1309,21 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ 		fault_ipa &= ~(vma_pagesize - 1);
+ 
+ 	gfn = fault_ipa >> PAGE_SHIFT;
+-	mmap_read_unlock(current->mm);
++	mte_allowed = kvm_vma_mte_allowed(vma);
+ 
+-	/*
+-	 * Permission faults just need to update the existing leaf entry,
+-	 * and so normally don't require allocations from the memcache. The
+-	 * only exception to this is when dirty logging is enabled at runtime
+-	 * and a write fault needs to collapse a block entry into a table.
+-	 */
+-	if (fault_status != ESR_ELx_FSC_PERM ||
+-	    (logging_active && write_fault)) {
+-		ret = kvm_mmu_topup_memory_cache(memcache,
+-						 kvm_mmu_cache_min_pages(kvm));
+-		if (ret)
+-			return ret;
+-	}
++	/* Don't use the VMA after the unlock -- it may have vanished */
++	vma = NULL;
+ 
+-	mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+ 	/*
+-	 * Ensure the read of mmu_invalidate_seq happens before we call
+-	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
+-	 * the page we just got a reference to gets unmapped before we have a
+-	 * chance to grab the mmu_lock, which ensure that if the page gets
+-	 * unmapped afterwards, the call to kvm_unmap_gfn will take it away
+-	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
+-	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
++	 * Read mmu_invalidate_seq so that KVM can detect if the results of
++	 * vma_lookup() or __gfn_to_pfn_memslot() become stale prior to
++	 * acquiring kvm->mmu_lock.
+ 	 *
+-	 * Besides, __gfn_to_pfn_memslot() instead of gfn_to_pfn_prot() is
+-	 * used to avoid unnecessary overhead introduced to locate the memory
+-	 * slot because it's always fixed even @gfn is adjusted for huge pages.
++	 * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
++	 * with the smp_wmb() in kvm_mmu_invalidate_end().
+ 	 */
+-	smp_rmb();
++	mmu_seq = vcpu->kvm->mmu_invalidate_seq;
++	mmap_read_unlock(current->mm);
+ 
+ 	pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
+ 				   write_fault, &writable, NULL);
+@@ -1350,11 +1374,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ 			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
+ 								   hva, &pfn,
+ 								   &fault_ipa);
++
++		if (vma_pagesize < 0) {
++			ret = vma_pagesize;
++			goto out_unlock;
++		}
+ 	}
+ 
+ 	if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
+ 		/* Check the VMM hasn't introduced a new disallowed VMA */
+-		if (kvm_vma_mte_allowed(vma)) {
++		if (mte_allowed) {
+ 			sanitise_mte_tags(kvm, pfn, vma_pagesize);
+ 		} else {
+ 			ret = -EFAULT;
+diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
+index 24908400e1906..c243b10f3e150 100644
+--- a/arch/arm64/kvm/pmu-emul.c
++++ b/arch/arm64/kvm/pmu-emul.c
+@@ -538,7 +538,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
+ 	if (!kvm_pmu_is_3p5(vcpu))
+ 		val &= ~ARMV8_PMU_PMCR_LP;
+ 
+-	__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
++	/* The reset bits don't indicate any state, and shouldn't be saved. */
++	__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);
+ 
+ 	if (val & ARMV8_PMU_PMCR_E) {
+ 		kvm_pmu_enable_counter_mask(vcpu,
+diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
+index c6cbfe6b854b3..c48c053d61466 100644
+--- a/arch/arm64/kvm/sys_regs.c
++++ b/arch/arm64/kvm/sys_regs.c
+@@ -765,6 +765,22 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
+ 	return true;
+ }
+ 
++static int get_pmu_evcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
++			  u64 *val)
++{
++	u64 idx;
++
++	if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 0)
++		/* PMCCNTR_EL0 */
++		idx = ARMV8_PMU_CYCLE_IDX;
++	else
++		/* PMEVCNTRn_EL0 */
++		idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
++
++	*val = kvm_pmu_get_counter_value(vcpu, idx);
++	return 0;
++}
++
+ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
+ 			      struct sys_reg_params *p,
+ 			      const struct sys_reg_desc *r)
+@@ -981,7 +997,7 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ /* Macro to expand the PMEVCNTRn_EL0 register */
+ #define PMU_PMEVCNTR_EL0(n)						\
+ 	{ PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)),				\
+-	  .reset = reset_pmevcntr,					\
++	  .reset = reset_pmevcntr, .get_user = get_pmu_evcntr,		\
+ 	  .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
+ 
+ /* Macro to expand the PMEVTYPERn_EL0 register */
+@@ -1745,7 +1761,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
+ 	{ PMU_SYS_REG(SYS_PMCEID1_EL0),
+ 	  .access = access_pmceid, .reset = NULL },
+ 	{ PMU_SYS_REG(SYS_PMCCNTR_EL0),
+-	  .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 },
++	  .access = access_pmu_evcntr, .reset = reset_unknown,
++	  .reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr},
+ 	{ PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
+ 	  .access = access_pmu_evtyper, .reset = NULL },
+ 	{ PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
+diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c
+index 33788668cbdbf..3779e7855bd75 100644
+--- a/arch/mips/bmips/dma.c
++++ b/arch/mips/bmips/dma.c
+@@ -5,6 +5,8 @@
+ #include <asm/bmips.h>
+ #include <asm/io.h>
+ 
++bool bmips_rac_flush_disable;
++
+ void arch_sync_dma_for_cpu_all(void)
+ {
+ 	void __iomem *cbr = BMIPS_GET_CBR();
+@@ -15,6 +17,9 @@ void arch_sync_dma_for_cpu_all(void)
+ 	    boot_cpu_type() != CPU_BMIPS4380)
+ 		return;
+ 
++	if (unlikely(bmips_rac_flush_disable))
++		return;
++
+ 	/* Flush stale data out of the readahead cache */
+ 	cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
+ 	__raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG);
+diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
+index e95b3f78e7cd4..549a6392a3d2d 100644
+--- a/arch/mips/bmips/setup.c
++++ b/arch/mips/bmips/setup.c
+@@ -35,6 +35,8 @@
+ #define REG_BCM6328_OTP		((void __iomem *)CKSEG1ADDR(0x1000062c))
+ #define BCM6328_TP1_DISABLED	BIT(9)
+ 
++extern bool bmips_rac_flush_disable;
++
+ static const unsigned long kbase = VMLINUX_LOAD_ADDRESS & 0xfff00000;
+ 
+ struct bmips_quirk {
+@@ -104,6 +106,12 @@ static void bcm6358_quirks(void)
+ 	 * disable SMP for now
+ 	 */
+ 	bmips_smp_enabled = 0;
++
++	/*
++	 * RAC flush causes kernel panics on BCM6358 when booting from TP1
++	 * because the bootloader is not initializing it properly.
++	 */
++	bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31));
+ }
+ 
+ static void bcm6368_quirks(void)
+diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
+index 2bbc0fcce04a3..5e26c7f2c25ab 100644
+--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
++++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
+@@ -148,6 +148,11 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+ 	 */
+ }
+ 
++static inline bool __pte_protnone(unsigned long pte)
++{
++	return (pte & (pgprot_val(PAGE_NONE) | _PAGE_RWX)) == pgprot_val(PAGE_NONE);
++}
++
+ static inline bool __pte_flags_need_flush(unsigned long oldval,
+ 					  unsigned long newval)
+ {
+@@ -164,8 +169,8 @@ static inline bool __pte_flags_need_flush(unsigned long oldval,
+ 	/*
+ 	 * We do not expect kernel mappings or non-PTEs or not-present PTEs.
+ 	 */
+-	VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED);
+-	VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED);
++	VM_WARN_ON_ONCE(!__pte_protnone(oldval) && oldval & _PAGE_PRIVILEGED);
++	VM_WARN_ON_ONCE(!__pte_protnone(newval) && newval & _PAGE_PRIVILEGED);
+ 	VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE));
+ 	VM_WARN_ON_ONCE(!(newval & _PAGE_PTE));
+ 	VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT));
+diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
+index 2087a785f05f1..5fff0d04b23f7 100644
+--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
++++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
+@@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
+ static int ppr_get(struct task_struct *target, const struct user_regset *regset,
+ 		   struct membuf to)
+ {
++	if (!target->thread.regs)
++		return -EINVAL;
++
+ 	return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64));
+ }
+ 
+@@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset,
+ 		   unsigned int pos, unsigned int count, const void *kbuf,
+ 		   const void __user *ubuf)
+ {
++	if (!target->thread.regs)
++		return -EINVAL;
++
+ 	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ 				  &target->thread.regs->ppr, 0, sizeof(u64));
+ }
+diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
+index 4ad6e510d405f..94c023bb13e05 100644
+--- a/arch/powerpc/platforms/pseries/vas.c
++++ b/arch/powerpc/platforms/pseries/vas.c
+@@ -857,6 +857,13 @@ int pseries_vas_dlpar_cpu(void)
+ {
+ 	int new_nr_creds, rc;
+ 
++	/*
++	 * NX-GZIP is not enabled. Nothing to do for DLPAR event
++	 */
++	if (!copypaste_feat)
++		return 0;
++
++
+ 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+ 				      vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
+ 				      (u64)virt_to_phys(&hv_cop_caps));
+@@ -1013,6 +1020,7 @@ static int __init pseries_vas_init(void)
+ 	 * Linux supports user space COPY/PASTE only with Radix
+ 	 */
+ 	if (!radix_enabled()) {
++		copypaste_feat = false;
+ 		pr_err("API is supported only with radix page tables\n");
+ 		return -ENOTSUPP;
+ 	}
+diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
+index ad34519c8a13d..3ac2ff6a65dac 100644
+--- a/arch/riscv/kvm/vcpu_timer.c
++++ b/arch/riscv/kvm/vcpu_timer.c
+@@ -147,10 +147,8 @@ static void kvm_riscv_vcpu_timer_blocking(struct kvm_vcpu *vcpu)
+ 		return;
+ 
+ 	delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t);
+-	if (delta_ns) {
+-		hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
+-		t->next_set = true;
+-	}
++	hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
++	t->next_set = true;
+ }
+ 
+ static void kvm_riscv_vcpu_timer_unblocking(struct kvm_vcpu *vcpu)
+diff --git a/arch/s390/Makefile b/arch/s390/Makefile
+index b3235ab0ace83..ed646c583e4fe 100644
+--- a/arch/s390/Makefile
++++ b/arch/s390/Makefile
+@@ -162,7 +162,7 @@ vdso_prepare: prepare0
+ 
+ ifdef CONFIG_EXPOLINE_EXTERN
+ modules_prepare: expoline_prepare
+-expoline_prepare:
++expoline_prepare: scripts
+ 	$(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o
+ endif
+ endif
+diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
+index 720036fb19242..d44214072779e 100644
+--- a/arch/s390/lib/uaccess.c
++++ b/arch/s390/lib/uaccess.c
+@@ -172,7 +172,7 @@ unsigned long __clear_user(void __user *to, unsigned long size)
+ 		"4: slgr  %0,%0\n"
+ 		"5:\n"
+ 		EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b)
+-		: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
++		: "+&a" (size), "+&a" (to), "+a" (tmp1), "=&a" (tmp2)
+ 		: "a" (empty_zero_page), [spec] "d" (spec.val)
+ 		: "cc", "memory", "0");
+ 	return size;
+diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
+index 3c5b52fbe4a7f..a9ec8c9f5c5dd 100644
+--- a/arch/x86/xen/Makefile
++++ b/arch/x86/xen/Makefile
+@@ -45,6 +45,6 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
+ 
+ obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
+ 
+-obj-$(CONFIG_XEN_PV_DOM0)	+= vga.o
++obj-$(CONFIG_XEN_DOM0)		+= vga.o
+ 
+ obj-$(CONFIG_XEN_EFI)		+= efi.o
+diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
+index 5b13796628770..68f5f5d209dfa 100644
+--- a/arch/x86/xen/enlighten_pv.c
++++ b/arch/x86/xen/enlighten_pv.c
+@@ -1389,7 +1389,8 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
+ 
+ 		x86_platform.set_legacy_features =
+ 				xen_dom0_set_legacy_features;
+-		xen_init_vga(info, xen_start_info->console.dom0.info_size);
++		xen_init_vga(info, xen_start_info->console.dom0.info_size,
++			     &boot_params.screen_info);
+ 		xen_start_info->console.domU.mfn = 0;
+ 		xen_start_info->console.domU.evtchn = 0;
+ 
+diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
+index bcae606bbc5cf..ada3868c02c23 100644
+--- a/arch/x86/xen/enlighten_pvh.c
++++ b/arch/x86/xen/enlighten_pvh.c
+@@ -43,6 +43,19 @@ void __init xen_pvh_init(struct boot_params *boot_params)
+ 	x86_init.oem.banner = xen_banner;
+ 
+ 	xen_efi_init(boot_params);
++
++	if (xen_initial_domain()) {
++		struct xen_platform_op op = {
++			.cmd = XENPF_get_dom0_console,
++		};
++		int ret = HYPERVISOR_platform_op(&op);
++
++		if (ret > 0)
++			xen_init_vga(&op.u.dom0_console,
++				     min(ret * sizeof(char),
++					 sizeof(op.u.dom0_console)),
++				     &boot_params->screen_info);
++	}
+ }
+ 
+ void __init mem_map_via_hcall(struct boot_params *boot_params_p)
+diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
+index 14ea32e734d59..d97adab8420f4 100644
+--- a/arch/x86/xen/vga.c
++++ b/arch/x86/xen/vga.c
+@@ -9,10 +9,9 @@
+ 
+ #include "xen-ops.h"
+ 
+-void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
++void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size,
++			 struct screen_info *screen_info)
+ {
+-	struct screen_info *screen_info = &boot_params.screen_info;
+-
+ 	/* This is drawn from a dump from vgacon:startup in
+ 	 * standard Linux. */
+ 	screen_info->orig_video_mode = 3;
+diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
+index 9a8bb972193d8..a10903785a338 100644
+--- a/arch/x86/xen/xen-ops.h
++++ b/arch/x86/xen/xen-ops.h
+@@ -108,11 +108,12 @@ static inline void xen_uninit_lock_cpu(int cpu)
+ 
+ struct dom0_vga_console_info;
+ 
+-#ifdef CONFIG_XEN_PV_DOM0
+-void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
++#ifdef CONFIG_XEN_DOM0
++void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size,
++			 struct screen_info *);
+ #else
+ static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
+-				       size_t size)
++				       size_t size, struct screen_info *si)
+ {
+ }
+ #endif
+diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
+index cd98366a9b238..f0a7d1c2641e0 100644
+--- a/arch/xtensa/kernel/traps.c
++++ b/arch/xtensa/kernel/traps.c
+@@ -539,7 +539,7 @@ static size_t kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
+ 
+ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
+ {
+-	size_t len;
++	size_t len, off = 0;
+ 
+ 	if (!sp)
+ 		sp = stack_pointer(task);
+@@ -548,9 +548,17 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
+ 		  kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE);
+ 
+ 	printk("%sStack:\n", loglvl);
+-	print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE,
+-		       STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE,
+-		       sp, len, false);
++	while (off < len) {
++		u8 line[STACK_DUMP_LINE_SIZE];
++		size_t line_len = len - off > STACK_DUMP_LINE_SIZE ?
++			STACK_DUMP_LINE_SIZE : len - off;
++
++		__memcpy(line, (u8 *)sp + off, line_len);
++		print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE,
++			       STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE,
++			       line, line_len, false);
++		off += STACK_DUMP_LINE_SIZE;
++	}
+ 	show_trace(task, sp, loglvl);
+ }
+ 
+diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
+index 0c05ccde1f7a6..7c16bc15e7a14 100644
+--- a/drivers/acpi/bus.c
++++ b/drivers/acpi/bus.c
+@@ -459,85 +459,67 @@ out_free:
+                              Notification Handling
+    -------------------------------------------------------------------------- */
+ 
+-/*
+- * acpi_bus_notify
+- * ---------------
+- * Callback for all 'system-level' device notifications (values 0x00-0x7F).
++/**
++ * acpi_bus_notify - Global system-level (0x00-0x7F) notifications handler
++ * @handle: Target ACPI object.
++ * @type: Notification type.
++ * @data: Ignored.
++ *
++ * This only handles notifications related to device hotplug.
+  */
+ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
+ {
+ 	struct acpi_device *adev;
+-	u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
+-	bool hotplug_event = false;
+ 
+ 	switch (type) {
+ 	case ACPI_NOTIFY_BUS_CHECK:
+ 		acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n");
+-		hotplug_event = true;
+ 		break;
+ 
+ 	case ACPI_NOTIFY_DEVICE_CHECK:
+ 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n");
+-		hotplug_event = true;
+ 		break;
+ 
+ 	case ACPI_NOTIFY_DEVICE_WAKE:
+ 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n");
+-		break;
++		return;
+ 
+ 	case ACPI_NOTIFY_EJECT_REQUEST:
+ 		acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
+-		hotplug_event = true;
+ 		break;
+ 
+ 	case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
+ 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n");
+ 		/* TBD: Exactly what does 'light' mean? */
+-		break;
++		return;
+ 
+ 	case ACPI_NOTIFY_FREQUENCY_MISMATCH:
+ 		acpi_handle_err(handle, "Device cannot be configured due "
+ 				"to a frequency mismatch\n");
+-		break;
++		return;
+ 
+ 	case ACPI_NOTIFY_BUS_MODE_MISMATCH:
+ 		acpi_handle_err(handle, "Device cannot be configured due "
+ 				"to a bus mode mismatch\n");
+-		break;
++		return;
+ 
+ 	case ACPI_NOTIFY_POWER_FAULT:
+ 		acpi_handle_err(handle, "Device has suffered a power fault\n");
+-		break;
++		return;
+ 
+ 	default:
+ 		acpi_handle_debug(handle, "Unknown event type 0x%x\n", type);
+-		break;
++		return;
+ 	}
+ 
+ 	adev = acpi_get_acpi_dev(handle);
+-	if (!adev)
+-		goto err;
+-
+-	if (adev->dev.driver) {
+-		struct acpi_driver *driver = to_acpi_driver(adev->dev.driver);
+-
+-		if (driver && driver->ops.notify &&
+-		    (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS))
+-			driver->ops.notify(adev, type);
+-	}
+-
+-	if (!hotplug_event) {
+-		acpi_put_acpi_dev(adev);
+-		return;
+-	}
+ 
+-	if (ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
++	if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
+ 		return;
+ 
+ 	acpi_put_acpi_dev(adev);
+ 
+- err:
+-	acpi_evaluate_ost(handle, type, ost_code, NULL);
++	acpi_evaluate_ost(handle, type, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL);
+ }
+ 
+ static void acpi_notify_device(acpi_handle handle, u32 event, void *data)
+@@ -562,42 +544,51 @@ static u32 acpi_device_fixed_event(void *data)
+ 	return ACPI_INTERRUPT_HANDLED;
+ }
+ 
+-static int acpi_device_install_notify_handler(struct acpi_device *device)
++static int acpi_device_install_notify_handler(struct acpi_device *device,
++					      struct acpi_driver *acpi_drv)
+ {
+ 	acpi_status status;
+ 
+-	if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON)
++	if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
+ 		status =
+ 		    acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
+ 						     acpi_device_fixed_event,
+ 						     device);
+-	else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON)
++	} else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) {
+ 		status =
+ 		    acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
+ 						     acpi_device_fixed_event,
+ 						     device);
+-	else
+-		status = acpi_install_notify_handler(device->handle,
+-						     ACPI_DEVICE_NOTIFY,
++	} else {
++		u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ?
++				ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY;
++
++		status = acpi_install_notify_handler(device->handle, type,
+ 						     acpi_notify_device,
+ 						     device);
++	}
+ 
+ 	if (ACPI_FAILURE(status))
+ 		return -EINVAL;
+ 	return 0;
+ }
+ 
+-static void acpi_device_remove_notify_handler(struct acpi_device *device)
++static void acpi_device_remove_notify_handler(struct acpi_device *device,
++					      struct acpi_driver *acpi_drv)
+ {
+-	if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON)
++	if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
+ 		acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
+ 						acpi_device_fixed_event);
+-	else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON)
++	} else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) {
+ 		acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
+ 						acpi_device_fixed_event);
+-	else
+-		acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
++	} else {
++		u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ?
++				ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY;
++
++		acpi_remove_notify_handler(device->handle, type,
+ 					   acpi_notify_device);
++	}
+ }
+ 
+ /* Handle events targeting \_SB device (at present only graceful shutdown) */
+@@ -1039,7 +1030,7 @@ static int acpi_device_probe(struct device *dev)
+ 		 acpi_drv->name, acpi_dev->pnp.bus_id);
+ 
+ 	if (acpi_drv->ops.notify) {
+-		ret = acpi_device_install_notify_handler(acpi_dev);
++		ret = acpi_device_install_notify_handler(acpi_dev, acpi_drv);
+ 		if (ret) {
+ 			if (acpi_drv->ops.remove)
+ 				acpi_drv->ops.remove(acpi_dev);
+@@ -1062,7 +1053,7 @@ static void acpi_device_remove(struct device *dev)
+ 	struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver);
+ 
+ 	if (acpi_drv->ops.notify)
+-		acpi_device_remove_notify_handler(acpi_dev);
++		acpi_device_remove_notify_handler(acpi_dev, acpi_drv);
+ 
+ 	if (acpi_drv->ops.remove)
+ 		acpi_drv->ops.remove(acpi_dev);
+diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
+index 710ac640267dd..14d6d81e536fe 100644
+--- a/drivers/acpi/video_detect.c
++++ b/drivers/acpi/video_detect.c
+@@ -716,6 +716,13 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ 		DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5515"),
+ 		},
+ 	},
++	{
++	 .callback = video_detect_force_native,
++	 .matches = {
++		DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++		DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 15 3535"),
++		},
++	},
+ 
+ 	/*
+ 	 * Desktops which falsely report a backlight and which our heuristics
+diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
+index e45285d4e62a4..da5727069d851 100644
+--- a/drivers/acpi/x86/utils.c
++++ b/drivers/acpi/x86/utils.c
+@@ -251,6 +251,7 @@ bool force_storage_d3(void)
+ #define ACPI_QUIRK_UART1_TTY_UART2_SKIP				BIT(1)
+ #define ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY			BIT(2)
+ #define ACPI_QUIRK_USE_ACPI_AC_AND_BATTERY			BIT(3)
++#define ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS			BIT(4)
+ 
+ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
+ 	/*
+@@ -279,6 +280,16 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
+ 	 *    need the x86-android-tablets module to properly work.
+ 	 */
+ #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS)
++	{
++		/* Acer Iconia One 7 B1-750 */
++		.matches = {
++			DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
++			DMI_MATCH(DMI_PRODUCT_NAME, "VESPA2"),
++		},
++		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
++					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
++					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
++	},
+ 	{
+ 		.matches = {
+ 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+@@ -286,7 +297,19 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
+ 		},
+ 		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+ 					ACPI_QUIRK_UART1_TTY_UART2_SKIP |
+-					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
++					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
++					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
++	},
++	{
++		/* Lenovo Yoga Book X90F/L */
++		.matches = {
++			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
++			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
++			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
++		},
++		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
++					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
++					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
+ 	},
+ 	{
+ 		.matches = {
+@@ -294,7 +317,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
+ 			DMI_MATCH(DMI_PRODUCT_NAME, "TF103C"),
+ 		},
+ 		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+-					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
++					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
++					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
+ 	},
+ 	{
+ 		/* Lenovo Yoga Tablet 2 1050F/L */
+@@ -336,7 +360,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
+ 			DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"),
+ 		},
+ 		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+-					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
++					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
++					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
+ 	},
+ 	{
+ 		/* Whitelabel (sold as various brands) TM800A550L */
+@@ -413,6 +438,20 @@ int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *s
+ 	return 0;
+ }
+ EXPORT_SYMBOL_GPL(acpi_quirk_skip_serdev_enumeration);
++
++bool acpi_quirk_skip_gpio_event_handlers(void)
++{
++	const struct dmi_system_id *dmi_id;
++	long quirks;
++
++	dmi_id = dmi_first_match(acpi_quirk_skip_dmi_ids);
++	if (!dmi_id)
++		return false;
++
++	quirks = (unsigned long)dmi_id->driver_data;
++	return (quirks & ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS);
++}
++EXPORT_SYMBOL_GPL(acpi_quirk_skip_gpio_event_handlers);
+ #endif
+ 
+ /* Lists of PMIC ACPI HIDs with an (often better) native charger driver */
+diff --git a/drivers/block/loop.c b/drivers/block/loop.c
+index eabbc3bdec221..4916fe78ab8fa 100644
+--- a/drivers/block/loop.c
++++ b/drivers/block/loop.c
+@@ -1010,9 +1010,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
+ 	/* This is safe, since we have a reference from open(). */
+ 	__module_get(THIS_MODULE);
+ 
+-	/* suppress uevents while reconfiguring the device */
+-	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+-
+ 	/*
+ 	 * If we don't hold exclusive handle for the device, upgrade to it
+ 	 * here to avoid changing device under exclusive owner.
+@@ -1067,6 +1064,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
+ 		}
+ 	}
+ 
++	/* suppress uevents while reconfiguring the device */
++	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
++
+ 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+ 	set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
+ 
+@@ -1109,17 +1109,17 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
+ 	if (partscan)
+ 		clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
+ 
++	/* enable and uncork uevent now that we are done */
++	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
++
+ 	loop_global_unlock(lo, is_loop);
+ 	if (partscan)
+ 		loop_reread_partitions(lo);
++
+ 	if (!(mode & FMODE_EXCL))
+ 		bd_abort_claiming(bdev, loop_configure);
+ 
+-	error = 0;
+-done:
+-	/* enable and uncork uevent now that we are done */
+-	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+-	return error;
++	return 0;
+ 
+ out_unlock:
+ 	loop_global_unlock(lo, is_loop);
+@@ -1130,7 +1130,7 @@ out_putf:
+ 	fput(file);
+ 	/* This is safe: open() is still holding a reference. */
+ 	module_put(THIS_MODULE);
+-	goto done;
++	return error;
+ }
+ 
+ static void __loop_clr_fd(struct loop_device *lo, bool release)
+diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
+index 34ff048e70d0e..7c9175619a1dc 100644
+--- a/drivers/gpio/gpiolib-acpi.c
++++ b/drivers/gpio/gpiolib-acpi.c
+@@ -536,6 +536,9 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip)
+ 	if (ACPI_FAILURE(status))
+ 		return;
+ 
++	if (acpi_quirk_skip_gpio_event_handlers())
++		return;
++
+ 	acpi_walk_resources(handle, METHOD_NAME__AEI,
+ 			    acpi_gpiochip_alloc_event, acpi_gpio);
+ 
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+index f873692071032..00a92e935ff0f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+@@ -981,7 +981,12 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
+  */
+ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
+ {
+-	if (adev->flags & AMD_IS_APU)
++	if ((adev->flags & AMD_IS_APU) &&
++	    adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU enabled APUs */
++		return false;
++
++	if ((adev->flags & AMD_IS_APU) &&
++	    amdgpu_acpi_is_s3_active(adev))
+ 		return false;
+ 
+ 	if (amdgpu_sriov_vf(adev))
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+index 2b9d806e23afb..10a0a510910b6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+@@ -123,6 +123,8 @@ enum AMDGIM_FEATURE_FLAG {
+ 	AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
+ 	/* Indirect Reg Access enabled */
+ 	AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5),
++	/* AV1 Support MODE*/
++	AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
+ };
+ 
+ enum AMDGIM_REG_ACCESS_FLAG {
+@@ -321,6 +323,8 @@ static inline bool is_virtual_machine(void)
+ 	((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
+ #define amdgpu_sriov_is_normal(adev) \
+ 	((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
++#define amdgpu_sriov_is_av1_support(adev) \
++	((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
+ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
+ void amdgpu_virt_init_setting(struct amdgpu_device *adev);
+ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+index 6c97148ca0ed3..24d42d24e6a01 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+@@ -93,7 +93,8 @@ union amd_sriov_msg_feature_flags {
+ 		uint32_t mm_bw_management  : 1;
+ 		uint32_t pp_one_vf_mode	   : 1;
+ 		uint32_t reg_indirect_acc  : 1;
+-		uint32_t reserved	   : 26;
++		uint32_t av1_support       : 1;
++		uint32_t reserved	   : 25;
+ 	} flags;
+ 	uint32_t all;
+ };
+diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
+index 3d938b52178e3..9eedc1a1494c0 100644
+--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
++++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
+@@ -101,6 +101,59 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 =
+ 	.codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1,
+ };
+ 
++/* SRIOV SOC21, not const since data is controlled by host */
++static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
++};
++
++static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
++};
++
++static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = {
++	.codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
++	.codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
++};
++
++static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = {
++	.codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
++	.codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
++};
++
++static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
++};
++
++static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
++	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
++};
++
++static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn0 = {
++	.codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0),
++	.codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
++};
++
++static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 = {
++	.codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1),
++	.codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
++};
++
+ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ 				 const struct amdgpu_video_codecs **codecs)
+ {
+@@ -111,16 +164,31 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ 	case IP_VERSION(4, 0, 0):
+ 	case IP_VERSION(4, 0, 2):
+ 	case IP_VERSION(4, 0, 4):
+-		if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
+-			if (encode)
+-				*codecs = &vcn_4_0_0_video_codecs_encode_vcn1;
+-			else
+-				*codecs = &vcn_4_0_0_video_codecs_decode_vcn1;
++		if (amdgpu_sriov_vf(adev)) {
++			if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
++			!amdgpu_sriov_is_av1_support(adev)) {
++				if (encode)
++					*codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn1;
++				else
++					*codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn1;
++			} else {
++				if (encode)
++					*codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn0;
++				else
++					*codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn0;
++			}
+ 		} else {
+-			if (encode)
+-				*codecs = &vcn_4_0_0_video_codecs_encode_vcn0;
+-			else
+-				*codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
++			if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)) {
++				if (encode)
++					*codecs = &vcn_4_0_0_video_codecs_encode_vcn1;
++				else
++					*codecs = &vcn_4_0_0_video_codecs_decode_vcn1;
++			} else {
++				if (encode)
++					*codecs = &vcn_4_0_0_video_codecs_encode_vcn0;
++				else
++					*codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
++			}
+ 		}
+ 		return 0;
+ 	default:
+@@ -729,8 +797,23 @@ static int soc21_common_late_init(void *handle)
+ {
+ 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ 
+-	if (amdgpu_sriov_vf(adev))
++	if (amdgpu_sriov_vf(adev)) {
+ 		xgpu_nv_mailbox_get_irq(adev);
++		if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
++		!amdgpu_sriov_is_av1_support(adev)) {
++			amdgpu_virt_update_sriov_video_codec(adev,
++							     sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
++							     ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
++							     sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
++							     ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1));
++		} else {
++			amdgpu_virt_update_sriov_video_codec(adev,
++							     sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
++							     ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
++							     sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
++							     ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0));
++		}
++	}
+ 
+ 	return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index f79b8e964140e..e191d38f3da62 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -1298,14 +1298,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+ 		args->n_success = i+1;
+ 	}
+ 
+-	mutex_unlock(&p->mutex);
+-
+ 	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
+ 	if (err) {
+ 		pr_debug("Sync memory failed, wait interrupted by user signal\n");
+ 		goto sync_memory_failed;
+ 	}
+ 
++	mutex_unlock(&p->mutex);
++
+ 	/* Flush TLBs after waiting for the page table updates to complete */
+ 	for (i = 0; i < args->n_devices; i++) {
+ 		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+@@ -1321,9 +1321,9 @@ get_process_device_data_failed:
+ bind_process_to_device_failed:
+ get_mem_obj_from_handle_failed:
+ map_memory_to_gpu_failed:
++sync_memory_failed:
+ 	mutex_unlock(&p->mutex);
+ copy_from_user_failed:
+-sync_memory_failed:
+ 	kfree(devices_arr);
+ 
+ 	return err;
+@@ -1337,6 +1337,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+ 	void *mem;
+ 	long err = 0;
+ 	uint32_t *devices_arr = NULL, i;
++	bool flush_tlb;
+ 
+ 	if (!args->n_devices) {
+ 		pr_debug("Device IDs array empty\n");
+@@ -1389,16 +1390,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+ 		}
+ 		args->n_success = i+1;
+ 	}
+-	mutex_unlock(&p->mutex);
+ 
+-	if (kfd_flush_tlb_after_unmap(pdd->dev)) {
++	flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev);
++	if (flush_tlb) {
+ 		err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
+ 				(struct kgd_mem *) mem, true);
+ 		if (err) {
+ 			pr_debug("Sync memory failed, wait interrupted by user signal\n");
+ 			goto sync_memory_failed;
+ 		}
++	}
++	mutex_unlock(&p->mutex);
+ 
++	if (flush_tlb) {
+ 		/* Flush TLBs after waiting for the page table updates to complete */
+ 		for (i = 0; i < args->n_devices; i++) {
+ 			peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+@@ -1414,9 +1418,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+ bind_process_to_device_failed:
+ get_mem_obj_from_handle_failed:
+ unmap_memory_from_gpu_failed:
++sync_memory_failed:
+ 	mutex_unlock(&p->mutex);
+ copy_from_user_failed:
+-sync_memory_failed:
+ 	kfree(devices_arr);
+ 	return err;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+index 10048ce16aea4..5c8506f180140 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+@@ -289,7 +289,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
+ static int
+ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+ 			 struct migrate_vma *migrate, struct dma_fence **mfence,
+-			 dma_addr_t *scratch)
++			 dma_addr_t *scratch, uint64_t ttm_res_offset)
+ {
+ 	uint64_t npages = migrate->npages;
+ 	struct device *dev = adev->dev;
+@@ -299,19 +299,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+ 	uint64_t i, j;
+ 	int r;
+ 
+-	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+-		 prange->last);
++	pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start,
++		 prange->last, ttm_res_offset);
+ 
+ 	src = scratch;
+ 	dst = (uint64_t *)(scratch + npages);
+ 
+-	r = svm_range_vram_node_new(adev, prange, true);
+-	if (r) {
+-		dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
+-		goto out;
+-	}
+-
+-	amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
++	amdgpu_res_first(prange->ttm_res, ttm_res_offset,
+ 			 npages << PAGE_SHIFT, &cursor);
+ 	for (i = j = 0; i < npages; i++) {
+ 		struct page *spage;
+@@ -391,14 +385,14 @@ out_free_vram_pages:
+ 		migrate->dst[i + 3] = 0;
+ 	}
+ #endif
+-out:
++
+ 	return r;
+ }
+ 
+ static long
+ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+ 			struct vm_area_struct *vma, uint64_t start,
+-			uint64_t end, uint32_t trigger)
++			uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
+ {
+ 	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+ 	uint64_t npages = (end - start) >> PAGE_SHIFT;
+@@ -451,7 +445,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+ 	else
+ 		pr_debug("0x%lx pages migrated\n", cpages);
+ 
+-	r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
++	r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, ttm_res_offset);
+ 	migrate_vma_pages(&migrate);
+ 
+ 	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+@@ -499,6 +493,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+ 	unsigned long addr, start, end;
+ 	struct vm_area_struct *vma;
+ 	struct amdgpu_device *adev;
++	uint64_t ttm_res_offset;
+ 	unsigned long cpages = 0;
+ 	long r = 0;
+ 
+@@ -520,6 +515,13 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+ 	start = prange->start << PAGE_SHIFT;
+ 	end = (prange->last + 1) << PAGE_SHIFT;
+ 
++	r = svm_range_vram_node_new(adev, prange, true);
++	if (r) {
++		dev_dbg(adev->dev, "fail %ld to alloc vram\n", r);
++		return r;
++	}
++	ttm_res_offset = prange->offset << PAGE_SHIFT;
++
+ 	for (addr = start; addr < end;) {
+ 		unsigned long next;
+ 
+@@ -528,18 +530,21 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+ 			break;
+ 
+ 		next = min(vma->vm_end, end);
+-		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
++		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, ttm_res_offset);
+ 		if (r < 0) {
+ 			pr_debug("failed %ld to migrate\n", r);
+ 			break;
+ 		} else {
+ 			cpages += r;
+ 		}
++		ttm_res_offset += next - addr;
+ 		addr = next;
+ 	}
+ 
+ 	if (cpages)
+ 		prange->actual_loc = best_loc;
++	else
++		svm_range_vram_node_free(prange);
+ 
+ 	return r < 0 ? r : 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+index 09b966dc37681..aee2212e52f69 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+@@ -77,6 +77,7 @@ err_ioctl:
+ 
+ static void kfd_exit(void)
+ {
++	kfd_cleanup_processes();
+ 	kfd_debugfs_fini();
+ 	kfd_process_destroy_wq();
+ 	kfd_procfs_shutdown();
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index 552c3ac85a132..7dc55919993c0 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -926,6 +926,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev);
+ 
+ int kfd_process_create_wq(void);
+ void kfd_process_destroy_wq(void);
++void kfd_cleanup_processes(void);
+ struct kfd_process *kfd_create_process(struct file *filep);
+ struct kfd_process *kfd_get_process(const struct task_struct *task);
+ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 51b1683ac5c1e..4d9f2d1c49b1d 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn)
+ 	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
+ }
+ 
++static void kfd_process_notifier_release_internal(struct kfd_process *p)
++{
++	cancel_delayed_work_sync(&p->eviction_work);
++	cancel_delayed_work_sync(&p->restore_work);
++
++	/* Indicate to other users that MM is no longer valid */
++	p->mm = NULL;
++
++	mmu_notifier_put(&p->mmu_notifier);
++}
++
+ static void kfd_process_notifier_release(struct mmu_notifier *mn,
+ 					struct mm_struct *mm)
+ {
+@@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
+ 		return;
+ 
+ 	mutex_lock(&kfd_processes_mutex);
++	/*
++	 * Do early return if table is empty.
++	 *
++	 * This could potentially happen if this function is called concurrently
++	 * by mmu_notifier and by kfd_cleanup_processes.
++	 *
++	 */
++	if (hash_empty(kfd_processes_table)) {
++		mutex_unlock(&kfd_processes_mutex);
++		return;
++	}
+ 	hash_del_rcu(&p->kfd_processes);
+ 	mutex_unlock(&kfd_processes_mutex);
+ 	synchronize_srcu(&kfd_processes_srcu);
+ 
+-	cancel_delayed_work_sync(&p->eviction_work);
+-	cancel_delayed_work_sync(&p->restore_work);
+-
+-	/* Indicate to other users that MM is no longer valid */
+-	p->mm = NULL;
+-
+-	mmu_notifier_put(&p->mmu_notifier);
++	kfd_process_notifier_release_internal(p);
+ }
+ 
+ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
+@@ -1200,6 +1216,43 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
+ 	.free_notifier = kfd_process_free_notifier,
+ };
+ 
++/*
++ * This code handles the case when driver is being unloaded before all
++ * mm_struct are released.  We need to safely free the kfd_process and
++ * avoid race conditions with mmu_notifier that might try to free them.
++ *
++ */
++void kfd_cleanup_processes(void)
++{
++	struct kfd_process *p;
++	struct hlist_node *p_temp;
++	unsigned int temp;
++	HLIST_HEAD(cleanup_list);
++
++	/*
++	 * Move all remaining kfd_process from the process table to a
++	 * temp list for processing.   Once done, callback from mmu_notifier
++	 * release will not see the kfd_process in the table and do early return,
++	 * avoiding double free issues.
++	 */
++	mutex_lock(&kfd_processes_mutex);
++	hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
++		hash_del_rcu(&p->kfd_processes);
++		synchronize_srcu(&kfd_processes_srcu);
++		hlist_add_head(&p->kfd_processes, &cleanup_list);
++	}
++	mutex_unlock(&kfd_processes_mutex);
++
++	hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes)
++		kfd_process_notifier_release_internal(p);
++
++	/*
++	 * Ensures that all outstanding free_notifier get called, triggering
++	 * the release of the kfd_process struct.
++	 */
++	mmu_notifier_synchronize();
++}
++
+ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
+ {
+ 	unsigned long  offset;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+index 5137476ec18e6..4236539d9f932 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+@@ -218,8 +218,8 @@ static int init_user_queue(struct process_queue_manager *pqm,
+ 	return 0;
+ 
+ cleanup:
+-	if (dev->shared_resources.enable_mes)
+-		uninit_queue(*q);
++	uninit_queue(*q);
++	*q = NULL;
+ 	return retval;
+ }
+ 
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+index a7fd98f57f94c..dc62375a8e2c4 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+@@ -495,7 +495,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
+ 	link->dp.mst_enabled = config->mst_enabled;
+ 	link->dp.usb4_enabled = config->usb4_enabled;
+ 	display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
+-	link->adjust.auth_delay = 0;
++	link->adjust.auth_delay = 2;
+ 	link->adjust.hdcp1.disable = 0;
+ 	conn_state = aconnector->base.state;
+ 
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+index abdbd4352f6f3..60dd88666437d 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+@@ -208,6 +208,21 @@ bool needs_dsc_aux_workaround(struct dc_link *link)
+ 	return false;
+ }
+ 
++bool is_synaptics_cascaded_panamera(struct dc_link *link, struct drm_dp_mst_port *port)
++{
++	u8 branch_vendor_data[4] = { 0 }; // Vendor data 0x50C ~ 0x50F
++
++	if (drm_dp_dpcd_read(port->mgr->aux, DP_BRANCH_VENDOR_SPECIFIC_START, &branch_vendor_data, 4) == 4) {
++		if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 &&
++				IS_SYNAPTICS_CASCADED_PANAMERA(link->dpcd_caps.branch_dev_name, branch_vendor_data)) {
++			DRM_INFO("Synaptics Cascaded MST hub\n");
++			return true;
++		}
++	}
++
++	return false;
++}
++
+ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector)
+ {
+ 	struct dc_sink *dc_sink = aconnector->dc_sink;
+@@ -231,6 +246,10 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto
+ 	    needs_dsc_aux_workaround(aconnector->dc_link))
+ 		aconnector->dsc_aux = &aconnector->mst_port->dm_dp_aux.aux;
+ 
++	/* synaptics cascaded MST hub case */
++	if (!aconnector->dsc_aux && is_synaptics_cascaded_panamera(aconnector->dc_link, port))
++		aconnector->dsc_aux = port->mgr->aux;
++
+ 	if (!aconnector->dsc_aux)
+ 		return false;
+ 
+@@ -627,12 +646,25 @@ struct dsc_mst_fairness_params {
+ 	struct amdgpu_dm_connector *aconnector;
+ };
+ 
+-static int kbps_to_peak_pbn(int kbps)
++static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link)
++{
++	u8 link_coding_cap;
++	uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B;
++
++	link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link);
++	if (link_coding_cap == DP_128b_132b_ENCODING)
++		fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B;
++
++	return fec_overhead_multiplier_x1000;
++}
++
++static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000)
+ {
+ 	u64 peak_kbps = kbps;
+ 
+ 	peak_kbps *= 1006;
+-	peak_kbps = div_u64(peak_kbps, 1000);
++	peak_kbps *= fec_overhead_multiplier_x1000;
++	peak_kbps = div_u64(peak_kbps, 1000 * 1000);
+ 	return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000));
+ }
+ 
+@@ -726,11 +758,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state,
+ 	int link_timeslots_used;
+ 	int fair_pbn_alloc;
+ 	int ret = 0;
++	uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
+ 
+ 	for (i = 0; i < count; i++) {
+ 		if (vars[i + k].dsc_enabled) {
+ 			initial_slack[i] =
+-			kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn;
++			kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn;
+ 			bpp_increased[i] = false;
+ 			remaining_to_increase += 1;
+ 		} else {
+@@ -826,6 +859,7 @@ static int try_disable_dsc(struct drm_atomic_state *state,
+ 	int next_index;
+ 	int remaining_to_try = 0;
+ 	int ret;
++	uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
+ 
+ 	for (i = 0; i < count; i++) {
+ 		if (vars[i + k].dsc_enabled
+@@ -855,7 +889,7 @@ static int try_disable_dsc(struct drm_atomic_state *state,
+ 		if (next_index == -1)
+ 			break;
+ 
+-		vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps);
++		vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
+ 		ret = drm_dp_atomic_find_time_slots(state,
+ 						    params[next_index].port->mgr,
+ 						    params[next_index].port,
+@@ -868,7 +902,7 @@ static int try_disable_dsc(struct drm_atomic_state *state,
+ 			vars[next_index].dsc_enabled = false;
+ 			vars[next_index].bpp_x16 = 0;
+ 		} else {
+-			vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps);
++			vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps, fec_overhead_multiplier_x1000);
+ 			ret = drm_dp_atomic_find_time_slots(state,
+ 							    params[next_index].port->mgr,
+ 							    params[next_index].port,
+@@ -897,6 +931,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
+ 	int count = 0;
+ 	int i, k, ret;
+ 	bool debugfs_overwrite = false;
++	uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
+ 
+ 	memset(params, 0, sizeof(params));
+ 
+@@ -958,7 +993,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
+ 	/* Try no compression */
+ 	for (i = 0; i < count; i++) {
+ 		vars[i + k].aconnector = params[i].aconnector;
+-		vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
++		vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
+ 		vars[i + k].dsc_enabled = false;
+ 		vars[i + k].bpp_x16 = 0;
+ 		ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port,
+@@ -977,7 +1012,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
+ 	/* Try max compression */
+ 	for (i = 0; i < count; i++) {
+ 		if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) {
+-			vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps);
++			vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000);
+ 			vars[i + k].dsc_enabled = true;
+ 			vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16;
+ 			ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
+@@ -985,7 +1020,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
+ 			if (ret < 0)
+ 				return ret;
+ 		} else {
+-			vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
++			vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
+ 			vars[i + k].dsc_enabled = false;
+ 			vars[i + k].bpp_x16 = 0;
+ 			ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+index 97fd70df531bf..1e4ede1e57abd 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+@@ -34,6 +34,21 @@
+ #define SYNAPTICS_RC_OFFSET        0x4BC
+ #define SYNAPTICS_RC_DATA          0x4C0
+ 
++#define DP_BRANCH_VENDOR_SPECIFIC_START 0x50C
++
++/**
++ * Panamera MST Hub detection
++ * Offset DPCD 050Eh == 0x5A indicates cascaded MST hub case
++ * Check from beginning of branch device vendor specific field (050Ch)
++ */
++#define IS_SYNAPTICS_PANAMERA(branchDevName) (((int)branchDevName[4] & 0xF0) == 0x50 ? 1 : 0)
++#define BRANCH_HW_REVISION_PANAMERA_A2 0x10
++#define SYNAPTICS_CASCADED_HUB_ID  0x5A
++#define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0)
++
++#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B	1031
++#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B	1000
++
+ struct amdgpu_display_manager;
+ struct amdgpu_dm_connector;
+ 
+diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+index 7031db145a77a..3524b5811682a 100644
+--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+@@ -91,7 +91,15 @@ static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+ static int etnaviv_gem_prime_mmap_obj(struct etnaviv_gem_object *etnaviv_obj,
+ 		struct vm_area_struct *vma)
+ {
+-	return dma_buf_mmap(etnaviv_obj->base.dma_buf, vma, 0);
++	int ret;
++
++	ret = dma_buf_mmap(etnaviv_obj->base.dma_buf, vma, 0);
++	if (!ret) {
++		/* Drop the reference acquired by drm_gem_mmap_obj(). */
++		drm_gem_object_put(&etnaviv_obj->base);
++	}
++
++	return ret;
+ }
+ 
+ static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
+diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
+index 250e83f1f5ac0..c3928d28cd443 100644
+--- a/drivers/gpu/drm/i915/display/intel_color.c
++++ b/drivers/gpu/drm/i915/display/intel_color.c
+@@ -514,6 +514,22 @@ static void icl_color_commit_noarm(const struct intel_crtc_state *crtc_state)
+ 	icl_load_csc_matrix(crtc_state);
+ }
+ 
++static void skl_color_commit_noarm(const struct intel_crtc_state *crtc_state)
++{
++	/*
++	 * Possibly related to display WA #1184, SKL CSC loses the latched
++	 * CSC coeff/offset register values if the CSC registers are disarmed
++	 * between DC5 exit and PSR exit. This will cause the plane(s) to
++	 * output all black (until CSC_MODE is rearmed and properly latched).
++	 * Once PSR exit (and proper register latching) has occurred the
++	 * danger is over. Thus when PSR is enabled the CSC coeff/offset
++	 * register programming will be performed from skl_color_commit_arm()
++	 * which is called after PSR exit.
++	 */
++	if (!crtc_state->has_psr)
++		ilk_load_csc_matrix(crtc_state);
++}
++
+ static void ilk_color_commit_noarm(const struct intel_crtc_state *crtc_state)
+ {
+ 	ilk_load_csc_matrix(crtc_state);
+@@ -556,6 +572,9 @@ static void skl_color_commit_arm(const struct intel_crtc_state *crtc_state)
+ 	enum pipe pipe = crtc->pipe;
+ 	u32 val = 0;
+ 
++	if (crtc_state->has_psr)
++		ilk_load_csc_matrix(crtc_state);
++
+ 	/*
+ 	 * We don't (yet) allow userspace to control the pipe background color,
+ 	 * so force it to black, but apply pipe gamma and CSC appropriately
+@@ -574,6 +593,25 @@ static void skl_color_commit_arm(const struct intel_crtc_state *crtc_state)
+ 			  crtc_state->csc_mode);
+ }
+ 
++static void icl_color_commit_arm(const struct intel_crtc_state *crtc_state)
++{
++	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
++	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
++	enum pipe pipe = crtc->pipe;
++
++	/*
++	 * We don't (yet) allow userspace to control the pipe background color,
++	 * so force it to black.
++	 */
++	intel_de_write(i915, SKL_BOTTOM_COLOR(pipe), 0);
++
++	intel_de_write(i915, GAMMA_MODE(crtc->pipe),
++		       crtc_state->gamma_mode);
++
++	intel_de_write_fw(i915, PIPE_CSC_MODE(crtc->pipe),
++			  crtc_state->csc_mode);
++}
++
+ static struct drm_property_blob *
+ create_linear_lut(struct drm_i915_private *i915, int lut_size)
+ {
+@@ -2287,14 +2325,14 @@ static const struct intel_color_funcs i9xx_color_funcs = {
+ static const struct intel_color_funcs icl_color_funcs = {
+ 	.color_check = icl_color_check,
+ 	.color_commit_noarm = icl_color_commit_noarm,
+-	.color_commit_arm = skl_color_commit_arm,
++	.color_commit_arm = icl_color_commit_arm,
+ 	.load_luts = icl_load_luts,
+ 	.read_luts = icl_read_luts,
+ };
+ 
+ static const struct intel_color_funcs glk_color_funcs = {
+ 	.color_check = glk_color_check,
+-	.color_commit_noarm = ilk_color_commit_noarm,
++	.color_commit_noarm = skl_color_commit_noarm,
+ 	.color_commit_arm = skl_color_commit_arm,
+ 	.load_luts = glk_load_luts,
+ 	.read_luts = glk_read_luts,
+@@ -2302,7 +2340,7 @@ static const struct intel_color_funcs glk_color_funcs = {
+ 
+ static const struct intel_color_funcs skl_color_funcs = {
+ 	.color_check = ivb_color_check,
+-	.color_commit_noarm = ilk_color_commit_noarm,
++	.color_commit_noarm = skl_color_commit_noarm,
+ 	.color_commit_arm = skl_color_commit_arm,
+ 	.load_luts = bdw_load_luts,
+ 	.read_luts = NULL,
+diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
+index 8b6994853f6f8..f0aad2403109b 100644
+--- a/drivers/gpu/drm/i915/display/intel_display.c
++++ b/drivers/gpu/drm/i915/display/intel_display.c
+@@ -7107,6 +7107,8 @@ static void intel_update_crtc(struct intel_atomic_state *state,
+ 
+ 	intel_fbc_update(state, crtc);
+ 
++	drm_WARN_ON(&i915->drm, !intel_display_power_is_enabled(i915, POWER_DOMAIN_DC_OFF));
++
+ 	if (!modeset &&
+ 	    intel_crtc_needs_color_update(new_crtc_state))
+ 		intel_color_commit_noarm(new_crtc_state);
+@@ -7480,8 +7482,28 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
+ 	drm_atomic_helper_wait_for_dependencies(&state->base);
+ 	drm_dp_mst_atomic_wait_for_dependencies(&state->base);
+ 
+-	if (state->modeset)
+-		wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
++	/*
++	 * During full modesets we write a lot of registers, wait
++	 * for PLLs, etc. Doing that while DC states are enabled
++	 * is not a good idea.
++	 *
++	 * During fastsets and other updates we also need to
++	 * disable DC states due to the following scenario:
++	 * 1. DC5 exit and PSR exit happen
++	 * 2. Some or all _noarm() registers are written
++	 * 3. Due to some long delay PSR is re-entered
++	 * 4. DC5 entry -> DMC saves the already written new
++	 *    _noarm() registers and the old not yet written
++	 *    _arm() registers
++	 * 5. DC5 exit -> DMC restores a mixture of old and
++	 *    new register values and arms the update
++	 * 6. PSR exit -> hardware latches a mixture of old and
++	 *    new register values -> corrupted frame, or worse
++	 * 7. New _arm() registers are finally written
++	 * 8. Hardware finally latches a complete set of new
++	 *    register values, and subsequent frames will be OK again
++	 */
++	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_DC_OFF);
+ 
+ 	intel_atomic_prepare_plane_clear_colors(state);
+ 
+@@ -7625,8 +7647,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
+ 		 * the culprit.
+ 		 */
+ 		intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore);
+-		intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET, wakeref);
+ 	}
++	intel_display_power_put(dev_priv, POWER_DOMAIN_DC_OFF, wakeref);
+ 	intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref);
+ 
+ 	/*
+diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
+index ad1a37b515fb1..2a9f40a2b3ed0 100644
+--- a/drivers/gpu/drm/i915/display/intel_dpt.c
++++ b/drivers/gpu/drm/i915/display/intel_dpt.c
+@@ -301,6 +301,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
+ 	vm->pte_encode = gen8_ggtt_pte_encode;
+ 
+ 	dpt->obj = dpt_obj;
++	dpt->obj->is_dpt = true;
+ 
+ 	return &dpt->vm;
+ }
+@@ -309,5 +310,6 @@ void intel_dpt_destroy(struct i915_address_space *vm)
+ {
+ 	struct i915_dpt *dpt = i915_vm_to_dpt(vm);
+ 
++	dpt->obj->is_dpt = false;
+ 	i915_vm_put(&dpt->vm);
+ }
+diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
+index 70624b4b2d38c..c5d41fd51118f 100644
+--- a/drivers/gpu/drm/i915/display/intel_tc.c
++++ b/drivers/gpu/drm/i915/display/intel_tc.c
+@@ -436,9 +436,9 @@ static bool icl_tc_phy_is_owned(struct intel_digital_port *dig_port)
+ 				PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia));
+ 	if (val == 0xffffffff) {
+ 		drm_dbg_kms(&i915->drm,
+-			    "Port %s: PHY in TCCOLD, assume safe mode\n",
++			    "Port %s: PHY in TCCOLD, assume not owned\n",
+ 			    dig_port->tc_port_name);
+-		return true;
++		return false;
+ 	}
+ 
+ 	return val & DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx);
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
+index 8949fb0a944f6..3198b64ad7dbc 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
+@@ -127,7 +127,8 @@ i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915,
+ 
+ 	memcpy(map, data, size);
+ 
+-	i915_gem_object_unpin_map(obj);
++	i915_gem_object_flush_map(obj);
++	__i915_gem_object_release_map(obj);
+ 
+ 	return obj;
+ }
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
+index 3db53769864c2..2f53a68348217 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
++++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
+@@ -319,7 +319,7 @@ i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj)
+ static inline bool
+ i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
+ {
+-	return READ_ONCE(obj->frontbuffer);
++	return READ_ONCE(obj->frontbuffer) || obj->is_dpt;
+ }
+ 
+ static inline unsigned int
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+index ab4c2f90a5643..1d0d8ee9d707d 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
++++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+@@ -491,6 +491,9 @@ struct drm_i915_gem_object {
+ 	 */
+ 	unsigned int cache_dirty:1;
+ 
++	/* @is_dpt: Object houses a display page table (DPT) */
++	unsigned int is_dpt:1;
++
+ 	/**
+ 	 * @read_domains: Read memory domains.
+ 	 *
+diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
+index 9ad3bc7201cba..fc73cfe0e39bb 100644
+--- a/drivers/gpu/drm/i915/gt/intel_rps.c
++++ b/drivers/gpu/drm/i915/gt/intel_rps.c
+@@ -2074,16 +2074,6 @@ void intel_rps_sanitize(struct intel_rps *rps)
+ 		rps_disable_interrupts(rps);
+ }
+ 
+-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
+-{
+-	struct drm_i915_private *i915 = rps_to_i915(rps);
+-	i915_reg_t rpstat;
+-
+-	rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+-
+-	return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
+-}
+-
+ u32 intel_rps_read_rpstat(struct intel_rps *rps)
+ {
+ 	struct drm_i915_private *i915 = rps_to_i915(rps);
+@@ -2094,7 +2084,7 @@ u32 intel_rps_read_rpstat(struct intel_rps *rps)
+ 	return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
+ }
+ 
+-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
++static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
+ {
+ 	struct drm_i915_private *i915 = rps_to_i915(rps);
+ 	u32 cagf;
+@@ -2117,10 +2107,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
+ 	return cagf;
+ }
+ 
+-static u32 read_cagf(struct intel_rps *rps)
++static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
+ {
+ 	struct drm_i915_private *i915 = rps_to_i915(rps);
+ 	struct intel_uncore *uncore = rps_to_uncore(rps);
++	i915_reg_t r = INVALID_MMIO_REG;
+ 	u32 freq;
+ 
+ 	/*
+@@ -2128,22 +2119,30 @@ static u32 read_cagf(struct intel_rps *rps)
+ 	 * registers will return 0 freq when GT is in RC6
+ 	 */
+ 	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+-		freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
++		r = MTL_MIRROR_TARGET_WP1;
+ 	} else if (GRAPHICS_VER(i915) >= 12) {
+-		freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
++		r = GEN12_RPSTAT1;
+ 	} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ 		vlv_punit_get(i915);
+ 		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ 		vlv_punit_put(i915);
+ 	} else if (GRAPHICS_VER(i915) >= 6) {
+-		freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
++		r = GEN6_RPSTAT1;
+ 	} else {
+-		freq = intel_uncore_read(uncore, MEMSTAT_ILK);
++		r = MEMSTAT_ILK;
+ 	}
+ 
++	if (i915_mmio_reg_valid(r))
++		freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r);
++
+ 	return intel_rps_get_cagf(rps, freq);
+ }
+ 
++static u32 read_cagf(struct intel_rps *rps)
++{
++	return __read_cagf(rps, true);
++}
++
+ u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
+ {
+ 	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
+@@ -2156,7 +2155,12 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
+ 	return freq;
+ }
+ 
+-u32 intel_rps_read_punit_req(struct intel_rps *rps)
++u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
++{
++	return intel_gpu_freq(rps, __read_cagf(rps, false));
++}
++
++static u32 intel_rps_read_punit_req(struct intel_rps *rps)
+ {
+ 	struct intel_uncore *uncore = rps_to_uncore(rps);
+ 	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
+diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
+index 9e1cad9ba0e9c..d86ddfee095ed 100644
+--- a/drivers/gpu/drm/i915/gt/intel_rps.h
++++ b/drivers/gpu/drm/i915/gt/intel_rps.h
+@@ -34,8 +34,8 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
+ 
+ int intel_gpu_freq(struct intel_rps *rps, int val);
+ int intel_freq_opcode(struct intel_rps *rps, int val);
+-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
+ u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
++u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps);
+ u32 intel_rps_get_requested_frequency(struct intel_rps *rps);
+ u32 intel_rps_get_min_frequency(struct intel_rps *rps);
+ u32 intel_rps_get_min_raw_freq(struct intel_rps *rps);
+@@ -46,10 +46,8 @@ int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val);
+ u32 intel_rps_get_rp0_frequency(struct intel_rps *rps);
+ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
+ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
+-u32 intel_rps_read_punit_req(struct intel_rps *rps);
+ u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
+ u32 intel_rps_read_rpstat(struct intel_rps *rps);
+-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps);
+ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps);
+ void intel_rps_raise_unslice(struct intel_rps *rps);
+ void intel_rps_lower_unslice(struct intel_rps *rps);
+diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
+index 125b6ca25a756..7d5e2c53c23a7 100644
+--- a/drivers/gpu/drm/i915/i915_perf.c
++++ b/drivers/gpu/drm/i915/i915_perf.c
+@@ -1592,9 +1592,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
+ 	/*
+ 	 * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6.
+ 	 */
+-	if (intel_uc_uses_guc_rc(&gt->uc) &&
+-	    (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+-	     IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)))
++	if (stream->override_gucrc)
+ 		drm_WARN_ON(&gt->i915->drm,
+ 			    intel_guc_slpc_unset_gucrc_mode(&gt->uc.guc.slpc));
+ 
+@@ -3293,8 +3291,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
+ 		if (ret) {
+ 			drm_dbg(&stream->perf->i915->drm,
+ 				"Unable to override gucrc mode\n");
+-			goto err_config;
++			goto err_gucrc;
+ 		}
++
++		stream->override_gucrc = true;
+ 	}
+ 
+ 	ret = alloc_oa_buffer(stream);
+@@ -3333,11 +3333,15 @@ err_enable:
+ 	free_oa_buffer(stream);
+ 
+ err_oa_buf_alloc:
+-	free_oa_configs(stream);
++	if (stream->override_gucrc)
++		intel_guc_slpc_unset_gucrc_mode(&gt->uc.guc.slpc);
+ 
++err_gucrc:
+ 	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
+ 	intel_engine_pm_put(stream->engine);
+ 
++	free_oa_configs(stream);
++
+ err_config:
+ 	free_noa_wait(stream);
+ 
+diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
+index ca150b7af3f29..4d5d8c365d9e2 100644
+--- a/drivers/gpu/drm/i915/i915_perf_types.h
++++ b/drivers/gpu/drm/i915/i915_perf_types.h
+@@ -316,6 +316,12 @@ struct i915_perf_stream {
+ 	 * buffer should be checked for available data.
+ 	 */
+ 	u64 poll_oa_period;
++
++	/**
++	 * @override_gucrc: GuC RC has been overridden for the perf stream,
++	 * and we need to restore the default configuration on release.
++	 */
++	bool override_gucrc;
+ };
+ 
+ /**
+diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
+index 52531ab28c5f5..6d422b056f8a8 100644
+--- a/drivers/gpu/drm/i915/i915_pmu.c
++++ b/drivers/gpu/drm/i915/i915_pmu.c
+@@ -393,14 +393,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
+ 		 * case we assume the system is running at the intended
+ 		 * frequency. Fortunately, the read should rarely fail!
+ 		 */
+-		val = intel_rps_read_rpstat_fw(rps);
+-		if (val)
+-			val = intel_rps_get_cagf(rps, val);
+-		else
+-			val = rps->cur_freq;
++		val = intel_rps_read_actual_frequency_fw(rps);
++		if (!val)
++			val = intel_gpu_freq(rps, rps->cur_freq);
+ 
+ 		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
+-				intel_gpu_freq(rps, val), period_ns / 1000);
++				val, period_ns / 1000);
+ 	}
+ 
+ 	if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
+diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
+index 40409a29f5b69..91b5ecc575380 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
++++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
+@@ -33,6 +33,7 @@
+ #include <linux/apple-gmux.h>
+ #include <linux/backlight.h>
+ #include <linux/idr.h>
++#include <drm/drm_probe_helper.h>
+ 
+ #include "nouveau_drv.h"
+ #include "nouveau_reg.h"
+@@ -299,8 +300,12 @@ nv50_backlight_init(struct nouveau_backlight *bl,
+ 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
+ 	struct nvif_object *device = &drm->client.device.object;
+ 
++	/*
++	 * Note when this runs the connectors have not been probed yet,
++	 * so nv_conn->base.status is not set yet.
++	 */
+ 	if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1)) ||
+-	    nv_conn->base.status != connector_status_connected)
++	    drm_helper_probe_detect(&nv_conn->base, NULL, false) != connector_status_connected)
+ 		return -ENODEV;
+ 
+ 	if (nv_conn->type == DCB_CONNECTOR_eDP) {
+diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
+index f060ac7376e69..cfeb24d40d378 100644
+--- a/drivers/idle/intel_idle.c
++++ b/drivers/idle/intel_idle.c
+@@ -168,7 +168,13 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
+ 
+ 	raw_local_irq_enable();
+ 	ret = __intel_idle(dev, drv, index);
+-	raw_local_irq_disable();
++
++	/*
++	 * The lockdep hardirqs state may be changed to 'on' with timer
++	 * tick interrupt followed by __do_softirq(). Use local_irq_disable()
++	 * to keep the hardirqs state correct.
++	 */
++	local_irq_disable();
+ 
+ 	return ret;
+ }
+diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
+index 2959d80f7fdb6..cd36cf7165423 100644
+--- a/drivers/input/joystick/xpad.c
++++ b/drivers/input/joystick/xpad.c
+@@ -779,9 +779,6 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d
+ 	input_report_key(dev, BTN_C, data[8]);
+ 	input_report_key(dev, BTN_Z, data[9]);
+ 
+-	/* Profile button has a value of 0-3, so it is reported as an axis */
+-	if (xpad->mapping & MAP_PROFILE_BUTTON)
+-		input_report_abs(dev, ABS_PROFILE, data[34]);
+ 
+ 	input_sync(dev);
+ }
+@@ -1059,6 +1056,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char
+ 					(__u16) le16_to_cpup((__le16 *)(data + 8)));
+ 		}
+ 
++		/* Profile button has a value of 0-3, so it is reported as an axis */
++		if (xpad->mapping & MAP_PROFILE_BUTTON)
++			input_report_abs(dev, ABS_PROFILE, data[34]);
++
+ 		/* paddle handling */
+ 		/* based on SDL's SDL_hidapi_xboxone.c */
+ 		if (xpad->mapping & MAP_PADDLES) {
+diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
+index 989228b5a0a44..e2c11d9f3868f 100644
+--- a/drivers/input/mouse/alps.c
++++ b/drivers/input/mouse/alps.c
+@@ -852,8 +852,8 @@ static void alps_process_packet_v6(struct psmouse *psmouse)
+ 			x = y = z = 0;
+ 
+ 		/* Divide 4 since trackpoint's speed is too fast */
+-		input_report_rel(dev2, REL_X, (char)x / 4);
+-		input_report_rel(dev2, REL_Y, -((char)y / 4));
++		input_report_rel(dev2, REL_X, (s8)x / 4);
++		input_report_rel(dev2, REL_Y, -((s8)y / 4));
+ 
+ 		psmouse_report_standard_buttons(dev2, packet[3]);
+ 
+@@ -1104,8 +1104,8 @@ static void alps_process_trackstick_packet_v7(struct psmouse *psmouse)
+ 	    ((packet[3] & 0x20) << 1);
+ 	z = (packet[5] & 0x3f) | ((packet[3] & 0x80) >> 1);
+ 
+-	input_report_rel(dev2, REL_X, (char)x);
+-	input_report_rel(dev2, REL_Y, -((char)y));
++	input_report_rel(dev2, REL_X, (s8)x);
++	input_report_rel(dev2, REL_Y, -((s8)y));
+ 	input_report_abs(dev2, ABS_PRESSURE, z);
+ 
+ 	psmouse_report_standard_buttons(dev2, packet[1]);
+@@ -2294,20 +2294,20 @@ static int alps_get_v3_v7_resolution(struct psmouse *psmouse, int reg_pitch)
+ 	if (reg < 0)
+ 		return reg;
+ 
+-	x_pitch = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */
++	x_pitch = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */
+ 	x_pitch = 50 + 2 * x_pitch; /* In 0.1 mm units */
+ 
+-	y_pitch = (char)reg >> 4; /* sign extend upper 4 bits */
++	y_pitch = (s8)reg >> 4; /* sign extend upper 4 bits */
+ 	y_pitch = 36 + 2 * y_pitch; /* In 0.1 mm units */
+ 
+ 	reg = alps_command_mode_read_reg(psmouse, reg_pitch + 1);
+ 	if (reg < 0)
+ 		return reg;
+ 
+-	x_electrode = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */
++	x_electrode = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */
+ 	x_electrode = 17 + x_electrode;
+ 
+-	y_electrode = (char)reg >> 4; /* sign extend upper 4 bits */
++	y_electrode = (s8)reg >> 4; /* sign extend upper 4 bits */
+ 	y_electrode = 13 + y_electrode;
+ 
+ 	x_phys = x_pitch * (x_electrode - 1); /* In 0.1 mm units */
+diff --git a/drivers/input/mouse/focaltech.c b/drivers/input/mouse/focaltech.c
+index 6fd5fff0cbfff..c74b99077d16a 100644
+--- a/drivers/input/mouse/focaltech.c
++++ b/drivers/input/mouse/focaltech.c
+@@ -202,8 +202,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse,
+ 	state->pressed = packet[0] >> 7;
+ 	finger1 = ((packet[0] >> 4) & 0x7) - 1;
+ 	if (finger1 < FOC_MAX_FINGERS) {
+-		state->fingers[finger1].x += (char)packet[1];
+-		state->fingers[finger1].y += (char)packet[2];
++		state->fingers[finger1].x += (s8)packet[1];
++		state->fingers[finger1].y += (s8)packet[2];
+ 	} else {
+ 		psmouse_err(psmouse, "First finger in rel packet invalid: %d\n",
+ 			    finger1);
+@@ -218,8 +218,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse,
+ 	 */
+ 	finger2 = ((packet[3] >> 4) & 0x7) - 1;
+ 	if (finger2 < FOC_MAX_FINGERS) {
+-		state->fingers[finger2].x += (char)packet[4];
+-		state->fingers[finger2].y += (char)packet[5];
++		state->fingers[finger2].x += (s8)packet[4];
++		state->fingers[finger2].y += (s8)packet[5];
+ 	}
+ }
+ 
+diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h
+index efc61736099b9..028e45bd050bf 100644
+--- a/drivers/input/serio/i8042-acpipnpio.h
++++ b/drivers/input/serio/i8042-acpipnpio.h
+@@ -610,6 +610,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
+ 		},
+ 		.driver_data = (void *)(SERIO_QUIRK_NOMUX)
+ 	},
++	{
++		/* Fujitsu Lifebook A574/H */
++		.matches = {
++			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
++			DMI_MATCH(DMI_PRODUCT_NAME, "FMVA0501PZ"),
++		},
++		.driver_data = (void *)(SERIO_QUIRK_NOMUX)
++	},
+ 	{
+ 		/* Gigabyte M912 */
+ 		.matches = {
+@@ -1116,6 +1124,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
+ 		.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ 					SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ 	},
++	{
++		/*
++		 * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes
++		 * the keyboard very laggy for ~5 seconds after boot and
++		 * sometimes also after resume.
++		 * However both are required for the keyboard to not fail
++		 * completely sometimes after boot or resume.
++		 */
++		.matches = {
++			DMI_MATCH(DMI_BOARD_NAME, "N150CU"),
++		},
++		.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++					SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
++	},
+ 	{
+ 		.matches = {
+ 			DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"),
+@@ -1123,6 +1145,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
+ 		.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+ 					SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ 	},
++	{
++		/*
++		 * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes
++		 * the keyboard very laggy for ~5 seconds after boot and
++		 * sometimes also after resume.
++		 * However both are required for the keyboard to not fail
++		 * completely sometimes after boot or resume.
++		 */
++		.matches = {
++			DMI_MATCH(DMI_BOARD_NAME, "NHxxRZQ"),
++		},
++		.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
++					SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
++	},
+ 	{
+ 		.matches = {
+ 			DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
+diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
+index 8a0a8078de8f7..2e502fcb0d1fd 100644
+--- a/drivers/input/touchscreen/goodix.c
++++ b/drivers/input/touchscreen/goodix.c
+@@ -124,10 +124,18 @@ static const unsigned long goodix_irq_flags[] = {
+ static const struct dmi_system_id nine_bytes_report[] = {
+ #if defined(CONFIG_DMI) && defined(CONFIG_X86)
+ 	{
+-		.ident = "Lenovo YogaBook",
+-		/* YB1-X91L/F and YB1-X90L/F */
++		/* Lenovo Yoga Book X90F / X90L */
+ 		.matches = {
+-			DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9")
++			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
++			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
++			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
++		}
++	},
++	{
++		/* Lenovo Yoga Book X91F / X91L */
++		.matches = {
++			/* Non exact match to match F + L versions */
++			DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"),
+ 		}
+ 	},
+ #endif
+diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
+index b00a0ceb2d137..c80c524ad32d7 100644
+--- a/drivers/iommu/intel/dmar.c
++++ b/drivers/iommu/intel/dmar.c
+@@ -1057,7 +1057,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
+ 	}
+ 
+ 	err = -EINVAL;
+-	if (cap_sagaw(iommu->cap) == 0) {
++	if (!cap_sagaw(iommu->cap) &&
++	    (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) {
+ 		pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
+ 			iommu->name);
+ 		drhd->ignored = 1;
+diff --git a/drivers/md/dm.c b/drivers/md/dm.c
+index fc953013ea260..1b6c3c783a8eb 100644
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -1509,6 +1509,8 @@ static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
+ 		ret = 1;
+ 		break;
+ 	default:
++		if (len)
++			setup_split_accounting(ci, *len);
+ 		/* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
+ 		alloc_multiple_bios(&blist, ci, ti, num_bios);
+ 		while ((clone = bio_list_pop(&blist))) {
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index 272cc5d14906f..beab84f0c585c 100644
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -3131,6 +3131,9 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
+ 		err = kstrtouint(buf, 10, (unsigned int *)&slot);
+ 		if (err < 0)
+ 			return err;
++		if (slot < 0)
++			/* overflow */
++			return -ENOSPC;
+ 	}
+ 	if (rdev->mddev->pers && slot == -1) {
+ 		/* Setting 'slot' on an active array requires also
+diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c
+index 142d4c74017c0..d59ecf776715c 100644
+--- a/drivers/media/platform/qcom/venus/firmware.c
++++ b/drivers/media/platform/qcom/venus/firmware.c
+@@ -38,8 +38,8 @@ static void venus_reset_cpu(struct venus_core *core)
+ 	writel(fw_size, wrapper_base + WRAPPER_FW_END_ADDR);
+ 	writel(0, wrapper_base + WRAPPER_CPA_START_ADDR);
+ 	writel(fw_size, wrapper_base + WRAPPER_CPA_END_ADDR);
+-	writel(0, wrapper_base + WRAPPER_NONPIX_START_ADDR);
+-	writel(0, wrapper_base + WRAPPER_NONPIX_END_ADDR);
++	writel(fw_size, wrapper_base + WRAPPER_NONPIX_START_ADDR);
++	writel(fw_size, wrapper_base + WRAPPER_NONPIX_END_ADDR);
+ 
+ 	if (IS_V6(core)) {
+ 		/* Bring XTSS out of reset */
+diff --git a/drivers/mtd/nand/ecc-mxic.c b/drivers/mtd/nand/ecc-mxic.c
+index 8afdca731b874..6b487ffe2f2dc 100644
+--- a/drivers/mtd/nand/ecc-mxic.c
++++ b/drivers/mtd/nand/ecc-mxic.c
+@@ -429,6 +429,7 @@ static int mxic_ecc_data_xfer_wait_for_completion(struct mxic_ecc_engine *mxic)
+ 		mxic_ecc_enable_int(mxic);
+ 		ret = wait_for_completion_timeout(&mxic->complete,
+ 						  msecs_to_jiffies(1000));
++		ret = ret ? 0 : -ETIMEDOUT;
+ 		mxic_ecc_disable_int(mxic);
+ 	} else {
+ 		ret = readl_poll_timeout(mxic->regs + INTRPT_STS, val,
+diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c
+index 5ee01231ac4cd..a28574c009003 100644
+--- a/drivers/mtd/nand/raw/meson_nand.c
++++ b/drivers/mtd/nand/raw/meson_nand.c
+@@ -176,6 +176,7 @@ struct meson_nfc {
+ 
+ 	dma_addr_t daddr;
+ 	dma_addr_t iaddr;
++	u32 info_bytes;
+ 
+ 	unsigned long assigned_cs;
+ };
+@@ -503,6 +504,7 @@ static int meson_nfc_dma_buffer_setup(struct nand_chip *nand, void *databuf,
+ 					 nfc->daddr, datalen, dir);
+ 			return ret;
+ 		}
++		nfc->info_bytes = infolen;
+ 		cmd = GENCMDIADDRL(NFC_CMD_AIL, nfc->iaddr);
+ 		writel(cmd, nfc->reg_base + NFC_REG_CMD);
+ 
+@@ -520,8 +522,10 @@ static void meson_nfc_dma_buffer_release(struct nand_chip *nand,
+ 	struct meson_nfc *nfc = nand_get_controller_data(nand);
+ 
+ 	dma_unmap_single(nfc->dev, nfc->daddr, datalen, dir);
+-	if (infolen)
++	if (infolen) {
+ 		dma_unmap_single(nfc->dev, nfc->iaddr, infolen, dir);
++		nfc->info_bytes = 0;
++	}
+ }
+ 
+ static int meson_nfc_read_buf(struct nand_chip *nand, u8 *buf, int len)
+@@ -710,6 +714,8 @@ static void meson_nfc_check_ecc_pages_valid(struct meson_nfc *nfc,
+ 		usleep_range(10, 15);
+ 		/* info is updated by nfc dma engine*/
+ 		smp_rmb();
++		dma_sync_single_for_cpu(nfc->dev, nfc->iaddr, nfc->info_bytes,
++					DMA_FROM_DEVICE);
+ 		ret = *info & ECC_COMPLETE;
+ 	} while (!ret);
+ }
+@@ -991,7 +997,7 @@ static const struct mtd_ooblayout_ops meson_ooblayout_ops = {
+ 
+ static int meson_nfc_clk_init(struct meson_nfc *nfc)
+ {
+-	struct clk_parent_data nfc_divider_parent_data[1];
++	struct clk_parent_data nfc_divider_parent_data[1] = {0};
+ 	struct clk_init_data init = {0};
+ 	int ret;
+ 
+diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
+index 003b0ac2854c9..3fffd5da8d3b0 100644
+--- a/drivers/net/dsa/microchip/ksz8795.c
++++ b/drivers/net/dsa/microchip/ksz8795.c
+@@ -958,15 +958,14 @@ int ksz8_fdb_dump(struct ksz_device *dev, int port,
+ 	u16 entries = 0;
+ 	u8 timestamp = 0;
+ 	u8 fid;
+-	u8 member;
+-	struct alu_struct alu;
++	u8 src_port;
++	u8 mac[ETH_ALEN];
+ 
+ 	do {
+-		alu.is_static = false;
+-		ret = ksz8_r_dyn_mac_table(dev, i, alu.mac, &fid, &member,
++		ret = ksz8_r_dyn_mac_table(dev, i, mac, &fid, &src_port,
+ 					   &timestamp, &entries);
+-		if (!ret && (member & BIT(port))) {
+-			ret = cb(alu.mac, alu.fid, alu.is_static, data);
++		if (!ret && port == src_port) {
++			ret = cb(mac, fid, false, data);
+ 			if (ret)
+ 				break;
+ 		}
+diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c
+index 2f4623f3bd852..3698112138b78 100644
+--- a/drivers/net/dsa/microchip/ksz8863_smi.c
++++ b/drivers/net/dsa/microchip/ksz8863_smi.c
+@@ -82,22 +82,16 @@ static const struct regmap_bus regmap_smi[] = {
+ 	{
+ 		.read = ksz8863_mdio_read,
+ 		.write = ksz8863_mdio_write,
+-		.max_raw_read = 1,
+-		.max_raw_write = 1,
+ 	},
+ 	{
+ 		.read = ksz8863_mdio_read,
+ 		.write = ksz8863_mdio_write,
+ 		.val_format_endian_default = REGMAP_ENDIAN_BIG,
+-		.max_raw_read = 2,
+-		.max_raw_write = 2,
+ 	},
+ 	{
+ 		.read = ksz8863_mdio_read,
+ 		.write = ksz8863_mdio_write,
+ 		.val_format_endian_default = REGMAP_ENDIAN_BIG,
+-		.max_raw_read = 4,
+-		.max_raw_write = 4,
+ 	}
+ };
+ 
+@@ -108,7 +102,6 @@ static const struct regmap_config ksz8863_regmap_config[] = {
+ 		.pad_bits = 24,
+ 		.val_bits = 8,
+ 		.cache_type = REGCACHE_NONE,
+-		.use_single_read = 1,
+ 		.lock = ksz_regmap_lock,
+ 		.unlock = ksz_regmap_unlock,
+ 	},
+@@ -118,7 +111,6 @@ static const struct regmap_config ksz8863_regmap_config[] = {
+ 		.pad_bits = 24,
+ 		.val_bits = 16,
+ 		.cache_type = REGCACHE_NONE,
+-		.use_single_read = 1,
+ 		.lock = ksz_regmap_lock,
+ 		.unlock = ksz_regmap_unlock,
+ 	},
+@@ -128,7 +120,6 @@ static const struct regmap_config ksz8863_regmap_config[] = {
+ 		.pad_bits = 24,
+ 		.val_bits = 32,
+ 		.cache_type = REGCACHE_NONE,
+-		.use_single_read = 1,
+ 		.lock = ksz_regmap_lock,
+ 		.unlock = ksz_regmap_unlock,
+ 	}
+diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
+index 19cd05762ab77..8601a9e4e4d2f 100644
+--- a/drivers/net/dsa/microchip/ksz_common.c
++++ b/drivers/net/dsa/microchip/ksz_common.c
+@@ -395,13 +395,13 @@ static const u32 ksz8863_masks[] = {
+ 	[VLAN_TABLE_VALID]		= BIT(19),
+ 	[STATIC_MAC_TABLE_VALID]	= BIT(19),
+ 	[STATIC_MAC_TABLE_USE_FID]	= BIT(21),
+-	[STATIC_MAC_TABLE_FID]		= GENMASK(29, 26),
++	[STATIC_MAC_TABLE_FID]		= GENMASK(25, 22),
+ 	[STATIC_MAC_TABLE_OVERRIDE]	= BIT(20),
+ 	[STATIC_MAC_TABLE_FWD_PORTS]	= GENMASK(18, 16),
+-	[DYNAMIC_MAC_TABLE_ENTRIES_H]	= GENMASK(5, 0),
+-	[DYNAMIC_MAC_TABLE_MAC_EMPTY]	= BIT(7),
++	[DYNAMIC_MAC_TABLE_ENTRIES_H]	= GENMASK(1, 0),
++	[DYNAMIC_MAC_TABLE_MAC_EMPTY]	= BIT(2),
+ 	[DYNAMIC_MAC_TABLE_NOT_READY]	= BIT(7),
+-	[DYNAMIC_MAC_TABLE_ENTRIES]	= GENMASK(31, 28),
++	[DYNAMIC_MAC_TABLE_ENTRIES]	= GENMASK(31, 24),
+ 	[DYNAMIC_MAC_TABLE_FID]		= GENMASK(19, 16),
+ 	[DYNAMIC_MAC_TABLE_SRC_PORT]	= GENMASK(21, 20),
+ 	[DYNAMIC_MAC_TABLE_TIMESTAMP]	= GENMASK(23, 22),
+@@ -411,10 +411,10 @@ static u8 ksz8863_shifts[] = {
+ 	[VLAN_TABLE_MEMBERSHIP_S]	= 16,
+ 	[STATIC_MAC_FWD_PORTS]		= 16,
+ 	[STATIC_MAC_FID]		= 22,
+-	[DYNAMIC_MAC_ENTRIES_H]		= 3,
++	[DYNAMIC_MAC_ENTRIES_H]		= 8,
+ 	[DYNAMIC_MAC_ENTRIES]		= 24,
+ 	[DYNAMIC_MAC_FID]		= 16,
+-	[DYNAMIC_MAC_TIMESTAMP]		= 24,
++	[DYNAMIC_MAC_TIMESTAMP]		= 22,
+ 	[DYNAMIC_MAC_SRC_PORT]		= 20,
+ };
+ 
+diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
+index 89829e0ca8e8f..8211a4d373e81 100644
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -3354,9 +3354,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
+ 	 * If this is the upstream port for this switch, enable
+ 	 * forwarding of unknown unicasts and multicasts.
+ 	 */
+-	reg = MV88E6XXX_PORT_CTL0_IGMP_MLD_SNOOP |
+-		MV88E6185_PORT_CTL0_USE_TAG | MV88E6185_PORT_CTL0_USE_IP |
++	reg = MV88E6185_PORT_CTL0_USE_TAG | MV88E6185_PORT_CTL0_USE_IP |
+ 		MV88E6XXX_PORT_CTL0_STATE_FORWARDING;
++	/* Forward any IPv4 IGMP or IPv6 MLD frames received
++	 * by a USER port to the CPU port to allow snooping.
++	 */
++	if (dsa_is_user_port(ds, port))
++		reg |= MV88E6XXX_PORT_CTL0_IGMP_MLD_SNOOP;
++
+ 	err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL0, reg);
+ 	if (err)
+ 		return err;
+diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c
+index 3e54fac5f9027..5a8fe707ca25e 100644
+--- a/drivers/net/dsa/realtek/realtek-mdio.c
++++ b/drivers/net/dsa/realtek/realtek-mdio.c
+@@ -21,6 +21,7 @@
+ 
+ #include <linux/module.h>
+ #include <linux/of_device.h>
++#include <linux/overflow.h>
+ #include <linux/regmap.h>
+ 
+ #include "realtek.h"
+@@ -152,7 +153,9 @@ static int realtek_mdio_probe(struct mdio_device *mdiodev)
+ 	if (!var)
+ 		return -EINVAL;
+ 
+-	priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL);
++	priv = devm_kzalloc(&mdiodev->dev,
++			    size_add(sizeof(*priv), var->chip_data_sz),
++			    GFP_KERNEL);
+ 	if (!priv)
+ 		return -ENOMEM;
+ 
+diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+index 16c490692f422..12083b9679b54 100644
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+@@ -672,6 +672,18 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
+ 	return 0;
+ }
+ 
++static struct sk_buff *
++bnx2x_build_skb(const struct bnx2x_fastpath *fp, void *data)
++{
++	struct sk_buff *skb;
++
++	if (fp->rx_frag_size)
++		skb = build_skb(data, fp->rx_frag_size);
++	else
++		skb = slab_build_skb(data);
++	return skb;
++}
++
+ static void bnx2x_frag_free(const struct bnx2x_fastpath *fp, void *data)
+ {
+ 	if (fp->rx_frag_size)
+@@ -779,7 +791,7 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
+ 	dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
+ 			 fp->rx_buf_size, DMA_FROM_DEVICE);
+ 	if (likely(new_data))
+-		skb = build_skb(data, fp->rx_frag_size);
++		skb = bnx2x_build_skb(fp, data);
+ 
+ 	if (likely(skb)) {
+ #ifdef BNX2X_STOP_ON_ERROR
+@@ -1046,7 +1058,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
+ 						 dma_unmap_addr(rx_buf, mapping),
+ 						 fp->rx_buf_size,
+ 						 DMA_FROM_DEVICE);
+-				skb = build_skb(data, fp->rx_frag_size);
++				skb = bnx2x_build_skb(fp, data);
+ 				if (unlikely(!skb)) {
+ 					bnx2x_frag_free(fp, data);
+ 					bnx2x_fp_qstats(bp, fp)->
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index b44b2ec5e61a2..015b5848b9583 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -175,12 +175,12 @@ static const struct pci_device_id bnxt_pci_tbl[] = {
+ 	{ PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 },
+ 	{ PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 },
+ 	{ PCI_VDEVICE(BROADCOM, 0x1752), .driver_data = BCM57502 },
+-	{ PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57508_NPAR },
++	{ PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57502_NPAR },
+ 	{ PCI_VDEVICE(BROADCOM, 0x1801), .driver_data = BCM57504_NPAR },
+-	{ PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57502_NPAR },
+-	{ PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57508_NPAR },
++	{ PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57508_NPAR },
++	{ PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57502_NPAR },
+ 	{ PCI_VDEVICE(BROADCOM, 0x1804), .driver_data = BCM57504_NPAR },
+-	{ PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57502_NPAR },
++	{ PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57508_NPAR },
+ 	{ PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 },
+ 	{ PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 },
+ #ifdef CONFIG_BNXT_SRIOV
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+index 56355e64815e2..3056e5bb7d6fa 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -1225,6 +1225,7 @@ struct bnxt_link_info {
+ #define BNXT_LINK_SPEED_40GB	PORT_PHY_QCFG_RESP_LINK_SPEED_40GB
+ #define BNXT_LINK_SPEED_50GB	PORT_PHY_QCFG_RESP_LINK_SPEED_50GB
+ #define BNXT_LINK_SPEED_100GB	PORT_PHY_QCFG_RESP_LINK_SPEED_100GB
++#define BNXT_LINK_SPEED_200GB	PORT_PHY_QCFG_RESP_LINK_SPEED_200GB
+ 	u16			support_speeds;
+ 	u16			support_pam4_speeds;
+ 	u16			auto_link_speeds;	/* fw adv setting */
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+index ec573127b7076..6bd18eb5137f4 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+@@ -1714,6 +1714,8 @@ u32 bnxt_fw_to_ethtool_speed(u16 fw_link_speed)
+ 		return SPEED_50000;
+ 	case BNXT_LINK_SPEED_100GB:
+ 		return SPEED_100000;
++	case BNXT_LINK_SPEED_200GB:
++		return SPEED_200000;
+ 	default:
+ 		return SPEED_UNKNOWN;
+ 	}
+@@ -3738,6 +3740,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
+ 		bnxt_ulp_stop(bp);
+ 		rc = bnxt_close_nic(bp, true, false);
+ 		if (rc) {
++			etest->flags |= ETH_TEST_FL_FAILED;
+ 			bnxt_ulp_start(bp, rc);
+ 			return;
+ 		}
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c
+index ef4d3762bf371..ca229b0efeb65 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_diag.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c
+@@ -44,7 +44,7 @@ static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw,
+ 	return 0;
+ }
+ 
+-struct i40e_diag_reg_test_info i40e_reg_list[] = {
++const struct i40e_diag_reg_test_info i40e_reg_list[] = {
+ 	/* offset               mask         elements   stride */
+ 	{I40E_QTX_CTL(0),       0x0000FFBF, 1,
+ 		I40E_QTX_CTL(1) - I40E_QTX_CTL(0)},
+@@ -78,27 +78,28 @@ i40e_status i40e_diag_reg_test(struct i40e_hw *hw)
+ {
+ 	i40e_status ret_code = 0;
+ 	u32 reg, mask;
++	u32 elements;
+ 	u32 i, j;
+ 
+ 	for (i = 0; i40e_reg_list[i].offset != 0 &&
+ 					     !ret_code; i++) {
+ 
++		elements = i40e_reg_list[i].elements;
+ 		/* set actual reg range for dynamically allocated resources */
+ 		if (i40e_reg_list[i].offset == I40E_QTX_CTL(0) &&
+ 		    hw->func_caps.num_tx_qp != 0)
+-			i40e_reg_list[i].elements = hw->func_caps.num_tx_qp;
++			elements = hw->func_caps.num_tx_qp;
+ 		if ((i40e_reg_list[i].offset == I40E_PFINT_ITRN(0, 0) ||
+ 		     i40e_reg_list[i].offset == I40E_PFINT_ITRN(1, 0) ||
+ 		     i40e_reg_list[i].offset == I40E_PFINT_ITRN(2, 0) ||
+ 		     i40e_reg_list[i].offset == I40E_QINT_TQCTL(0) ||
+ 		     i40e_reg_list[i].offset == I40E_QINT_RQCTL(0)) &&
+ 		    hw->func_caps.num_msix_vectors != 0)
+-			i40e_reg_list[i].elements =
+-				hw->func_caps.num_msix_vectors - 1;
++			elements = hw->func_caps.num_msix_vectors - 1;
+ 
+ 		/* test register access */
+ 		mask = i40e_reg_list[i].mask;
+-		for (j = 0; j < i40e_reg_list[i].elements && !ret_code; j++) {
++		for (j = 0; j < elements && !ret_code; j++) {
+ 			reg = i40e_reg_list[i].offset +
+ 			      (j * i40e_reg_list[i].stride);
+ 			ret_code = i40e_diag_reg_pattern_test(hw, reg, mask);
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h
+index c3340f320a18c..1db7c6d572311 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_diag.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h
+@@ -20,7 +20,7 @@ struct i40e_diag_reg_test_info {
+ 	u32 stride;	/* bytes between each element */
+ };
+ 
+-extern struct i40e_diag_reg_test_info i40e_reg_list[];
++extern const struct i40e_diag_reg_test_info i40e_reg_list[];
+ 
+ i40e_status i40e_diag_reg_test(struct i40e_hw *hw);
+ i40e_status i40e_diag_eeprom_test(struct i40e_hw *hw);
+diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
+index 6d08b397df2ad..8f0ea411dfba0 100644
+--- a/drivers/net/ethernet/intel/ice/ice_sched.c
++++ b/drivers/net/ethernet/intel/ice/ice_sched.c
+@@ -2787,7 +2787,7 @@ static int
+ ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
+ 			   u16 vsi_handle, unsigned long *tc_bitmap)
+ {
+-	struct ice_sched_agg_vsi_info *agg_vsi_info, *old_agg_vsi_info = NULL;
++	struct ice_sched_agg_vsi_info *agg_vsi_info, *iter, *old_agg_vsi_info = NULL;
+ 	struct ice_sched_agg_info *agg_info, *old_agg_info;
+ 	struct ice_hw *hw = pi->hw;
+ 	int status = 0;
+@@ -2805,11 +2805,13 @@ ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
+ 	if (old_agg_info && old_agg_info != agg_info) {
+ 		struct ice_sched_agg_vsi_info *vtmp;
+ 
+-		list_for_each_entry_safe(old_agg_vsi_info, vtmp,
++		list_for_each_entry_safe(iter, vtmp,
+ 					 &old_agg_info->agg_vsi_list,
+ 					 list_entry)
+-			if (old_agg_vsi_info->vsi_handle == vsi_handle)
++			if (iter->vsi_handle == vsi_handle) {
++				old_agg_vsi_info = iter;
+ 				break;
++			}
+ 	}
+ 
+ 	/* check if entry already exist */
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index 61f844d225123..46b36851af460 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ b/drivers/net/ethernet/intel/ice/ice_switch.c
+@@ -1780,18 +1780,36 @@ ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+ int
+ ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable)
+ {
+-	struct ice_vsi_ctx *ctx;
++	struct ice_vsi_ctx *ctx, *cached_ctx;
++	int status;
++
++	cached_ctx = ice_get_vsi_ctx(hw, vsi_handle);
++	if (!cached_ctx)
++		return -ENOENT;
+ 
+-	ctx = ice_get_vsi_ctx(hw, vsi_handle);
++	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ 	if (!ctx)
+-		return -EIO;
++		return -ENOMEM;
++
++	ctx->info.q_opt_rss = cached_ctx->info.q_opt_rss;
++	ctx->info.q_opt_tc = cached_ctx->info.q_opt_tc;
++	ctx->info.q_opt_flags = cached_ctx->info.q_opt_flags;
++
++	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+ 
+ 	if (enable)
+ 		ctx->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+ 	else
+ 		ctx->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+ 
+-	return ice_update_vsi(hw, vsi_handle, ctx, NULL);
++	status = ice_update_vsi(hw, vsi_handle, ctx, NULL);
++	if (!status) {
++		cached_ctx->info.q_opt_flags = ctx->info.q_opt_flags;
++		cached_ctx->info.valid_sections |= ctx->info.valid_sections;
++	}
++
++	kfree(ctx);
++	return status;
+ }
+ 
+ /**
+diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+index c6a58343d81d8..a2645ff3100e4 100644
+--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+@@ -541,6 +541,72 @@ static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf)
+ 	}
+ }
+ 
++/**
++ * ice_vc_fdir_has_prof_conflict
++ * @vf: pointer to the VF structure
++ * @conf: FDIR configuration for each filter
++ *
++ * Check if @conf has conflicting profile with existing profiles
++ *
++ * Return: true if @conf conflicts with an existing profile, false otherwise.
++ */
++static bool
++ice_vc_fdir_has_prof_conflict(struct ice_vf *vf,
++			      struct virtchnl_fdir_fltr_conf *conf)
++{
++	struct ice_fdir_fltr *desc;
++
++	list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) {
++		struct virtchnl_fdir_fltr_conf *existing_conf;
++		enum ice_fltr_ptype flow_type_a, flow_type_b;
++		struct ice_fdir_fltr *a, *b;
++
++		existing_conf = to_fltr_conf_from_desc(desc);
++		a = &existing_conf->input;
++		b = &conf->input;
++		flow_type_a = a->flow_type;
++		flow_type_b = b->flow_type;
++
++		/* No need to compare two rules with different tunnel types or
++		 * with the same protocol type.
++		 */
++		if (existing_conf->ttype != conf->ttype ||
++		    flow_type_a == flow_type_b)
++			continue;
++
++		switch (flow_type_a) {
++		case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
++		case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
++		case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
++			if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
++				return true;
++			break;
++		case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
++			if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
++			    flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
++			    flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_SCTP)
++				return true;
++			break;
++		case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
++		case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
++		case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
++			if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_OTHER)
++				return true;
++			break;
++		case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
++			if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_UDP ||
++			    flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
++			    flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_SCTP)
++				return true;
++			break;
++		default:
++			break;
++		}
++	}
++
++	return false;
++}
++
+ /**
+  * ice_vc_fdir_write_flow_prof
+  * @vf: pointer to the VF structure
+@@ -677,6 +743,13 @@ ice_vc_fdir_config_input_set(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+ 	enum ice_fltr_ptype flow;
+ 	int ret;
+ 
++	ret = ice_vc_fdir_has_prof_conflict(vf, conf);
++	if (ret) {
++		dev_dbg(dev, "Found flow profile conflict for VF %d\n",
++			vf->vf_id);
++		return ret;
++	}
++
+ 	flow = input->flow_type;
+ 	ret = ice_vc_fdir_alloc_prof(vf, flow);
+ 	if (ret) {
+diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+index 41d935d1aaf6f..40aeaa7bd739f 100644
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+@@ -62,35 +62,38 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+ 		       MVPP22_CLS_HEK_IP4_2T,
+ 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+-		       MVPP2_PRS_RI_L4_TCP,
++		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
+ 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+ 
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+ 		       MVPP22_CLS_HEK_IP4_2T,
+ 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+-		       MVPP2_PRS_RI_L4_TCP,
++		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
+ 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+ 
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+ 		       MVPP22_CLS_HEK_IP4_2T,
+ 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+-		       MVPP2_PRS_RI_L4_TCP,
++		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
+ 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+ 
+ 	/* TCP over IPv4 flows, fragmented, with vlan tag */
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
+ 		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+-		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
++		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_IP_FRAG_TRUE |
++			   MVPP2_PRS_RI_L4_TCP,
+ 		       MVPP2_PRS_IP_MASK),
+ 
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
+ 		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+-		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
++		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_IP_FRAG_TRUE |
++			   MVPP2_PRS_RI_L4_TCP,
+ 		       MVPP2_PRS_IP_MASK),
+ 
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
+ 		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+-		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
++		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_IP_FRAG_TRUE |
++			   MVPP2_PRS_RI_L4_TCP,
+ 		       MVPP2_PRS_IP_MASK),
+ 
+ 	/* UDP over IPv4 flows, Not fragmented, no vlan tag */
+@@ -132,35 +135,38 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+ 		       MVPP22_CLS_HEK_IP4_2T,
+ 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+-		       MVPP2_PRS_RI_L4_UDP,
++		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
+ 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+ 
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+ 		       MVPP22_CLS_HEK_IP4_2T,
+ 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+-		       MVPP2_PRS_RI_L4_UDP,
++		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
+ 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+ 
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+ 		       MVPP22_CLS_HEK_IP4_2T,
+ 		       MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+-		       MVPP2_PRS_RI_L4_UDP,
++		       MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
+ 		       MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+ 
+ 	/* UDP over IPv4 flows, fragmented, with vlan tag */
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
+ 		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+-		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
++		       MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_IP_FRAG_TRUE |
++			   MVPP2_PRS_RI_L4_UDP,
+ 		       MVPP2_PRS_IP_MASK),
+ 
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
+ 		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+-		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
++		       MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_IP_FRAG_TRUE |
++			   MVPP2_PRS_RI_L4_UDP,
+ 		       MVPP2_PRS_IP_MASK),
+ 
+ 	MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
+ 		       MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
+-		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
++		       MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_IP_FRAG_TRUE |
++			   MVPP2_PRS_RI_L4_UDP,
+ 		       MVPP2_PRS_IP_MASK),
+ 
+ 	/* TCP over IPv6 flows, not fragmented, no vlan tag */
+diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+index 75ba57bd1d46d..9af22f497a40f 100644
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+@@ -1539,8 +1539,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
+ 	if (!priv->prs_double_vlans)
+ 		return -ENOMEM;
+ 
+-	/* Double VLAN: 0x8100, 0x88A8 */
+-	err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021Q, ETH_P_8021AD,
++	/* Double VLAN: 0x88A8, 0x8100 */
++	err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021AD, ETH_P_8021Q,
+ 					MVPP2_PRS_PORT_MASK);
+ 	if (err)
+ 		return err;
+@@ -1607,59 +1607,45 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
+ static int mvpp2_prs_pppoe_init(struct mvpp2 *priv)
+ {
+ 	struct mvpp2_prs_entry pe;
+-	int tid;
+-
+-	/* IPv4 over PPPoE with options */
+-	tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
+-					MVPP2_PE_LAST_FREE_TID);
+-	if (tid < 0)
+-		return tid;
+-
+-	memset(&pe, 0, sizeof(pe));
+-	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE);
+-	pe.index = tid;
+-
+-	mvpp2_prs_match_etype(&pe, 0, PPP_IP);
+-
+-	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4);
+-	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT,
+-				 MVPP2_PRS_RI_L3_PROTO_MASK);
+-	/* goto ipv4 dest-address (skip eth_type + IP-header-size - 4) */
+-	mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN +
+-				 sizeof(struct iphdr) - 4,
+-				 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+-	/* Set L3 offset */
+-	mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3,
+-				  MVPP2_ETH_TYPE_LEN,
+-				  MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
+-
+-	/* Update shadow table and hw entry */
+-	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE);
+-	mvpp2_prs_hw_write(priv, &pe);
++	int tid, ihl;
+ 
+-	/* IPv4 over PPPoE without options */
+-	tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
+-					MVPP2_PE_LAST_FREE_TID);
+-	if (tid < 0)
+-		return tid;
++	/* IPv4 over PPPoE with header length >= 5 */
++	for (ihl = MVPP2_PRS_IPV4_IHL_MIN; ihl <= MVPP2_PRS_IPV4_IHL_MAX; ihl++) {
++		tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
++						MVPP2_PE_LAST_FREE_TID);
++		if (tid < 0)
++			return tid;
+ 
+-	pe.index = tid;
++		memset(&pe, 0, sizeof(pe));
++		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_PPPOE);
++		pe.index = tid;
+ 
+-	mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN,
+-				     MVPP2_PRS_IPV4_HEAD |
+-				     MVPP2_PRS_IPV4_IHL_MIN,
+-				     MVPP2_PRS_IPV4_HEAD_MASK |
+-				     MVPP2_PRS_IPV4_IHL_MASK);
++		mvpp2_prs_match_etype(&pe, 0, PPP_IP);
++		mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN,
++					     MVPP2_PRS_IPV4_HEAD | ihl,
++					     MVPP2_PRS_IPV4_HEAD_MASK |
++					     MVPP2_PRS_IPV4_IHL_MASK);
+ 
+-	/* Clear ri before updating */
+-	pe.sram[MVPP2_PRS_SRAM_RI_WORD] = 0x0;
+-	pe.sram[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0;
+-	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4,
+-				 MVPP2_PRS_RI_L3_PROTO_MASK);
++		mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4);
++		mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4,
++					 MVPP2_PRS_RI_L3_PROTO_MASK);
++		/* goto ipv4 dst-address (skip eth_type + IP-header-size - 4) */
++		mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN +
++					 sizeof(struct iphdr) - 4,
++					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
++		/* Set L3 offset */
++		mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3,
++					  MVPP2_ETH_TYPE_LEN,
++					  MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
++		/* Set L4 offset */
++		mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4,
++					  MVPP2_ETH_TYPE_LEN + (ihl * 4),
++					  MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
+ 
+-	/* Update shadow table and hw entry */
+-	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE);
+-	mvpp2_prs_hw_write(priv, &pe);
++		/* Update shadow table and hw entry */
++		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_PPPOE);
++		mvpp2_prs_hw_write(priv, &pe);
++	}
+ 
+ 	/* IPv6 over PPPoE */
+ 	tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 332329cb1ee00..bd7c18c839d42 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -713,8 +713,6 @@ static void mtk_mac_link_up(struct phylink_config *config,
+ 		break;
+ 	}
+ 
+-	mtk_set_queue_speed(mac->hw, mac->id, speed);
+-
+ 	/* Configure duplex */
+ 	if (duplex == DUPLEX_FULL)
+ 		mcr |= MAC_MCR_FORCE_DPX;
+@@ -2008,9 +2006,6 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ 			skb_checksum_none_assert(skb);
+ 		skb->protocol = eth_type_trans(skb, netdev);
+ 
+-		if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
+-			mtk_ppe_check_skb(eth->ppe[0], skb, hash);
+-
+ 		if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+ 			if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ 				if (trxd.rxd3 & RX_DMA_VTAG_V2) {
+@@ -2038,6 +2033,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ 			__vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan_tci);
+ 		}
+ 
++		if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
++			mtk_ppe_check_skb(eth->ppe[0], skb, hash);
++
+ 		skb_record_rx_queue(skb, 0);
+ 		napi_gro_receive(napi, skb);
+ 
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
+index 1ff024f42444b..2ea539ccc0802 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
+@@ -8,6 +8,7 @@
+ #include <linux/platform_device.h>
+ #include <linux/if_ether.h>
+ #include <linux/if_vlan.h>
++#include <net/dst_metadata.h>
+ #include <net/dsa.h>
+ #include "mtk_eth_soc.h"
+ #include "mtk_ppe.h"
+@@ -458,6 +459,7 @@ __mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+ 		hwe->ib1 &= ~MTK_FOE_IB1_STATE;
+ 		hwe->ib1 |= FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_INVALID);
+ 		dma_wmb();
++		mtk_ppe_cache_clear(ppe);
+ 	}
+ 	entry->hash = 0xffff;
+ 
+@@ -699,7 +701,9 @@ void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash)
+ 		    skb->dev->dsa_ptr->tag_ops->proto != DSA_TAG_PROTO_MTK)
+ 			goto out;
+ 
+-		tag += 4;
++		if (!skb_metadata_dst(skb))
++			tag += 4;
++
+ 		if (get_unaligned_be16(tag) != ETH_P_8021Q)
+ 			break;
+ 
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+index 81afd5ee3fbf1..161751bb36c9c 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+@@ -576,6 +576,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
+ 		if (IS_ERR(block_cb))
+ 			return PTR_ERR(block_cb);
+ 
++		flow_block_cb_incref(block_cb);
+ 		flow_block_cb_add(block_cb, f);
+ 		list_add_tail(&block_cb->driver_list, &block_cb_list);
+ 		return 0;
+@@ -584,7 +585,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
+ 		if (!block_cb)
+ 			return -ENOENT;
+ 
+-		if (flow_block_cb_decref(block_cb)) {
++		if (!flow_block_cb_decref(block_cb)) {
+ 			flow_block_cb_remove(block_cb, f);
+ 			list_del(&block_cb->driver_list);
+ 		}
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 47d4b54d15634..1f4233b2842f7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -4117,13 +4117,17 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
+ 		struct xsk_buff_pool *xsk_pool =
+ 			mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix);
+ 		struct mlx5e_xsk_param xsk;
++		int max_xdp_mtu;
+ 
+ 		if (!xsk_pool)
+ 			continue;
+ 
+ 		mlx5e_build_xsk_param(xsk_pool, &xsk);
++		max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk);
+ 
+-		if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
++		/* Validate XSK params and XDP MTU in advance */
++		if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) ||
++		    new_params->sw_mtu > max_xdp_mtu) {
+ 			u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
+ 			int max_mtu_frame, max_mtu_page, max_mtu;
+ 
+@@ -4133,9 +4137,9 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
+ 			 */
+ 			max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
+ 			max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0));
+-			max_mtu = min(max_mtu_frame, max_mtu_page);
++			max_mtu = min3(max_mtu_frame, max_mtu_page, max_xdp_mtu);
+ 
+-			netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n",
++			netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u or its redirection XDP program. Try MTU <= %d\n",
+ 				   new_params->sw_mtu, ix, max_mtu);
+ 			return false;
+ 		}
+diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c
+index 930496cd34ed0..b50f16786c246 100644
+--- a/drivers/net/ethernet/realtek/r8169_phy_config.c
++++ b/drivers/net/ethernet/realtek/r8169_phy_config.c
+@@ -826,6 +826,9 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp,
+ 	/* disable phy pfm mode */
+ 	phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0);
+ 
++	/* disable 10m pll off */
++	phy_modify_paged(phydev, 0x0a43, 0x10, BIT(0), 0);
++
+ 	rtl8168g_disable_aldps(phydev);
+ 	rtl8168g_config_eee_phy(phydev);
+ }
+diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
+index 7022fb2005a2f..d30459dbfe8f8 100644
+--- a/drivers/net/ethernet/sfc/ef10.c
++++ b/drivers/net/ethernet/sfc/ef10.c
+@@ -1304,7 +1304,8 @@ static void efx_ef10_fini_nic(struct efx_nic *efx)
+ static int efx_ef10_init_nic(struct efx_nic *efx)
+ {
+ 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+-	netdev_features_t hw_enc_features = 0;
++	struct net_device *net_dev = efx->net_dev;
++	netdev_features_t tun_feats, tso_feats;
+ 	int rc;
+ 
+ 	if (nic_data->must_check_datapath_caps) {
+@@ -1349,20 +1350,30 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
+ 		nic_data->must_restore_piobufs = false;
+ 	}
+ 
+-	/* add encapsulated checksum offload features */
++	/* encap features might change during reset if fw variant changed */
+ 	if (efx_has_cap(efx, VXLAN_NVGRE) && !efx_ef10_is_vf(efx))
+-		hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+-	/* add encapsulated TSO features */
+-	if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) {
+-		netdev_features_t encap_tso_features;
++		net_dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
++	else
++		net_dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+ 
+-		encap_tso_features = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
+-			NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM;
++	tun_feats = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
++		    NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM;
++	tso_feats = NETIF_F_TSO | NETIF_F_TSO6;
+ 
+-		hw_enc_features |= encap_tso_features | NETIF_F_TSO;
+-		efx->net_dev->features |= encap_tso_features;
++	if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) {
++		/* If this is first nic_init, or if it is a reset and a new fw
++		 * variant has added new features, enable them by default.
++		 * If the features are not new, maintain their current value.
++		 */
++		if (!(net_dev->hw_features & tun_feats))
++			net_dev->features |= tun_feats;
++		net_dev->hw_enc_features |= tun_feats | tso_feats;
++		net_dev->hw_features |= tun_feats;
++	} else {
++		net_dev->hw_enc_features &= ~(tun_feats | tso_feats);
++		net_dev->hw_features &= ~tun_feats;
++		net_dev->features &= ~tun_feats;
+ 	}
+-	efx->net_dev->hw_enc_features = hw_enc_features;
+ 
+ 	/* don't fail init if RSS setup doesn't work */
+ 	rc = efx->type->rx_push_rss_config(efx, false,
+@@ -4021,7 +4032,10 @@ static unsigned int efx_ef10_recycle_ring_size(const struct efx_nic *efx)
+ 	 NETIF_F_HW_VLAN_CTAG_FILTER |	\
+ 	 NETIF_F_IPV6_CSUM |		\
+ 	 NETIF_F_RXHASH |		\
+-	 NETIF_F_NTUPLE)
++	 NETIF_F_NTUPLE |		\
++	 NETIF_F_SG |			\
++	 NETIF_F_RXCSUM |		\
++	 NETIF_F_RXALL)
+ 
+ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
+ 	.is_vf = true,
+diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
+index 3a86f1213a051..6a1bff54bc6c3 100644
+--- a/drivers/net/ethernet/sfc/efx.c
++++ b/drivers/net/ethernet/sfc/efx.c
+@@ -1001,21 +1001,18 @@ static int efx_pci_probe_post_io(struct efx_nic *efx)
+ 	}
+ 
+ 	/* Determine netdevice features */
+-	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
+-			      NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
+-	if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) {
+-		net_dev->features |= NETIF_F_TSO6;
+-		if (efx_has_cap(efx, TX_TSO_V2_ENCAP))
+-			net_dev->hw_enc_features |= NETIF_F_TSO6;
+-	}
+-	/* Check whether device supports TSO */
+-	if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
+-		net_dev->features &= ~NETIF_F_ALL_TSO;
++	net_dev->features |= efx->type->offload_features;
++
++	/* Add TSO features */
++	if (efx->type->tso_versions && efx->type->tso_versions(efx))
++		net_dev->features |= NETIF_F_TSO | NETIF_F_TSO6;
++
+ 	/* Mask for features that also apply to VLAN devices */
+ 	net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
+ 				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
+ 				   NETIF_F_RXCSUM);
+ 
++	/* Determine user configurable features */
+ 	net_dev->hw_features |= net_dev->features & ~efx->fixed_features;
+ 
+ 	/* Disable receiving frames with bad FCS, by default. */
+diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
+index a2e511912e6a9..a690d139e1770 100644
+--- a/drivers/net/ethernet/smsc/smsc911x.c
++++ b/drivers/net/ethernet/smsc/smsc911x.c
+@@ -1037,8 +1037,6 @@ static int smsc911x_mii_probe(struct net_device *dev)
+ 		return ret;
+ 	}
+ 
+-	/* Indicate that the MAC is responsible for managing PHY PM */
+-	phydev->mac_managed_pm = true;
+ 	phy_attached_info(phydev);
+ 
+ 	phy_set_max_speed(phydev, SPEED_100);
+@@ -1066,6 +1064,7 @@ static int smsc911x_mii_init(struct platform_device *pdev,
+ 			     struct net_device *dev)
+ {
+ 	struct smsc911x_data *pdata = netdev_priv(dev);
++	struct phy_device *phydev;
+ 	int err = -ENXIO;
+ 
+ 	pdata->mii_bus = mdiobus_alloc();
+@@ -1108,6 +1107,10 @@ static int smsc911x_mii_init(struct platform_device *pdev,
+ 		goto err_out_free_bus_2;
+ 	}
+ 
++	phydev = phy_find_first(pdata->mii_bus);
++	if (phydev)
++		phydev->mac_managed_pm = true;
++
+ 	return 0;
+ 
+ err_out_free_bus_2:
+diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
+index ec9c130276d89..54bb072aeb2d3 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/common.h
++++ b/drivers/net/ethernet/stmicro/stmmac/common.h
+@@ -532,7 +532,6 @@ struct mac_device_info {
+ 	unsigned int xlgmac;
+ 	unsigned int num_vlan;
+ 	u32 vlan_filter[32];
+-	unsigned int promisc;
+ 	bool vlan_fail_q_en;
+ 	u8 vlan_fail_q;
+ };
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+index 8c7a0b7c99520..36251ec2589c9 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+@@ -472,12 +472,6 @@ static int dwmac4_add_hw_vlan_rx_fltr(struct net_device *dev,
+ 	if (vid > 4095)
+ 		return -EINVAL;
+ 
+-	if (hw->promisc) {
+-		netdev_err(dev,
+-			   "Adding VLAN in promisc mode not supported\n");
+-		return -EPERM;
+-	}
+-
+ 	/* Single Rx VLAN Filter */
+ 	if (hw->num_vlan == 1) {
+ 		/* For single VLAN filter, VID 0 means VLAN promiscuous */
+@@ -527,12 +521,6 @@ static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev,
+ {
+ 	int i, ret = 0;
+ 
+-	if (hw->promisc) {
+-		netdev_err(dev,
+-			   "Deleting VLAN in promisc mode not supported\n");
+-		return -EPERM;
+-	}
+-
+ 	/* Single Rx VLAN Filter */
+ 	if (hw->num_vlan == 1) {
+ 		if ((hw->vlan_filter[0] & GMAC_VLAN_TAG_VID) == vid) {
+@@ -557,39 +545,6 @@ static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev,
+ 	return ret;
+ }
+ 
+-static void dwmac4_vlan_promisc_enable(struct net_device *dev,
+-				       struct mac_device_info *hw)
+-{
+-	void __iomem *ioaddr = hw->pcsr;
+-	u32 value;
+-	u32 hash;
+-	u32 val;
+-	int i;
+-
+-	/* Single Rx VLAN Filter */
+-	if (hw->num_vlan == 1) {
+-		dwmac4_write_single_vlan(dev, 0);
+-		return;
+-	}
+-
+-	/* Extended Rx VLAN Filter Enable */
+-	for (i = 0; i < hw->num_vlan; i++) {
+-		if (hw->vlan_filter[i] & GMAC_VLAN_TAG_DATA_VEN) {
+-			val = hw->vlan_filter[i] & ~GMAC_VLAN_TAG_DATA_VEN;
+-			dwmac4_write_vlan_filter(dev, hw, i, val);
+-		}
+-	}
+-
+-	hash = readl(ioaddr + GMAC_VLAN_HASH_TABLE);
+-	if (hash & GMAC_VLAN_VLHT) {
+-		value = readl(ioaddr + GMAC_VLAN_TAG);
+-		if (value & GMAC_VLAN_VTHM) {
+-			value &= ~GMAC_VLAN_VTHM;
+-			writel(value, ioaddr + GMAC_VLAN_TAG);
+-		}
+-	}
+-}
+-
+ static void dwmac4_restore_hw_vlan_rx_fltr(struct net_device *dev,
+ 					   struct mac_device_info *hw)
+ {
+@@ -709,22 +664,12 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
+ 	}
+ 
+ 	/* VLAN filtering */
+-	if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
++	if (dev->flags & IFF_PROMISC && !hw->vlan_fail_q_en)
++		value &= ~GMAC_PACKET_FILTER_VTFE;
++	else if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+ 		value |= GMAC_PACKET_FILTER_VTFE;
+ 
+ 	writel(value, ioaddr + GMAC_PACKET_FILTER);
+-
+-	if (dev->flags & IFF_PROMISC && !hw->vlan_fail_q_en) {
+-		if (!hw->promisc) {
+-			hw->promisc = 1;
+-			dwmac4_vlan_promisc_enable(dev, hw);
+-		}
+-	} else {
+-		if (hw->promisc) {
+-			hw->promisc = 0;
+-			dwmac4_restore_hw_vlan_rx_fltr(dev, hw);
+-		}
+-	}
+ }
+ 
+ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
+diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
+index 0b0c6c0764fe9..d0b5129439ed6 100644
+--- a/drivers/net/ieee802154/ca8210.c
++++ b/drivers/net/ieee802154/ca8210.c
+@@ -1902,10 +1902,9 @@ static int ca8210_skb_tx(
+ 	struct ca8210_priv  *priv
+ )
+ {
+-	int status;
+ 	struct ieee802154_hdr header = { };
+ 	struct secspec secspec;
+-	unsigned int mac_len;
++	int mac_len, status;
+ 
+ 	dev_dbg(&priv->spi->dev, "%s called\n", __func__);
+ 
+diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
+index 0f52c068c46d6..ee6fb00b71eb6 100644
+--- a/drivers/net/ipa/gsi_trans.c
++++ b/drivers/net/ipa/gsi_trans.c
+@@ -156,7 +156,7 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool,
+ 	 * gsi_trans_pool_exit_dma() can assume the total allocated
+ 	 * size is exactly (count * size).
+ 	 */
+-	total_size = get_order(total_size) << PAGE_SHIFT;
++	total_size = PAGE_SIZE << get_order(total_size);
+ 
+ 	virt = dma_alloc_coherent(dev, total_size, &addr, GFP_KERNEL);
+ 	if (!virt)
+diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
+index 7a28e082436e4..d0c916a53d7ce 100644
+--- a/drivers/net/net_failover.c
++++ b/drivers/net/net_failover.c
+@@ -130,14 +130,10 @@ static u16 net_failover_select_queue(struct net_device *dev,
+ 			txq = ops->ndo_select_queue(primary_dev, skb, sb_dev);
+ 		else
+ 			txq = netdev_pick_tx(primary_dev, skb, NULL);
+-
+-		qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+-
+-		return txq;
++	} else {
++		txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+ 	}
+ 
+-	txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+-
+ 	/* Save the original txq to restore before passing to the driver */
+ 	qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+ 
+diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c
+index b4ff9c5073a3c..9ab5eff502b71 100644
+--- a/drivers/net/phy/dp83869.c
++++ b/drivers/net/phy/dp83869.c
+@@ -588,15 +588,13 @@ static int dp83869_of_init(struct phy_device *phydev)
+ 						       &dp83869_internal_delay[0],
+ 						       delay_size, true);
+ 	if (dp83869->rx_int_delay < 0)
+-		dp83869->rx_int_delay =
+-				dp83869_internal_delay[DP83869_CLK_DELAY_DEF];
++		dp83869->rx_int_delay = DP83869_CLK_DELAY_DEF;
+ 
+ 	dp83869->tx_int_delay = phy_get_internal_delay(phydev, dev,
+ 						       &dp83869_internal_delay[0],
+ 						       delay_size, false);
+ 	if (dp83869->tx_int_delay < 0)
+-		dp83869->tx_int_delay =
+-				dp83869_internal_delay[DP83869_CLK_DELAY_DEF];
++		dp83869->tx_int_delay = DP83869_CLK_DELAY_DEF;
+ 
+ 	return ret;
+ }
+diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
+index 682987040ea82..da488cbb05428 100644
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -1688,7 +1688,9 @@ not_lro:
+ 			if (unlikely(rcd->ts))
+ 				__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
+ 
+-			if (adapter->netdev->features & NETIF_F_LRO)
++			/* Use GRO callback if UPT is enabled */
++			if ((adapter->netdev->features & NETIF_F_LRO) &&
++			    !rq->shared->updateRxProd)
+ 				netif_receive_skb(skb);
+ 			else
+ 				napi_gro_receive(&rq->napi, skb);
+diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c
+index 1e6a479766429..c066b0040a3fe 100644
+--- a/drivers/net/wwan/iosm/iosm_ipc_imem.c
++++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c
+@@ -587,6 +587,13 @@ static void ipc_imem_run_state_worker(struct work_struct *instance)
+ 	while (ctrl_chl_idx < IPC_MEM_MAX_CHANNELS) {
+ 		if (!ipc_chnl_cfg_get(&chnl_cfg_port, ctrl_chl_idx)) {
+ 			ipc_imem->ipc_port[ctrl_chl_idx] = NULL;
++
++			if (ipc_imem->pcie->pci->device == INTEL_CP_DEVICE_7560_ID &&
++			    chnl_cfg_port.wwan_port_type == WWAN_PORT_XMMRPC) {
++				ctrl_chl_idx++;
++				continue;
++			}
++
+ 			if (ipc_imem->pcie->pci->device == INTEL_CP_DEVICE_7360_ID &&
+ 			    chnl_cfg_port.wwan_port_type == WWAN_PORT_MBIM) {
+ 				ctrl_chl_idx++;
+diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
+index 3dbfc8a6924ed..1fcbd83f7ff2e 100644
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -166,7 +166,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
+ 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+ 	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+ 
+-	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
++	struct gnttab_copy tx_copy_ops[2 * MAX_PENDING_REQS];
+ 	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+ 	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ 	/* passed to gnttab_[un]map_refs with pages under (un)mapping */
+diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
+index bf627af723bf9..5c266062c08f0 100644
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -334,6 +334,7 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
+ struct xenvif_tx_cb {
+ 	u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1];
+ 	u8 copy_count;
++	u32 split_mask;
+ };
+ 
+ #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
+@@ -361,6 +362,8 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
+ 	struct sk_buff *skb =
+ 		alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
+ 			  GFP_ATOMIC | __GFP_NOWARN);
++
++	BUILD_BUG_ON(sizeof(*XENVIF_TX_CB(skb)) > sizeof(skb->cb));
+ 	if (unlikely(skb == NULL))
+ 		return NULL;
+ 
+@@ -396,11 +399,13 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
+ 	nr_slots = shinfo->nr_frags + 1;
+ 
+ 	copy_count(skb) = 0;
++	XENVIF_TX_CB(skb)->split_mask = 0;
+ 
+ 	/* Create copy ops for exactly data_len bytes into the skb head. */
+ 	__skb_put(skb, data_len);
+ 	while (data_len > 0) {
+ 		int amount = data_len > txp->size ? txp->size : data_len;
++		bool split = false;
+ 
+ 		cop->source.u.ref = txp->gref;
+ 		cop->source.domid = queue->vif->domid;
+@@ -413,6 +418,13 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
+ 		cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
+ 				               - data_len);
+ 
++		/* Don't cross local page boundary! */
++		if (cop->dest.offset + amount > XEN_PAGE_SIZE) {
++			amount = XEN_PAGE_SIZE - cop->dest.offset;
++			XENVIF_TX_CB(skb)->split_mask |= 1U << copy_count(skb);
++			split = true;
++		}
++
+ 		cop->len = amount;
+ 		cop->flags = GNTCOPY_source_gref;
+ 
+@@ -420,7 +432,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
+ 		pending_idx = queue->pending_ring[index];
+ 		callback_param(queue, pending_idx).ctx = NULL;
+ 		copy_pending_idx(skb, copy_count(skb)) = pending_idx;
+-		copy_count(skb)++;
++		if (!split)
++			copy_count(skb)++;
+ 
+ 		cop++;
+ 		data_len -= amount;
+@@ -441,7 +454,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
+ 			nr_slots--;
+ 		} else {
+ 			/* The copy op partially covered the tx_request.
+-			 * The remainder will be mapped.
++			 * The remainder will be mapped or copied in the next
++			 * iteration.
+ 			 */
+ 			txp->offset += amount;
+ 			txp->size -= amount;
+@@ -539,6 +553,13 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
+ 		pending_idx = copy_pending_idx(skb, i);
+ 
+ 		newerr = (*gopp_copy)->status;
++
++		/* Split copies need to be handled together. */
++		if (XENVIF_TX_CB(skb)->split_mask & (1U << i)) {
++			(*gopp_copy)++;
++			if (!newerr)
++				newerr = (*gopp_copy)->status;
++		}
+ 		if (likely(!newerr)) {
+ 			/* The first frag might still have this slot mapped */
+ 			if (i < copy_count(skb) - 1 || !sharedslot)
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 29c902b9aecbd..ea3f0806783a3 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -3126,6 +3126,7 @@ out_dev_unmap:
+ 	nvme_dev_unmap(dev);
+ out_uninit_ctrl:
+ 	nvme_uninit_ctrl(&dev->ctrl);
++	nvme_put_ctrl(&dev->ctrl);
+ 	return result;
+ }
+ 
+@@ -3490,6 +3491,8 @@ static const struct pci_device_id nvme_id_table[] = {
+ 		.driver_data = NVME_QUIRK_BOGUS_NID, },
+ 	{ PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */
+ 		.driver_data = NVME_QUIRK_BOGUS_NID, },
++	{ PCI_DEVICE(0x1d97, 0x1d97), /* Lexar NM620 */
++		.driver_data = NVME_QUIRK_BOGUS_NID, },
+ 	{ PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */
+ 		.driver_data = NVME_QUIRK_BOGUS_NID, },
+ 	{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
+diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
+index 6d5d619ab2e94..346f67d2fdae2 100644
+--- a/drivers/pci/controller/dwc/pcie-designware.c
++++ b/drivers/pci/controller/dwc/pcie-designware.c
+@@ -806,11 +806,6 @@ void dw_pcie_setup(struct dw_pcie *pci)
+ 		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+ 	}
+ 
+-	val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
+-	val &= ~PORT_LINK_FAST_LINK_MODE;
+-	val |= PORT_LINK_DLL_LINK_EN;
+-	dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
+-
+ 	if (dw_pcie_cap_is(pci, CDM_CHECK)) {
+ 		val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
+ 		val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS |
+@@ -818,6 +813,11 @@ void dw_pcie_setup(struct dw_pcie *pci)
+ 		dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
+ 	}
+ 
++	val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
++	val &= ~PORT_LINK_FAST_LINK_MODE;
++	val |= PORT_LINK_DLL_LINK_EN;
++	dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
++
+ 	if (!pci->num_lanes) {
+ 		dev_dbg(pci->dev, "Using h/w default number of lanes\n");
+ 		return;
+diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
+index 32c3edaf90385..5e7b82a2b13d0 100644
+--- a/drivers/pinctrl/pinctrl-amd.c
++++ b/drivers/pinctrl/pinctrl-amd.c
+@@ -865,32 +865,34 @@ static const struct pinconf_ops amd_pinconf_ops = {
+ 	.pin_config_group_set = amd_pinconf_group_set,
+ };
+ 
+-static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
++static void amd_gpio_irq_init_pin(struct amd_gpio *gpio_dev, int pin)
+ {
+-	struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
++	const struct pin_desc *pd;
+ 	unsigned long flags;
+ 	u32 pin_reg, mask;
+-	int i;
+ 
+ 	mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) |
+ 		BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) |
+ 		BIT(WAKE_CNTRL_OFF_S4);
+ 
+-	for (i = 0; i < desc->npins; i++) {
+-		int pin = desc->pins[i].number;
+-		const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin);
+-
+-		if (!pd)
+-			continue;
++	pd = pin_desc_get(gpio_dev->pctrl, pin);
++	if (!pd)
++		return;
+ 
+-		raw_spin_lock_irqsave(&gpio_dev->lock, flags);
++	raw_spin_lock_irqsave(&gpio_dev->lock, flags);
++	pin_reg = readl(gpio_dev->base + pin * 4);
++	pin_reg &= ~mask;
++	writel(pin_reg, gpio_dev->base + pin * 4);
++	raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
++}
+ 
+-		pin_reg = readl(gpio_dev->base + i * 4);
+-		pin_reg &= ~mask;
+-		writel(pin_reg, gpio_dev->base + i * 4);
++static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
++{
++	struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
++	int i;
+ 
+-		raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+-	}
++	for (i = 0; i < desc->npins; i++)
++		amd_gpio_irq_init_pin(gpio_dev, i);
+ }
+ 
+ #ifdef CONFIG_PM_SLEEP
+@@ -943,8 +945,10 @@ static int amd_gpio_resume(struct device *dev)
+ 	for (i = 0; i < desc->npins; i++) {
+ 		int pin = desc->pins[i].number;
+ 
+-		if (!amd_gpio_should_save(gpio_dev, pin))
++		if (!amd_gpio_should_save(gpio_dev, pin)) {
++			amd_gpio_irq_init_pin(gpio_dev, pin);
+ 			continue;
++		}
+ 
+ 		raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+ 		gpio_dev->saved_regs[i] |= readl(gpio_dev->base + pin * 4) & PIN_IRQ_PENDING;
+diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
+index 373eed8bc4be9..c775d239444a6 100644
+--- a/drivers/pinctrl/pinctrl-at91-pio4.c
++++ b/drivers/pinctrl/pinctrl-at91-pio4.c
+@@ -1206,7 +1206,6 @@ static int atmel_pinctrl_probe(struct platform_device *pdev)
+ 		dev_err(dev, "can't add the irq domain\n");
+ 		return -ENODEV;
+ 	}
+-	atmel_pioctrl->irq_domain->name = "atmel gpio";
+ 
+ 	for (i = 0; i < atmel_pioctrl->npins; i++) {
+ 		int irq = irq_create_mapping(atmel_pioctrl->irq_domain, i);
+diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c
+index 29e4a6282a641..1dcbd0937ef5a 100644
+--- a/drivers/pinctrl/pinctrl-ocelot.c
++++ b/drivers/pinctrl/pinctrl-ocelot.c
+@@ -1204,7 +1204,7 @@ static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev,
+ 	regmap_update_bits(info->map, REG_ALT(0, info, pin->pin),
+ 			   BIT(p), f << p);
+ 	regmap_update_bits(info->map, REG_ALT(1, info, pin->pin),
+-			   BIT(p), f << (p - 1));
++			   BIT(p), (f >> 1) << p);
+ 
+ 	return 0;
+ }
+diff --git a/drivers/platform/surface/aggregator/bus.c b/drivers/platform/surface/aggregator/bus.c
+index de539938896e2..b501a79f2a08a 100644
+--- a/drivers/platform/surface/aggregator/bus.c
++++ b/drivers/platform/surface/aggregator/bus.c
+@@ -485,8 +485,10 @@ int __ssam_register_clients(struct device *parent, struct ssam_controller *ctrl,
+ 		 * device, so ignore it and continue with the next one.
+ 		 */
+ 		status = ssam_add_client_device(parent, ctrl, child);
+-		if (status && status != -ENODEV)
++		if (status && status != -ENODEV) {
++			fwnode_handle_put(child);
+ 			goto err;
++		}
+ 	}
+ 
+ 	return 0;
+diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
+index 0eb5bfdd823a1..959ec3c5f376e 100644
+--- a/drivers/platform/x86/ideapad-laptop.c
++++ b/drivers/platform/x86/ideapad-laptop.c
+@@ -1170,7 +1170,6 @@ static const struct key_entry ideapad_keymap[] = {
+ 	{ KE_KEY,  65, { KEY_PROG4 } },
+ 	{ KE_KEY,  66, { KEY_TOUCHPAD_OFF } },
+ 	{ KE_KEY,  67, { KEY_TOUCHPAD_ON } },
+-	{ KE_KEY,  68, { KEY_TOUCHPAD_TOGGLE } },
+ 	{ KE_KEY, 128, { KEY_ESC } },
+ 
+ 	/*
+@@ -1526,18 +1525,16 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_
+ 	if (priv->features.ctrl_ps2_aux_port)
+ 		i8042_command(&param, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE);
+ 
+-	if (send_events) {
+-		/*
+-		 * On older models the EC controls the touchpad and toggles it
+-		 * on/off itself, in this case we report KEY_TOUCHPAD_ON/_OFF.
+-		 * If the EC did not toggle, report KEY_TOUCHPAD_TOGGLE.
+-		 */
+-		if (value != priv->r_touchpad_val) {
+-			ideapad_input_report(priv, value ? 67 : 66);
+-			sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad");
+-		} else {
+-			ideapad_input_report(priv, 68);
+-		}
++	/*
++	 * On older models the EC controls the touchpad and toggles it on/off
++	 * itself, in this case we report KEY_TOUCHPAD_ON/_OFF. Some models do
++	 * an acpi-notify with VPC bit 5 set on resume, so this function get
++	 * called with send_events=true on every resume. Therefor if the EC did
++	 * not toggle, do nothing to avoid sending spurious KEY_TOUCHPAD_TOGGLE.
++	 */
++	if (send_events && value != priv->r_touchpad_val) {
++		ideapad_input_report(priv, value ? 67 : 66);
++		sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad");
+ 	}
+ 
+ 	priv->r_touchpad_val = value;
+diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
+index 3a15d32d7644c..b9591969e0fa1 100644
+--- a/drivers/platform/x86/intel/pmc/core.c
++++ b/drivers/platform/x86/intel/pmc/core.c
+@@ -66,7 +66,18 @@ static inline void pmc_core_reg_write(struct pmc_dev *pmcdev, int reg_offset,
+ 
+ static inline u64 pmc_core_adjust_slp_s0_step(struct pmc_dev *pmcdev, u32 value)
+ {
+-	return (u64)value * pmcdev->map->slp_s0_res_counter_step;
++	/*
++	 * ADL PCH does not have the SLP_S0 counter and LPM Residency counters are
++	 * used as a workaround which uses 30.5 usec tick. All other client
++	 * programs have the legacy SLP_S0 residency counter that is using the 122
++	 * usec tick.
++	 */
++	const int lpm_adj_x2 = pmcdev->map->lpm_res_counter_step_x2;
++
++	if (pmcdev->map == &adl_reg_map)
++		return (u64)value * GET_X2_COUNTER((u64)lpm_adj_x2);
++	else
++		return (u64)value * pmcdev->map->slp_s0_res_counter_step;
+ }
+ 
+ static int set_etr3(struct pmc_dev *pmcdev)
+diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
+index a01a92769c1a3..74af3e593b2ca 100644
+--- a/drivers/platform/x86/think-lmi.c
++++ b/drivers/platform/x86/think-lmi.c
+@@ -941,12 +941,23 @@ static ssize_t possible_values_show(struct kobject *kobj, struct kobj_attribute
+ {
+ 	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
+ 
+-	if (!tlmi_priv.can_get_bios_selections)
+-		return -EOPNOTSUPP;
+-
+ 	return sysfs_emit(buf, "%s\n", setting->possible_values);
+ }
+ 
++static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr,
++		char *buf)
++{
++	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
++
++	if (setting->possible_values) {
++		/* Figure out what setting type is as BIOS does not return this */
++		if (strchr(setting->possible_values, ';'))
++			return sysfs_emit(buf, "enumeration\n");
++	}
++	/* Anything else is going to be a string */
++	return sysfs_emit(buf, "string\n");
++}
++
+ static ssize_t current_value_store(struct kobject *kobj,
+ 		struct kobj_attribute *attr,
+ 		const char *buf, size_t count)
+@@ -1036,14 +1047,30 @@ static struct kobj_attribute attr_possible_values = __ATTR_RO(possible_values);
+ 
+ static struct kobj_attribute attr_current_val = __ATTR_RW_MODE(current_value, 0600);
+ 
++static struct kobj_attribute attr_type = __ATTR_RO(type);
++
++static umode_t attr_is_visible(struct kobject *kobj,
++					     struct attribute *attr, int n)
++{
++	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
++
++	/* We don't want to display possible_values attributes if not available */
++	if ((attr == &attr_possible_values.attr) && (!setting->possible_values))
++		return 0;
++
++	return attr->mode;
++}
++
+ static struct attribute *tlmi_attrs[] = {
+ 	&attr_displ_name.attr,
+ 	&attr_current_val.attr,
+ 	&attr_possible_values.attr,
++	&attr_type.attr,
+ 	NULL
+ };
+ 
+ static const struct attribute_group tlmi_attr_group = {
++	.is_visible = attr_is_visible,
+ 	.attrs = tlmi_attrs,
+ };
+ 
+@@ -1423,7 +1450,34 @@ static int tlmi_analyze(void)
+ 			if (ret || !setting->possible_values)
+ 				pr_info("Error retrieving possible values for %d : %s\n",
+ 						i, setting->display_name);
++		} else {
++			/*
++			 * Older Thinkstations don't support the bios_selections API.
++			 * Instead they store this as a [Optional:Option1,Option2] section of the
++			 * name string.
++			 * Try and pull that out if it's available.
++			 */
++			char *item, *optstart, *optend;
++
++			if (!tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID)) {
++				optstart = strstr(item, "[Optional:");
++				if (optstart) {
++					optstart += strlen("[Optional:");
++					optend = strstr(optstart, "]");
++					if (optend)
++						setting->possible_values =
++							kstrndup(optstart, optend - optstart,
++									GFP_KERNEL);
++				}
++			}
+ 		}
++		/*
++		 * firmware-attributes requires that possible_values are separated by ';' but
++		 * Lenovo FW uses ','. Replace appropriately.
++		 */
++		if (setting->possible_values)
++			strreplace(setting->possible_values, ',', ';');
++
+ 		kobject_init(&setting->kobj, &tlmi_attr_setting_ktype);
+ 		tlmi_priv.setting[i] = setting;
+ 		kfree(item);
+diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
+index 08f4cf0ad9e3c..8fa9772acf79b 100644
+--- a/drivers/ptp/ptp_qoriq.c
++++ b/drivers/ptp/ptp_qoriq.c
+@@ -601,7 +601,7 @@ static int ptp_qoriq_probe(struct platform_device *dev)
+ 	return 0;
+ 
+ no_clock:
+-	iounmap(ptp_qoriq->base);
++	iounmap(base);
+ no_ioremap:
+ 	release_resource(ptp_qoriq->rsrc);
+ no_resource:
+diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
+index 2a9867abba20c..e6724a229d237 100644
+--- a/drivers/regulator/fixed.c
++++ b/drivers/regulator/fixed.c
+@@ -215,7 +215,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
+ 		drvdata->enable_clock = devm_clk_get(dev, NULL);
+ 		if (IS_ERR(drvdata->enable_clock)) {
+ 			dev_err(dev, "Can't get enable-clock from devicetree\n");
+-			return -ENOENT;
++			return PTR_ERR(drvdata->enable_clock);
+ 		}
+ 	} else if (drvtype && drvtype->has_performance_state) {
+ 		drvdata->desc.ops = &fixed_voltage_domain_ops;
+diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
+index 997b524bdd2b5..a48c6938ae68f 100644
+--- a/drivers/s390/crypto/vfio_ap_drv.c
++++ b/drivers/s390/crypto/vfio_ap_drv.c
+@@ -54,8 +54,9 @@ static struct ap_driver vfio_ap_drv = {
+ 
+ static void vfio_ap_matrix_dev_release(struct device *dev)
+ {
+-	struct ap_matrix_dev *matrix_dev = dev_get_drvdata(dev);
++	struct ap_matrix_dev *matrix_dev;
+ 
++	matrix_dev = container_of(dev, struct ap_matrix_dev, device);
+ 	kfree(matrix_dev);
+ }
+ 
+diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
+index fe70f8f114352..5f746b4a6b8da 100644
+--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
+@@ -4768,7 +4768,7 @@ int megasas_task_abort_fusion(struct scsi_cmnd *scmd)
+ 	devhandle = megasas_get_tm_devhandle(scmd->device);
+ 
+ 	if (devhandle == (u16)ULONG_MAX) {
+-		ret = SUCCESS;
++		ret = FAILED;
+ 		sdev_printk(KERN_INFO, scmd->device,
+ 			"task abort issued for invalid devhandle\n");
+ 		mutex_unlock(&instance->reset_mutex);
+@@ -4838,7 +4838,7 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd)
+ 	devhandle = megasas_get_tm_devhandle(scmd->device);
+ 
+ 	if (devhandle == (u16)ULONG_MAX) {
+-		ret = SUCCESS;
++		ret = FAILED;
+ 		sdev_printk(KERN_INFO, scmd->device,
+ 			"target reset issued for invalid devhandle\n");
+ 		mutex_unlock(&instance->reset_mutex);
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
+index 2ee9ea57554d7..14ae0a9c5d3d8 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
+@@ -6616,11 +6616,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
+ 	else if (rc == -EAGAIN)
+ 		goto try_32bit_dma;
+ 	total_sz += sense_sz;
+-	ioc_info(ioc,
+-	    "sense pool(0x%p)- dma(0x%llx): depth(%d),"
+-	    "element_size(%d), pool_size(%d kB)\n",
+-	    ioc->sense, (unsigned long long)ioc->sense_dma, ioc->scsiio_depth,
+-	    SCSI_SENSE_BUFFERSIZE, sz / 1024);
+ 	/* reply pool, 4 byte align */
+ 	sz = ioc->reply_free_queue_depth * ioc->reply_sz;
+ 	rc = _base_allocate_reply_pool(ioc, sz);
+diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+index acc11ad569758..cb8980238e8fc 100644
+--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
++++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+@@ -181,7 +181,6 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp
+ 		cancel_delayed_work_sync(&pci_info->work);
+ 		proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0);
+ 		proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0);
+-		thermal_zone_device_disable(tzd);
+ 		pci_info->stored_thres = 0;
+ 		return 0;
+ 	}
+diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c
+index ae28a03fa890b..1157b8869bcca 100644
+--- a/drivers/thunderbolt/quirks.c
++++ b/drivers/thunderbolt/quirks.c
+@@ -26,6 +26,19 @@ static void quirk_clx_disable(struct tb_switch *sw)
+ 	tb_sw_dbg(sw, "disabling CL states\n");
+ }
+ 
++static void quirk_usb3_maximum_bandwidth(struct tb_switch *sw)
++{
++	struct tb_port *port;
++
++	tb_switch_for_each_port(sw, port) {
++		if (!tb_port_is_usb3_down(port))
++			continue;
++		port->max_bw = 16376;
++		tb_port_dbg(port, "USB3 maximum bandwidth limited to %u Mb/s\n",
++			    port->max_bw);
++	}
++}
++
+ struct tb_quirk {
+ 	u16 hw_vendor_id;
+ 	u16 hw_device_id;
+@@ -43,6 +56,24 @@ static const struct tb_quirk tb_quirks[] = {
+ 	 * DP buffers.
+ 	 */
+ 	{ 0x8087, 0x0b26, 0x0000, 0x0000, quirk_dp_credit_allocation },
++	/*
++	 * Limit the maximum USB3 bandwidth for the following Intel USB4
++	 * host routers due to a hardware issue.
++	 */
++	{ 0x8087, PCI_DEVICE_ID_INTEL_ADL_NHI0, 0x0000, 0x0000,
++		  quirk_usb3_maximum_bandwidth },
++	{ 0x8087, PCI_DEVICE_ID_INTEL_ADL_NHI1, 0x0000, 0x0000,
++		  quirk_usb3_maximum_bandwidth },
++	{ 0x8087, PCI_DEVICE_ID_INTEL_RPL_NHI0, 0x0000, 0x0000,
++		  quirk_usb3_maximum_bandwidth },
++	{ 0x8087, PCI_DEVICE_ID_INTEL_RPL_NHI1, 0x0000, 0x0000,
++		  quirk_usb3_maximum_bandwidth },
++	{ 0x8087, PCI_DEVICE_ID_INTEL_MTL_M_NHI0, 0x0000, 0x0000,
++		  quirk_usb3_maximum_bandwidth },
++	{ 0x8087, PCI_DEVICE_ID_INTEL_MTL_P_NHI0, 0x0000, 0x0000,
++		  quirk_usb3_maximum_bandwidth },
++	{ 0x8087, PCI_DEVICE_ID_INTEL_MTL_P_NHI1, 0x0000, 0x0000,
++		  quirk_usb3_maximum_bandwidth },
+ 	/*
+ 	 * CLx is not supported on AMD USB4 Yellow Carp and Pink Sardine platforms.
+ 	 */
+diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h
+index e11d973a8f9b6..f034723b1b40e 100644
+--- a/drivers/thunderbolt/tb.h
++++ b/drivers/thunderbolt/tb.h
+@@ -252,6 +252,8 @@ struct tb_switch {
+  * @ctl_credits: Buffers reserved for control path
+  * @dma_credits: Number of credits allocated for DMA tunneling for all
+  *		 DMA paths through this port.
++ * @max_bw: Maximum possible bandwidth through this adapter if set to
++ *	    non-zero.
+  *
+  * In USB4 terminology this structure represents an adapter (protocol or
+  * lane adapter).
+@@ -277,6 +279,7 @@ struct tb_port {
+ 	unsigned int total_credits;
+ 	unsigned int ctl_credits;
+ 	unsigned int dma_credits;
++	unsigned int max_bw;
+ };
+ 
+ /**
+diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c
+index d5cd219ee9e6b..3a11b30b6c86a 100644
+--- a/drivers/thunderbolt/usb4.c
++++ b/drivers/thunderbolt/usb4.c
+@@ -1882,6 +1882,15 @@ int usb4_port_retimer_nvm_read(struct tb_port *port, u8 index,
+ 				usb4_port_retimer_nvm_read_block, &info);
+ }
+ 
++static inline unsigned int
++usb4_usb3_port_max_bandwidth(const struct tb_port *port, unsigned int bw)
++{
++	/* Take the possible bandwidth limitation into account */
++	if (port->max_bw)
++		return min(bw, port->max_bw);
++	return bw;
++}
++
+ /**
+  * usb4_usb3_port_max_link_rate() - Maximum support USB3 link rate
+  * @port: USB3 adapter port
+@@ -1903,7 +1912,9 @@ int usb4_usb3_port_max_link_rate(struct tb_port *port)
+ 		return ret;
+ 
+ 	lr = (val & ADP_USB3_CS_4_MSLR_MASK) >> ADP_USB3_CS_4_MSLR_SHIFT;
+-	return lr == ADP_USB3_CS_4_MSLR_20G ? 20000 : 10000;
++	ret = lr == ADP_USB3_CS_4_MSLR_20G ? 20000 : 10000;
++
++	return usb4_usb3_port_max_bandwidth(port, ret);
+ }
+ 
+ /**
+@@ -1930,7 +1941,9 @@ int usb4_usb3_port_actual_link_rate(struct tb_port *port)
+ 		return 0;
+ 
+ 	lr = val & ADP_USB3_CS_4_ALR_MASK;
+-	return lr == ADP_USB3_CS_4_ALR_20G ? 20000 : 10000;
++	ret = lr == ADP_USB3_CS_4_ALR_20G ? 20000 : 10000;
++
++	return usb4_usb3_port_max_bandwidth(port, ret);
+ }
+ 
+ static int usb4_usb3_port_cm_request(struct tb_port *port, bool request)
+diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
+index 8cbbb002fefe0..086b509689839 100644
+--- a/drivers/usb/typec/ucsi/ucsi.c
++++ b/drivers/usb/typec/ucsi/ucsi.c
+@@ -1039,9 +1039,8 @@ static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con)
+ 	return NULL;
+ }
+ 
+-static int ucsi_register_port(struct ucsi *ucsi, int index)
++static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con)
+ {
+-	struct ucsi_connector *con = &ucsi->connector[index];
+ 	struct typec_capability *cap = &con->typec_cap;
+ 	enum typec_accessory *accessory = cap->accessory;
+ 	enum usb_role u_role = USB_ROLE_NONE;
+@@ -1062,7 +1061,6 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
+ 	init_completion(&con->complete);
+ 	mutex_init(&con->lock);
+ 	INIT_LIST_HEAD(&con->partner_tasks);
+-	con->num = index + 1;
+ 	con->ucsi = ucsi;
+ 
+ 	cap->fwnode = ucsi_find_fwnode(con);
+@@ -1204,7 +1202,7 @@ out_unlock:
+  */
+ static int ucsi_init(struct ucsi *ucsi)
+ {
+-	struct ucsi_connector *con;
++	struct ucsi_connector *con, *connector;
+ 	u64 command, ntfy;
+ 	int ret;
+ 	int i;
+@@ -1235,16 +1233,16 @@ static int ucsi_init(struct ucsi *ucsi)
+ 	}
+ 
+ 	/* Allocate the connectors. Released in ucsi_unregister() */
+-	ucsi->connector = kcalloc(ucsi->cap.num_connectors + 1,
+-				  sizeof(*ucsi->connector), GFP_KERNEL);
+-	if (!ucsi->connector) {
++	connector = kcalloc(ucsi->cap.num_connectors + 1, sizeof(*connector), GFP_KERNEL);
++	if (!connector) {
+ 		ret = -ENOMEM;
+ 		goto err_reset;
+ 	}
+ 
+ 	/* Register all connectors */
+ 	for (i = 0; i < ucsi->cap.num_connectors; i++) {
+-		ret = ucsi_register_port(ucsi, i);
++		connector[i].num = i + 1;
++		ret = ucsi_register_port(ucsi, &connector[i]);
+ 		if (ret)
+ 			goto err_unregister;
+ 	}
+@@ -1256,11 +1254,12 @@ static int ucsi_init(struct ucsi *ucsi)
+ 	if (ret < 0)
+ 		goto err_unregister;
+ 
++	ucsi->connector = connector;
+ 	ucsi->ntfy = ntfy;
+ 	return 0;
+ 
+ err_unregister:
+-	for (con = ucsi->connector; con->port; con++) {
++	for (con = connector; con->port; con++) {
+ 		ucsi_unregister_partner(con);
+ 		ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON);
+ 		ucsi_unregister_port_psy(con);
+@@ -1269,10 +1268,7 @@ err_unregister:
+ 		typec_unregister_port(con->port);
+ 		con->port = NULL;
+ 	}
+-
+-	kfree(ucsi->connector);
+-	ucsi->connector = NULL;
+-
++	kfree(connector);
+ err_reset:
+ 	memset(&ucsi->cap, 0, sizeof(ucsi->cap));
+ 	ucsi_reset_ppm(ucsi);
+diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
+index 81c3154544287..b6b22fa4a8a01 100644
+--- a/drivers/video/fbdev/au1200fb.c
++++ b/drivers/video/fbdev/au1200fb.c
+@@ -1040,6 +1040,9 @@ static int au1200fb_fb_check_var(struct fb_var_screeninfo *var,
+ 	u32 pixclock;
+ 	int screen_size, plane;
+ 
++	if (!var->pixclock)
++		return -EINVAL;
++
+ 	plane = fbdev->plane;
+ 
+ 	/* Make sure that the mode respect all LCD controller and
+diff --git a/drivers/video/fbdev/geode/lxfb_core.c b/drivers/video/fbdev/geode/lxfb_core.c
+index 8130e9eee2b4b..556d8b1a9e06a 100644
+--- a/drivers/video/fbdev/geode/lxfb_core.c
++++ b/drivers/video/fbdev/geode/lxfb_core.c
+@@ -235,6 +235,9 @@ static void get_modedb(struct fb_videomode **modedb, unsigned int *size)
+ 
+ static int lxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+ {
++	if (!var->pixclock)
++		return -EINVAL;
++
+ 	if (var->xres > 1920 || var->yres > 1440)
+ 		return -EINVAL;
+ 
+diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c
+index 0a9e5067b2010..a81095b2b1ea5 100644
+--- a/drivers/video/fbdev/intelfb/intelfbdrv.c
++++ b/drivers/video/fbdev/intelfb/intelfbdrv.c
+@@ -1222,6 +1222,9 @@ static int intelfb_check_var(struct fb_var_screeninfo *var,
+ 
+ 	dinfo = GET_DINFO(info);
+ 
++	if (!var->pixclock)
++		return -EINVAL;
++
+ 	/* update the pitch */
+ 	if (intelfbhw_validate_mode(dinfo, var) != 0)
+ 		return -EINVAL;
+diff --git a/drivers/video/fbdev/nvidia/nvidia.c b/drivers/video/fbdev/nvidia/nvidia.c
+index e60a276b4855d..ea4ba3dfb96bb 100644
+--- a/drivers/video/fbdev/nvidia/nvidia.c
++++ b/drivers/video/fbdev/nvidia/nvidia.c
+@@ -764,6 +764,8 @@ static int nvidiafb_check_var(struct fb_var_screeninfo *var,
+ 	int pitch, err = 0;
+ 
+ 	NVTRACE_ENTER();
++	if (!var->pixclock)
++		return -EINVAL;
+ 
+ 	var->transp.offset = 0;
+ 	var->transp.length = 0;
+diff --git a/drivers/video/fbdev/tgafb.c b/drivers/video/fbdev/tgafb.c
+index 14d37c49633c6..b44004880f0d1 100644
+--- a/drivers/video/fbdev/tgafb.c
++++ b/drivers/video/fbdev/tgafb.c
+@@ -173,6 +173,9 @@ tgafb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+ {
+ 	struct tga_par *par = (struct tga_par *)info->par;
+ 
++	if (!var->pixclock)
++		return -EINVAL;
++
+ 	if (par->tga_type == TGA_TYPE_8PLANE) {
+ 		if (var->bits_per_pixel != 8)
+ 			return -EINVAL;
+diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
+index 46851511b661b..0d7ae20e39c9a 100644
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -1895,8 +1895,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
+ 	ULIST_ITER_INIT(&uiter);
+ 	ctx->use_path_cache = true;
+ 	while (1) {
+-		bool is_shared;
+-		bool cached;
++		const unsigned long prev_ref_count = ctx->refs.nnodes;
+ 
+ 		walk_ctx.bytenr = bytenr;
+ 		ret = find_parent_nodes(&walk_ctx, &shared);
+@@ -1914,21 +1913,36 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
+ 		ret = 0;
+ 
+ 		/*
+-		 * If our data extent was not directly shared (without multiple
+-		 * reference items), than it might have a single reference item
+-		 * with a count > 1 for the same offset, which means there are 2
+-		 * (or more) file extent items that point to the data extent -
+-		 * this happens when a file extent item needs to be split and
+-		 * then one item gets moved to another leaf due to a b+tree leaf
+-		 * split when inserting some item. In this case the file extent
+-		 * items may be located in different leaves and therefore some
+-		 * of the leaves may be referenced through shared subtrees while
+-		 * others are not. Since our extent buffer cache only works for
+-		 * a single path (by far the most common case and simpler to
+-		 * deal with), we can not use it if we have multiple leaves
+-		 * (which implies multiple paths).
++		 * More than one extent buffer (bytenr) may have been added to
++		 * the ctx->refs ulist, in which case we have to check multiple
++		 * tree paths in case the first one is not shared, so we can not
++		 * use the path cache which is made for a single path. Multiple
++		 * extent buffers at the current level happen when:
++		 *
++		 * 1) level -1, the data extent: If our data extent was not
++		 *    directly shared (without multiple reference items), then
++		 *    it might have a single reference item with a count > 1 for
++		 *    the same offset, which means there are 2 (or more) file
++		 *    extent items that point to the data extent - this happens
++		 *    when a file extent item needs to be split and then one
++		 *    item gets moved to another leaf due to a b+tree leaf split
++		 *    when inserting some item. In this case the file extent
++		 *    items may be located in different leaves and therefore
++		 *    some of the leaves may be referenced through shared
++		 *    subtrees while others are not. Since our extent buffer
++		 *    cache only works for a single path (by far the most common
++		 *    case and simpler to deal with), we can not use it if we
++		 *    have multiple leaves (which implies multiple paths).
++		 *
++		 * 2) level >= 0, a tree node/leaf: We can have a mix of direct
++		 *    and indirect references on a b+tree node/leaf, so we have
++		 *    to check multiple paths, and the extent buffer (the
++		 *    current bytenr) may be shared or not. One example is
++		 *    during relocation as we may get a shared tree block ref
++		 *    (direct ref) and a non-shared tree block ref (indirect
++		 *    ref) for the same node/leaf.
+ 		 */
+-		if (level == -1 && ctx->refs.nnodes > 1)
++		if ((ctx->refs.nnodes - prev_ref_count) > 1)
+ 			ctx->use_path_cache = false;
+ 
+ 		if (level >= 0)
+@@ -1938,18 +1952,45 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
+ 		if (!node)
+ 			break;
+ 		bytenr = node->val;
+-		level++;
+-		cached = lookup_backref_shared_cache(ctx, root, bytenr, level,
+-						     &is_shared);
+-		if (cached) {
+-			ret = (is_shared ? 1 : 0);
+-			break;
++		if (ctx->use_path_cache) {
++			bool is_shared;
++			bool cached;
++
++			level++;
++			cached = lookup_backref_shared_cache(ctx, root, bytenr,
++							     level, &is_shared);
++			if (cached) {
++				ret = (is_shared ? 1 : 0);
++				break;
++			}
+ 		}
+ 		shared.share_count = 0;
+ 		shared.have_delayed_delete_refs = false;
+ 		cond_resched();
+ 	}
+ 
++	/*
++	 * If the path cache is disabled, then it means at some tree level we
++	 * got multiple parents due to a mix of direct and indirect backrefs or
++	 * multiple leaves with file extent items pointing to the same data
++	 * extent. We have to invalidate the cache and cache only the sharedness
++	 * result for the levels where we got only one node/reference.
++	 */
++	if (!ctx->use_path_cache) {
++		int i = 0;
++
++		level--;
++		if (ret >= 0 && level >= 0) {
++			bytenr = ctx->path_cache_entries[level].bytenr;
++			ctx->use_path_cache = true;
++			store_backref_shared_cache(ctx, root, bytenr, level, ret);
++			i = level + 1;
++		}
++
++		for ( ; i < BTRFS_MAX_LEVEL; i++)
++			ctx->path_cache_entries[i].bytenr = 0;
++	}
++
+ 	/*
+ 	 * Cache the sharedness result for the data extent if we know our inode
+ 	 * has more than 1 file extent item that refers to the data extent.
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index d628d545ffea7..c70a888bf8bf6 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -1036,14 +1036,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
+ 			< block_group->zone_unusable);
+ 		WARN_ON(block_group->space_info->disk_total
+ 			< block_group->length * factor);
+-		WARN_ON(test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
+-				 &block_group->runtime_flags) &&
+-			block_group->space_info->active_total_bytes
+-			< block_group->length);
+ 	}
+ 	block_group->space_info->total_bytes -= block_group->length;
+-	if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
+-		block_group->space_info->active_total_bytes -= block_group->length;
+ 	block_group->space_info->bytes_readonly -=
+ 		(block_group->length - block_group->zone_unusable);
+ 	block_group->space_info->bytes_zone_unusable -=
+@@ -3342,13 +3336,15 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
+ 	spin_unlock(&info->delalloc_root_lock);
+ 
+ 	while (total) {
+-		bool reclaim;
++		struct btrfs_space_info *space_info;
++		bool reclaim = false;
+ 
+ 		cache = btrfs_lookup_block_group(info, bytenr);
+ 		if (!cache) {
+ 			ret = -ENOENT;
+ 			break;
+ 		}
++		space_info = cache->space_info;
+ 		factor = btrfs_bg_type_to_factor(cache->flags);
+ 
+ 		/*
+@@ -3363,7 +3359,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
+ 		byte_in_group = bytenr - cache->start;
+ 		WARN_ON(byte_in_group > cache->length);
+ 
+-		spin_lock(&cache->space_info->lock);
++		spin_lock(&space_info->lock);
+ 		spin_lock(&cache->lock);
+ 
+ 		if (btrfs_test_opt(info, SPACE_CACHE) &&
+@@ -3376,23 +3372,23 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
+ 			old_val += num_bytes;
+ 			cache->used = old_val;
+ 			cache->reserved -= num_bytes;
+-			cache->space_info->bytes_reserved -= num_bytes;
+-			cache->space_info->bytes_used += num_bytes;
+-			cache->space_info->disk_used += num_bytes * factor;
++			space_info->bytes_reserved -= num_bytes;
++			space_info->bytes_used += num_bytes;
++			space_info->disk_used += num_bytes * factor;
+ 			spin_unlock(&cache->lock);
+-			spin_unlock(&cache->space_info->lock);
++			spin_unlock(&space_info->lock);
+ 		} else {
+ 			old_val -= num_bytes;
+ 			cache->used = old_val;
+ 			cache->pinned += num_bytes;
+-			btrfs_space_info_update_bytes_pinned(info,
+-					cache->space_info, num_bytes);
+-			cache->space_info->bytes_used -= num_bytes;
+-			cache->space_info->disk_used -= num_bytes * factor;
++			btrfs_space_info_update_bytes_pinned(info, space_info,
++							     num_bytes);
++			space_info->bytes_used -= num_bytes;
++			space_info->disk_used -= num_bytes * factor;
+ 
+ 			reclaim = should_reclaim_block_group(cache, num_bytes);
+ 			spin_unlock(&cache->lock);
+-			spin_unlock(&cache->space_info->lock);
++			spin_unlock(&space_info->lock);
+ 
+ 			set_extent_dirty(&trans->transaction->pinned_extents,
+ 					 bytenr, bytenr + num_bytes - 1,
+diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
+index 0d250d052487c..d84cef89cdff5 100644
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -2693,8 +2693,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
+ 		bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
+ 
+ 	spin_lock(&ctl->tree_lock);
++	/* Count initial region as zone_unusable until it gets activated. */
+ 	if (!used)
+ 		to_free = size;
++	else if (initial &&
++		 test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) &&
++		 (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
++		to_free = 0;
+ 	else if (initial)
+ 		to_free = block_group->zone_capacity;
+ 	else if (offset >= block_group->alloc_offset)
+@@ -2722,7 +2727,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
+ 	reclaimable_unusable = block_group->zone_unusable -
+ 			       (block_group->length - block_group->zone_capacity);
+ 	/* All the region is now unusable. Mark it as unused and reclaim */
+-	if (block_group->zone_unusable == block_group->length) {
++	if (block_group->zone_unusable == block_group->length &&
++	    block_group->alloc_offset) {
+ 		btrfs_mark_bg_unused(block_group);
+ 	} else if (bg_reclaim_threshold &&
+ 		   reclaimable_unusable >=
+diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
+index 3d8156fc8523f..f180ca061aef4 100644
+--- a/fs/btrfs/fs.h
++++ b/fs/btrfs/fs.h
+@@ -119,11 +119,8 @@ enum {
+ 	/* Indicate that we want to commit the transaction. */
+ 	BTRFS_FS_NEED_TRANS_COMMIT,
+ 
+-	/*
+-	 * Indicate metadata over-commit is disabled. This is set when active
+-	 * zone tracking is needed.
+-	 */
+-	BTRFS_FS_NO_OVERCOMMIT,
++	/* This is set when active zone tracking is needed. */
++	BTRFS_FS_ACTIVE_ZONE_TRACKING,
+ 
+ 	/*
+ 	 * Indicate if we have some features changed, this is mostly for
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index 7e348bd2ccdeb..c232636ecdfea 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -3731,7 +3731,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
+ 	}
+ 
+ 	/* update qgroup status and info */
++	mutex_lock(&fs_info->qgroup_ioctl_lock);
+ 	err = btrfs_run_qgroups(trans);
++	mutex_unlock(&fs_info->qgroup_ioctl_lock);
+ 	if (err < 0)
+ 		btrfs_handle_fs_error(fs_info, err,
+ 				      "failed to update qgroup status and info");
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
+index af97413abcf43..abf2b7f143078 100644
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2828,13 +2828,22 @@ cleanup:
+ }
+ 
+ /*
+- * called from commit_transaction. Writes all changed qgroups to disk.
++ * Writes all changed qgroups to disk.
++ * Called by the transaction commit path and the qgroup assign ioctl.
+  */
+ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
+ {
+ 	struct btrfs_fs_info *fs_info = trans->fs_info;
+ 	int ret = 0;
+ 
++	/*
++	 * In case we are called from the qgroup assign ioctl, assert that we
++	 * are holding the qgroup_ioctl_lock, otherwise we can race with a quota
++	 * disable operation (ioctl) and access a freed quota root.
++	 */
++	if (trans->transaction->state != TRANS_STATE_COMMIT_DOING)
++		lockdep_assert_held(&fs_info->qgroup_ioctl_lock);
++
+ 	if (!fs_info->quota_root)
+ 		return ret;
+ 
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index 69c09508afb50..3eecce86f63fc 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -308,8 +308,6 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
+ 	ASSERT(found);
+ 	spin_lock(&found->lock);
+ 	found->total_bytes += block_group->length;
+-	if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
+-		found->active_total_bytes += block_group->length;
+ 	found->disk_total += block_group->length * factor;
+ 	found->bytes_used += block_group->used;
+ 	found->disk_used += block_group->used * factor;
+@@ -379,22 +377,6 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
+ 	return avail;
+ }
+ 
+-static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info,
+-				       struct btrfs_space_info *space_info)
+-{
+-	/*
+-	 * On regular filesystem, all total_bytes are always writable. On zoned
+-	 * filesystem, there may be a limitation imposed by max_active_zones.
+-	 * For metadata allocation, we cannot finish an existing active block
+-	 * group to avoid a deadlock. Thus, we need to consider only the active
+-	 * groups to be writable for metadata space.
+-	 */
+-	if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
+-		return space_info->total_bytes;
+-
+-	return space_info->active_total_bytes;
+-}
+-
+ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+ 			 struct btrfs_space_info *space_info, u64 bytes,
+ 			 enum btrfs_reserve_flush_enum flush)
+@@ -407,13 +389,13 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+ 		return 0;
+ 
+ 	used = btrfs_space_info_used(space_info, true);
+-	if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) &&
++	if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) &&
+ 	    (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
+ 		avail = 0;
+ 	else
+ 		avail = calc_available_free_space(fs_info, space_info, flush);
+ 
+-	if (used + bytes < writable_total_bytes(fs_info, space_info) + avail)
++	if (used + bytes < space_info->total_bytes + avail)
+ 		return 1;
+ 	return 0;
+ }
+@@ -449,7 +431,7 @@ again:
+ 		ticket = list_first_entry(head, struct reserve_ticket, list);
+ 
+ 		/* Check and see if our ticket can be satisfied now. */
+-		if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) ||
++		if ((used + ticket->bytes <= space_info->total_bytes) ||
+ 		    btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
+ 					 flush)) {
+ 			btrfs_space_info_update_bytes_may_use(fs_info,
+@@ -829,7 +811,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+ {
+ 	u64 used;
+ 	u64 avail;
+-	u64 total;
+ 	u64 to_reclaim = space_info->reclaim_size;
+ 
+ 	lockdep_assert_held(&space_info->lock);
+@@ -844,9 +825,8 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+ 	 * space.  If that's the case add in our overage so we make sure to put
+ 	 * appropriate pressure on the flushing state machine.
+ 	 */
+-	total = writable_total_bytes(fs_info, space_info);
+-	if (total + avail < used)
+-		to_reclaim += used - (total + avail);
++	if (space_info->total_bytes + avail < used)
++		to_reclaim += used - (space_info->total_bytes + avail);
+ 
+ 	return to_reclaim;
+ }
+@@ -856,11 +836,10 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
+ {
+ 	u64 global_rsv_size = fs_info->global_block_rsv.reserved;
+ 	u64 ordered, delalloc;
+-	u64 total = writable_total_bytes(fs_info, space_info);
+ 	u64 thresh;
+ 	u64 used;
+ 
+-	thresh = mult_perc(total, 90);
++	thresh = mult_perc(space_info->total_bytes, 90);
+ 
+ 	lockdep_assert_held(&space_info->lock);
+ 
+@@ -923,8 +902,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
+ 					   BTRFS_RESERVE_FLUSH_ALL);
+ 	used = space_info->bytes_used + space_info->bytes_reserved +
+ 	       space_info->bytes_readonly + global_rsv_size;
+-	if (used < total)
+-		thresh += total - used;
++	if (used < space_info->total_bytes)
++		thresh += space_info->total_bytes - used;
+ 	thresh >>= space_info->clamp;
+ 
+ 	used = space_info->bytes_pinned;
+@@ -1651,7 +1630,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
+ 	 * can_overcommit() to ensure we can overcommit to continue.
+ 	 */
+ 	if (!pending_tickets &&
+-	    ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) ||
++	    ((used + orig_bytes <= space_info->total_bytes) ||
+ 	     btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
+ 		btrfs_space_info_update_bytes_may_use(fs_info, space_info,
+ 						      orig_bytes);
+@@ -1665,8 +1644,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
+ 	 */
+ 	if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) {
+ 		used = btrfs_space_info_used(space_info, false);
+-		if (used + orig_bytes <=
+-		    writable_total_bytes(fs_info, space_info)) {
++		if (used + orig_bytes <= space_info->total_bytes) {
+ 			btrfs_space_info_update_bytes_may_use(fs_info, space_info,
+ 							      orig_bytes);
+ 			ret = 0;
+diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
+index fc99ea2b0c34f..2033b71b18cec 100644
+--- a/fs/btrfs/space-info.h
++++ b/fs/btrfs/space-info.h
+@@ -96,8 +96,6 @@ struct btrfs_space_info {
+ 	u64 bytes_may_use;	/* number of bytes that may be used for
+ 				   delalloc/allocations */
+ 	u64 bytes_readonly;	/* total bytes that are read only */
+-	/* Total bytes in the space, but only accounts active block groups. */
+-	u64 active_total_bytes;
+ 	u64 bytes_zone_unusable;	/* total bytes that are unusable until
+ 					   resetting the device zone */
+ 
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index 8f8d0fce6e4a3..9094e2402922c 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -2035,7 +2035,20 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
+ 
+ 	if (current->journal_info == trans)
+ 		current->journal_info = NULL;
+-	btrfs_scrub_cancel(fs_info);
++
++	/*
++	 * If relocation is running, we can't cancel scrub because that will
++	 * result in a deadlock. Before relocating a block group, relocation
++	 * pauses scrub, then starts and commits a transaction before unpausing
++	 * scrub. If the transaction commit is being done by the relocation
++	 * task or triggered by another task and the relocation task is waiting
++	 * for the commit, and we end up here due to an error in the commit
++	 * path, then calling btrfs_scrub_cancel() will deadlock, as we are
++	 * asking for scrub to stop while having it asked to be paused higher
++	 * above in relocation code.
++	 */
++	if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
++		btrfs_scrub_cancel(fs_info);
+ 
+ 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
+ }
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index df43093b7a46d..fe652f8fc697b 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -1366,8 +1366,17 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
+ 	 * So, we need to add a special mount option to scan for
+ 	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
+ 	 */
+-	flags |= FMODE_EXCL;
+ 
++	/*
++	 * Avoid using flag |= FMODE_EXCL here, as the systemd-udev may
++	 * initiate the device scan which may race with the user's mount
++	 * or mkfs command, resulting in failure.
++	 * Since the device scan is solely for reading purposes, there is
++	 * no need for FMODE_EXCL. Additionally, the devices are read again
++	 * during the mount process. It is ok to get some inconsistent
++	 * values temporarily, as the device paths of the fsid are the only
++	 * required information for assembling the volume.
++	 */
+ 	bdev = blkdev_get_by_path(path, flags, holder);
+ 	if (IS_ERR(bdev))
+ 		return ERR_CAST(bdev);
+@@ -3266,8 +3275,15 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
+ 	btrfs_scrub_pause(fs_info);
+ 	ret = btrfs_relocate_block_group(fs_info, chunk_offset);
+ 	btrfs_scrub_continue(fs_info);
+-	if (ret)
++	if (ret) {
++		/*
++		 * If we had a transaction abort, stop all running scrubs.
++		 * See transaction.c:cleanup_transaction() why we do it here.
++		 */
++		if (BTRFS_FS_ERROR(fs_info))
++			btrfs_scrub_cancel(fs_info);
+ 		return ret;
++	}
+ 
+ 	block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
+ 	if (!block_group)
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index f3b7d8ae93a9f..ce5ebba7fdd9a 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -539,8 +539,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
+ 		}
+ 		atomic_set(&zone_info->active_zones_left,
+ 			   max_active_zones - nactive);
+-		/* Overcommit does not work well with active zone tacking. */
+-		set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags);
++		set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags);
+ 	}
+ 
+ 	/* Validate superblock log */
+@@ -1577,9 +1576,19 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
+ 		return;
+ 
+ 	WARN_ON(cache->bytes_super != 0);
+-	unusable = (cache->alloc_offset - cache->used) +
+-		   (cache->length - cache->zone_capacity);
+-	free = cache->zone_capacity - cache->alloc_offset;
++
++	/* Check for block groups never get activated */
++	if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) &&
++	    cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) &&
++	    !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) &&
++	    cache->alloc_offset == 0) {
++		unusable = cache->length;
++		free = 0;
++	} else {
++		unusable = (cache->alloc_offset - cache->used) +
++			   (cache->length - cache->zone_capacity);
++		free = cache->zone_capacity - cache->alloc_offset;
++	}
+ 
+ 	/* We only need ->free_space in ALLOC_SEQ block groups */
+ 	cache->cached = BTRFS_CACHE_FINISHED;
+@@ -1916,7 +1925,11 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+ 
+ 	/* Successfully activated all the zones */
+ 	set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
+-	space_info->active_total_bytes += block_group->length;
++	WARN_ON(block_group->alloc_offset != 0);
++	if (block_group->zone_unusable == block_group->length) {
++		block_group->zone_unusable = block_group->length - block_group->zone_capacity;
++		space_info->bytes_zone_unusable -= block_group->zone_capacity;
++	}
+ 	spin_unlock(&block_group->lock);
+ 	btrfs_try_granting_tickets(fs_info, space_info);
+ 	spin_unlock(&space_info->lock);
+@@ -2280,7 +2293,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
+ 		u64 avail;
+ 
+ 		spin_lock(&block_group->lock);
+-		if (block_group->reserved ||
++		if (block_group->reserved || block_group->alloc_offset == 0 ||
+ 		    (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
+ 			spin_unlock(&block_group->lock);
+ 			continue;
+@@ -2317,10 +2330,6 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
+ 	if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
+ 		return 0;
+ 
+-	/* No more block groups to activate */
+-	if (space_info->active_total_bytes == space_info->total_bytes)
+-		return 0;
+-
+ 	for (;;) {
+ 		int ret;
+ 		bool need_finish = false;
+diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
+index 63a0ac2b93558..16a703c683b77 100644
+--- a/fs/cifs/cifsfs.h
++++ b/fs/cifs/cifsfs.h
+@@ -118,7 +118,10 @@ extern const struct dentry_operations cifs_ci_dentry_ops;
+ #ifdef CONFIG_CIFS_DFS_UPCALL
+ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
+ #else
+-#define cifs_dfs_d_automount NULL
++static inline struct vfsmount *cifs_dfs_d_automount(struct path *path)
++{
++	return ERR_PTR(-EREMOTE);
++}
+ #endif
+ 
+ /* Functions related to symlinks */
+diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
+index e75184544ecb4..639df85dafd6c 100644
+--- a/fs/cifs/cifsproto.h
++++ b/fs/cifs/cifsproto.h
+@@ -697,5 +697,6 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
+ 
+ struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
+ void cifs_put_tcon_super(struct super_block *sb);
++int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry);
+ 
+ #endif			/* _CIFSPROTO_H */
+diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
+index 566e6a26b897c..f71f8533c2f4d 100644
+--- a/fs/cifs/cifssmb.c
++++ b/fs/cifs/cifssmb.c
+@@ -70,7 +70,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
+ 	struct cifs_ses *ses;
+ 	struct TCP_Server_Info *server;
+ 	struct nls_table *nls_codepage;
+-	int retries;
+ 
+ 	/*
+ 	 * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for
+@@ -98,45 +97,9 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
+ 	}
+ 	spin_unlock(&tcon->tc_lock);
+ 
+-	retries = server->nr_targets;
+-
+-	/*
+-	 * Give demultiplex thread up to 10 seconds to each target available for
+-	 * reconnect -- should be greater than cifs socket timeout which is 7
+-	 * seconds.
+-	 */
+-	while (server->tcpStatus == CifsNeedReconnect) {
+-		rc = wait_event_interruptible_timeout(server->response_q,
+-						      (server->tcpStatus != CifsNeedReconnect),
+-						      10 * HZ);
+-		if (rc < 0) {
+-			cifs_dbg(FYI, "%s: aborting reconnect due to a received signal by the process\n",
+-				 __func__);
+-			return -ERESTARTSYS;
+-		}
+-
+-		/* are we still trying to reconnect? */
+-		spin_lock(&server->srv_lock);
+-		if (server->tcpStatus != CifsNeedReconnect) {
+-			spin_unlock(&server->srv_lock);
+-			break;
+-		}
+-		spin_unlock(&server->srv_lock);
+-
+-		if (retries && --retries)
+-			continue;
+-
+-		/*
+-		 * on "soft" mounts we wait once. Hard mounts keep
+-		 * retrying until process is killed or server comes
+-		 * back on-line
+-		 */
+-		if (!tcon->retry) {
+-			cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n");
+-			return -EHOSTDOWN;
+-		}
+-		retries = server->nr_targets;
+-	}
++	rc = cifs_wait_for_server_reconnect(server, tcon->retry);
++	if (rc)
++		return rc;
+ 
+ 	spin_lock(&ses->chan_lock);
+ 	if (!cifs_chan_needs_reconnect(ses, server) && !tcon->need_reconnect) {
+@@ -4414,8 +4377,13 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses,
+ 		return -ENODEV;
+ 
+ getDFSRetry:
+-	rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB,
+-		      (void **) &pSMBr);
++	/*
++	 * Use smb_init_no_reconnect() instead of smb_init() as
++	 * CIFSGetDFSRefer() may be called from cifs_reconnect_tcon() and thus
++	 * causing an infinite recursion.
++	 */
++	rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc,
++				   (void **)&pSMB, (void **)&pSMBr);
+ 	if (rc)
+ 		return rc;
+ 
+diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
+index 6da2af97b8bac..985e962cf0858 100644
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -244,31 +244,42 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
+ 			cifs_chan_update_iface(ses, server);
+ 
+ 		spin_lock(&ses->chan_lock);
+-		if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server))
+-			goto next_session;
++		if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) {
++			spin_unlock(&ses->chan_lock);
++			continue;
++		}
+ 
+ 		if (mark_smb_session)
+ 			CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses);
+ 		else
+ 			cifs_chan_set_need_reconnect(ses, server);
+ 
++		cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n",
++			 __func__, ses->chans_need_reconnect);
++
+ 		/* If all channels need reconnect, then tcon needs reconnect */
+-		if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses))
+-			goto next_session;
++		if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
++			spin_unlock(&ses->chan_lock);
++			continue;
++		}
++		spin_unlock(&ses->chan_lock);
+ 
++		spin_lock(&ses->ses_lock);
+ 		ses->ses_status = SES_NEED_RECON;
++		spin_unlock(&ses->ses_lock);
+ 
+ 		list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ 			tcon->need_reconnect = true;
++			spin_lock(&tcon->tc_lock);
+ 			tcon->status = TID_NEED_RECON;
++			spin_unlock(&tcon->tc_lock);
+ 		}
+ 		if (ses->tcon_ipc) {
+ 			ses->tcon_ipc->need_reconnect = true;
++			spin_lock(&ses->tcon_ipc->tc_lock);
+ 			ses->tcon_ipc->status = TID_NEED_RECON;
++			spin_unlock(&ses->tcon_ipc->tc_lock);
+ 		}
+-
+-next_session:
+-		spin_unlock(&ses->chan_lock);
+ 	}
+ 	spin_unlock(&cifs_tcp_ses_lock);
+ }
+@@ -3703,11 +3714,19 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses,
+ 
+ 	/* only send once per connect */
+ 	spin_lock(&server->srv_lock);
+-	if (!server->ops->need_neg(server) ||
++	if (server->tcpStatus != CifsGood &&
++	    server->tcpStatus != CifsNew &&
+ 	    server->tcpStatus != CifsNeedNegotiate) {
++		spin_unlock(&server->srv_lock);
++		return -EHOSTDOWN;
++	}
++
++	if (!server->ops->need_neg(server) &&
++	    server->tcpStatus == CifsGood) {
+ 		spin_unlock(&server->srv_lock);
+ 		return 0;
+ 	}
++
+ 	server->tcpStatus = CifsInNegotiate;
+ 	spin_unlock(&server->srv_lock);
+ 
+@@ -3735,39 +3754,48 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
+ 		   struct nls_table *nls_info)
+ {
+ 	int rc = -ENOSYS;
+-	struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
+-	struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
++	struct TCP_Server_Info *pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
++	struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&pserver->dstaddr;
++	struct sockaddr_in *addr = (struct sockaddr_in *)&pserver->dstaddr;
+ 	bool is_binding = false;
+ 
+ 	spin_lock(&ses->ses_lock);
+-	if (server->dstaddr.ss_family == AF_INET6)
+-		scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI6", &addr6->sin6_addr);
+-	else
+-		scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI4", &addr->sin_addr);
++	cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n",
++		 __func__, ses->chans_need_reconnect);
+ 
+ 	if (ses->ses_status != SES_GOOD &&
+ 	    ses->ses_status != SES_NEW &&
+ 	    ses->ses_status != SES_NEED_RECON) {
+ 		spin_unlock(&ses->ses_lock);
+-		return 0;
++		return -EHOSTDOWN;
+ 	}
+ 
+ 	/* only send once per connect */
+ 	spin_lock(&ses->chan_lock);
+-	if (CIFS_ALL_CHANS_GOOD(ses) ||
+-	    cifs_chan_in_reconnect(ses, server)) {
++	if (CIFS_ALL_CHANS_GOOD(ses)) {
++		if (ses->ses_status == SES_NEED_RECON)
++			ses->ses_status = SES_GOOD;
+ 		spin_unlock(&ses->chan_lock);
+ 		spin_unlock(&ses->ses_lock);
+ 		return 0;
+ 	}
+-	is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
++
+ 	cifs_chan_set_in_reconnect(ses, server);
++	is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ 	spin_unlock(&ses->chan_lock);
+ 
+ 	if (!is_binding)
+ 		ses->ses_status = SES_IN_SETUP;
+ 	spin_unlock(&ses->ses_lock);
+ 
++	/* update ses ip_addr only for primary chan */
++	if (server == pserver) {
++		if (server->dstaddr.ss_family == AF_INET6)
++			scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI6", &addr6->sin6_addr);
++		else
++			scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI4", &addr->sin_addr);
++	}
++
+ 	if (!is_binding) {
+ 		ses->capabilities = server->capabilities;
+ 		if (!linuxExtEnabled)
+diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
+index 9f4486b705d5c..5542893ef03f7 100644
+--- a/fs/cifs/misc.c
++++ b/fs/cifs/misc.c
+@@ -1376,3 +1376,47 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid,
+ 	return 0;
+ }
+ #endif
++
++int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry)
++{
++	int timeout = 10;
++	int rc;
++
++	spin_lock(&server->srv_lock);
++	if (server->tcpStatus != CifsNeedReconnect) {
++		spin_unlock(&server->srv_lock);
++		return 0;
++	}
++	timeout *= server->nr_targets;
++	spin_unlock(&server->srv_lock);
++
++	/*
++	 * Give demultiplex thread up to 10 seconds for each target available for
++	 * reconnect -- should be greater than cifs socket timeout which is 7
++	 * seconds.
++	 *
++	 * On "soft" mounts we wait once. Hard mounts keep retrying until
++	 * process is killed or server comes back on-line.
++	 */
++	do {
++		rc = wait_event_interruptible_timeout(server->response_q,
++						      (server->tcpStatus != CifsNeedReconnect),
++						      timeout * HZ);
++		if (rc < 0) {
++			cifs_dbg(FYI, "%s: aborting reconnect due to received signal\n",
++				 __func__);
++			return -ERESTARTSYS;
++		}
++
++		/* are we still trying to reconnect? */
++		spin_lock(&server->srv_lock);
++		if (server->tcpStatus != CifsNeedReconnect) {
++			spin_unlock(&server->srv_lock);
++			return 0;
++		}
++		spin_unlock(&server->srv_lock);
++	} while (retry);
++
++	cifs_dbg(FYI, "%s: gave up waiting on reconnect\n", __func__);
++	return -EHOSTDOWN;
++}
+diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
+index 6e6e44d8b4c79..b37379b62cc77 100644
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -139,72 +139,12 @@ out:
+ 	return;
+ }
+ 
+-static int wait_for_server_reconnect(struct TCP_Server_Info *server,
+-				     __le16 smb2_command, bool retry)
+-{
+-	int timeout = 10;
+-	int rc;
+-
+-	spin_lock(&server->srv_lock);
+-	if (server->tcpStatus != CifsNeedReconnect) {
+-		spin_unlock(&server->srv_lock);
+-		return 0;
+-	}
+-	timeout *= server->nr_targets;
+-	spin_unlock(&server->srv_lock);
+-
+-	/*
+-	 * Return to caller for TREE_DISCONNECT and LOGOFF and CLOSE
+-	 * here since they are implicitly done when session drops.
+-	 */
+-	switch (smb2_command) {
+-	/*
+-	 * BB Should we keep oplock break and add flush to exceptions?
+-	 */
+-	case SMB2_TREE_DISCONNECT:
+-	case SMB2_CANCEL:
+-	case SMB2_CLOSE:
+-	case SMB2_OPLOCK_BREAK:
+-		return -EAGAIN;
+-	}
+-
+-	/*
+-	 * Give demultiplex thread up to 10 seconds to each target available for
+-	 * reconnect -- should be greater than cifs socket timeout which is 7
+-	 * seconds.
+-	 *
+-	 * On "soft" mounts we wait once. Hard mounts keep retrying until
+-	 * process is killed or server comes back on-line.
+-	 */
+-	do {
+-		rc = wait_event_interruptible_timeout(server->response_q,
+-						      (server->tcpStatus != CifsNeedReconnect),
+-						      timeout * HZ);
+-		if (rc < 0) {
+-			cifs_dbg(FYI, "%s: aborting reconnect due to received signal\n",
+-				 __func__);
+-			return -ERESTARTSYS;
+-		}
+-
+-		/* are we still trying to reconnect? */
+-		spin_lock(&server->srv_lock);
+-		if (server->tcpStatus != CifsNeedReconnect) {
+-			spin_unlock(&server->srv_lock);
+-			return 0;
+-		}
+-		spin_unlock(&server->srv_lock);
+-	} while (retry);
+-
+-	cifs_dbg(FYI, "%s: gave up waiting on reconnect\n", __func__);
+-	return -EHOSTDOWN;
+-}
+-
+ static int
+ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
+ 	       struct TCP_Server_Info *server)
+ {
+ 	int rc = 0;
+-	struct nls_table *nls_codepage;
++	struct nls_table *nls_codepage = NULL;
+ 	struct cifs_ses *ses;
+ 
+ 	/*
+@@ -239,7 +179,28 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
+ 	    (!tcon->ses->server) || !server)
+ 		return -EIO;
+ 
+-	rc = wait_for_server_reconnect(server, smb2_command, tcon->retry);
++	spin_lock(&server->srv_lock);
++	if (server->tcpStatus == CifsNeedReconnect) {
++		/*
++		 * Return to caller for TREE_DISCONNECT and LOGOFF and CLOSE
++		 * here since they are implicitly done when session drops.
++		 */
++		switch (smb2_command) {
++		/*
++		 * BB Should we keep oplock break and add flush to exceptions?
++		 */
++		case SMB2_TREE_DISCONNECT:
++		case SMB2_CANCEL:
++		case SMB2_CLOSE:
++		case SMB2_OPLOCK_BREAK:
++			spin_unlock(&server->srv_lock);
++			return -EAGAIN;
++		}
++	}
++	spin_unlock(&server->srv_lock);
++
++again:
++	rc = cifs_wait_for_server_reconnect(server, tcon->retry);
+ 	if (rc)
+ 		return rc;
+ 
+@@ -255,8 +216,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
+ 		 tcon->ses->chans_need_reconnect,
+ 		 tcon->need_reconnect);
+ 
+-	nls_codepage = load_nls_default();
+-
++	mutex_lock(&ses->session_mutex);
+ 	/*
+ 	 * Recheck after acquire mutex. If another thread is negotiating
+ 	 * and the server never sends an answer the socket will be closed
+@@ -265,28 +225,38 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
+ 	spin_lock(&server->srv_lock);
+ 	if (server->tcpStatus == CifsNeedReconnect) {
+ 		spin_unlock(&server->srv_lock);
++		mutex_unlock(&ses->session_mutex);
++
++		if (tcon->retry)
++			goto again;
++
+ 		rc = -EHOSTDOWN;
+ 		goto out;
+ 	}
+ 	spin_unlock(&server->srv_lock);
+ 
++	nls_codepage = load_nls_default();
++
+ 	/*
+ 	 * need to prevent multiple threads trying to simultaneously
+ 	 * reconnect the same SMB session
+ 	 */
++	spin_lock(&ses->ses_lock);
+ 	spin_lock(&ses->chan_lock);
+-	if (!cifs_chan_needs_reconnect(ses, server)) {
++	if (!cifs_chan_needs_reconnect(ses, server) &&
++	    ses->ses_status == SES_GOOD) {
+ 		spin_unlock(&ses->chan_lock);
+-
++		spin_unlock(&ses->ses_lock);
+ 		/* this means that we only need to tree connect */
+ 		if (tcon->need_reconnect)
+ 			goto skip_sess_setup;
+ 
++		mutex_unlock(&ses->session_mutex);
+ 		goto out;
+ 	}
+ 	spin_unlock(&ses->chan_lock);
++	spin_unlock(&ses->ses_lock);
+ 
+-	mutex_lock(&ses->session_mutex);
+ 	rc = cifs_negotiate_protocol(0, ses, server);
+ 	if (!rc) {
+ 		rc = cifs_setup_session(0, ses, server, nls_codepage);
+@@ -302,10 +272,8 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
+ 		mutex_unlock(&ses->session_mutex);
+ 		goto out;
+ 	}
+-	mutex_unlock(&ses->session_mutex);
+ 
+ skip_sess_setup:
+-	mutex_lock(&ses->session_mutex);
+ 	if (!tcon->need_reconnect) {
+ 		mutex_unlock(&ses->session_mutex);
+ 		goto out;
+@@ -320,7 +288,7 @@ skip_sess_setup:
+ 	cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc);
+ 	if (rc) {
+ 		/* If sess reconnected but tcon didn't, something strange ... */
+-		pr_warn_once("reconnect tcon failed rc = %d\n", rc);
++		cifs_dbg(VFS, "reconnect tcon failed rc = %d\n", rc);
+ 		goto out;
+ 	}
+ 
+@@ -1292,9 +1260,9 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
+ 	if (rc)
+ 		return rc;
+ 
+-	spin_lock(&ses->chan_lock);
+-	is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+-	spin_unlock(&ses->chan_lock);
++	spin_lock(&ses->ses_lock);
++	is_binding = (ses->ses_status == SES_GOOD);
++	spin_unlock(&ses->ses_lock);
+ 
+ 	if (is_binding) {
+ 		req->hdr.SessionId = cpu_to_le64(ses->Suid);
+@@ -1452,9 +1420,9 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
+ 		goto out_put_spnego_key;
+ 	}
+ 
+-	spin_lock(&ses->chan_lock);
+-	is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+-	spin_unlock(&ses->chan_lock);
++	spin_lock(&ses->ses_lock);
++	is_binding = (ses->ses_status == SES_GOOD);
++	spin_unlock(&ses->ses_lock);
+ 
+ 	/* keep session key if binding */
+ 	if (!is_binding) {
+@@ -1578,9 +1546,9 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
+ 
+ 	cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
+ 
+-	spin_lock(&ses->chan_lock);
+-	is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+-	spin_unlock(&ses->chan_lock);
++	spin_lock(&ses->ses_lock);
++	is_binding = (ses->ses_status == SES_GOOD);
++	spin_unlock(&ses->ses_lock);
+ 
+ 	/* keep existing ses id and flags if binding */
+ 	if (!is_binding) {
+@@ -1646,9 +1614,9 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
+ 
+ 	rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+ 
+-	spin_lock(&ses->chan_lock);
+-	is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+-	spin_unlock(&ses->chan_lock);
++	spin_lock(&ses->ses_lock);
++	is_binding = (ses->ses_status == SES_GOOD);
++	spin_unlock(&ses->ses_lock);
+ 
+ 	/* keep existing ses id and flags if binding */
+ 	if (!is_binding) {
+diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
+index d827b7547ffad..790acf65a0926 100644
+--- a/fs/cifs/smb2transport.c
++++ b/fs/cifs/smb2transport.c
+@@ -81,6 +81,7 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
+ 	struct cifs_ses *ses = NULL;
+ 	int i;
+ 	int rc = 0;
++	bool is_binding = false;
+ 
+ 	spin_lock(&cifs_tcp_ses_lock);
+ 
+@@ -97,9 +98,12 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
+ 	goto out;
+ 
+ found:
++	spin_lock(&ses->ses_lock);
+ 	spin_lock(&ses->chan_lock);
+-	if (cifs_chan_needs_reconnect(ses, server) &&
+-	    !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
++
++	is_binding = (cifs_chan_needs_reconnect(ses, server) &&
++		      ses->ses_status == SES_GOOD);
++	if (is_binding) {
+ 		/*
+ 		 * If we are in the process of binding a new channel
+ 		 * to an existing session, use the master connection
+@@ -107,6 +111,7 @@ found:
+ 		 */
+ 		memcpy(key, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE);
+ 		spin_unlock(&ses->chan_lock);
++		spin_unlock(&ses->ses_lock);
+ 		goto out;
+ 	}
+ 
+@@ -119,10 +124,12 @@ found:
+ 		if (chan->server == server) {
+ 			memcpy(key, chan->signkey, SMB3_SIGN_KEY_SIZE);
+ 			spin_unlock(&ses->chan_lock);
++			spin_unlock(&ses->ses_lock);
+ 			goto out;
+ 		}
+ 	}
+ 	spin_unlock(&ses->chan_lock);
++	spin_unlock(&ses->ses_lock);
+ 
+ 	cifs_dbg(VFS,
+ 		 "%s: Could not find channel signing key for session 0x%llx\n",
+@@ -392,11 +399,15 @@ generate_smb3signingkey(struct cifs_ses *ses,
+ 	bool is_binding = false;
+ 	int chan_index = 0;
+ 
++	spin_lock(&ses->ses_lock);
+ 	spin_lock(&ses->chan_lock);
+-	is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
++	is_binding = (cifs_chan_needs_reconnect(ses, server) &&
++		      ses->ses_status == SES_GOOD);
++
+ 	chan_index = cifs_ses_get_chan_index(ses, server);
+ 	/* TODO: introduce ref counting for channels when the can be freed */
+ 	spin_unlock(&ses->chan_lock);
++	spin_unlock(&ses->ses_lock);
+ 
+ 	/*
+ 	 * All channels use the same encryption/decryption keys but
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index 4214286e01450..4f4ef7aa2f4a0 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -1980,8 +1980,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
+ 	if (!data->rpc_done) {
+ 		if (data->rpc_status)
+ 			return ERR_PTR(data->rpc_status);
+-		/* cached opens have already been processed */
+-		goto update;
++		return nfs4_try_open_cached(data);
+ 	}
+ 
+ 	ret = nfs_refresh_inode(inode, &data->f_attr);
+@@ -1990,7 +1989,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
+ 
+ 	if (data->o_res.delegation_type != 0)
+ 		nfs4_opendata_check_deleg(data, state);
+-update:
++
+ 	if (!update_open_stateid(state, &data->o_res.stateid,
+ 				NULL, data->o_arg.fmode))
+ 		return ERR_PTR(-EAGAIN);
+diff --git a/fs/verity/enable.c b/fs/verity/enable.c
+index df6b499bf6a14..400c264bf8930 100644
+--- a/fs/verity/enable.c
++++ b/fs/verity/enable.c
+@@ -390,25 +390,27 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
+ 		goto out_drop_write;
+ 
+ 	err = enable_verity(filp, &arg);
+-	if (err)
+-		goto out_allow_write_access;
+ 
+ 	/*
+-	 * Some pages of the file may have been evicted from pagecache after
+-	 * being used in the Merkle tree construction, then read into pagecache
+-	 * again by another process reading from the file concurrently.  Since
+-	 * these pages didn't undergo verification against the file digest which
+-	 * fs-verity now claims to be enforcing, we have to wipe the pagecache
+-	 * to ensure that all future reads are verified.
++	 * We no longer drop the inode's pagecache after enabling verity.  This
++	 * used to be done to try to avoid a race condition where pages could be
++	 * evicted after being used in the Merkle tree construction, then
++	 * re-instantiated by a concurrent read.  Such pages are unverified, and
++	 * the backing storage could have filled them with different content, so
++	 * they shouldn't be used to fulfill reads once verity is enabled.
++	 *
++	 * But, dropping the pagecache has a big performance impact, and it
++	 * doesn't fully solve the race condition anyway.  So for those reasons,
++	 * and also because this race condition isn't very important relatively
++	 * speaking (especially for small-ish files, where the chance of a page
++	 * being used, evicted, *and* re-instantiated all while enabling verity
++	 * is quite small), we no longer drop the inode's pagecache.
+ 	 */
+-	filemap_write_and_wait(inode->i_mapping);
+-	invalidate_inode_pages2(inode->i_mapping);
+ 
+ 	/*
+ 	 * allow_write_access() is needed to pair with deny_write_access().
+ 	 * Regardless, the filesystem won't allow writing to verity files.
+ 	 */
+-out_allow_write_access:
+ 	allow_write_access(filp);
+ out_drop_write:
+ 	mnt_drop_write_file(filp);
+diff --git a/fs/zonefs/Makefile b/fs/zonefs/Makefile
+index 9fe54f5319f22..645f7229de4a0 100644
+--- a/fs/zonefs/Makefile
++++ b/fs/zonefs/Makefile
+@@ -3,4 +3,4 @@ ccflags-y				+= -I$(src)
+ 
+ obj-$(CONFIG_ZONEFS_FS) += zonefs.o
+ 
+-zonefs-y	:= super.o sysfs.o
++zonefs-y	:= super.o file.o sysfs.o
+diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
+new file mode 100644
+index 0000000000000..63cd50840419c
+--- /dev/null
++++ b/fs/zonefs/file.c
+@@ -0,0 +1,902 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Simple file system for zoned block devices exposing zones as files.
++ *
++ * Copyright (C) 2022 Western Digital Corporation or its affiliates.
++ */
++#include <linux/module.h>
++#include <linux/pagemap.h>
++#include <linux/iomap.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/blkdev.h>
++#include <linux/statfs.h>
++#include <linux/writeback.h>
++#include <linux/quotaops.h>
++#include <linux/seq_file.h>
++#include <linux/parser.h>
++#include <linux/uio.h>
++#include <linux/mman.h>
++#include <linux/sched/mm.h>
++#include <linux/task_io_accounting_ops.h>
++
++#include "zonefs.h"
++
++#include "trace.h"
++
++static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset,
++				   loff_t length, unsigned int flags,
++				   struct iomap *iomap, struct iomap *srcmap)
++{
++	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++	struct super_block *sb = inode->i_sb;
++	loff_t isize;
++
++	/*
++	 * All blocks are always mapped below EOF. If reading past EOF,
++	 * act as if there is a hole up to the file maximum size.
++	 */
++	mutex_lock(&zi->i_truncate_mutex);
++	iomap->bdev = inode->i_sb->s_bdev;
++	iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
++	isize = i_size_read(inode);
++	if (iomap->offset >= isize) {
++		iomap->type = IOMAP_HOLE;
++		iomap->addr = IOMAP_NULL_ADDR;
++		iomap->length = length;
++	} else {
++		iomap->type = IOMAP_MAPPED;
++		iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset;
++		iomap->length = isize - iomap->offset;
++	}
++	mutex_unlock(&zi->i_truncate_mutex);
++
++	trace_zonefs_iomap_begin(inode, iomap);
++
++	return 0;
++}
++
++static const struct iomap_ops zonefs_read_iomap_ops = {
++	.iomap_begin	= zonefs_read_iomap_begin,
++};
++
++static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset,
++				    loff_t length, unsigned int flags,
++				    struct iomap *iomap, struct iomap *srcmap)
++{
++	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++	struct super_block *sb = inode->i_sb;
++	loff_t isize;
++
++	/* All write I/Os should always be within the file maximum size */
++	if (WARN_ON_ONCE(offset + length > z->z_capacity))
++		return -EIO;
++
++	/*
++	 * Sequential zones can only accept direct writes. This is already
++	 * checked when writes are issued, so warn if we see a page writeback
++	 * operation.
++	 */
++	if (WARN_ON_ONCE(zonefs_zone_is_seq(z) && !(flags & IOMAP_DIRECT)))
++		return -EIO;
++
++	/*
++	 * For conventional zones, all blocks are always mapped. For sequential
++	 * zones, all blocks are always mapped below the inode size (zone
++	 * write pointer) and unwritten beyond.
++	 */
++	mutex_lock(&zi->i_truncate_mutex);
++	iomap->bdev = inode->i_sb->s_bdev;
++	iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
++	iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset;
++	isize = i_size_read(inode);
++	if (iomap->offset >= isize) {
++		iomap->type = IOMAP_UNWRITTEN;
++		iomap->length = z->z_capacity - iomap->offset;
++	} else {
++		iomap->type = IOMAP_MAPPED;
++		iomap->length = isize - iomap->offset;
++	}
++	mutex_unlock(&zi->i_truncate_mutex);
++
++	trace_zonefs_iomap_begin(inode, iomap);
++
++	return 0;
++}
++
++static const struct iomap_ops zonefs_write_iomap_ops = {
++	.iomap_begin	= zonefs_write_iomap_begin,
++};
++
++static int zonefs_read_folio(struct file *unused, struct folio *folio)
++{
++	return iomap_read_folio(folio, &zonefs_read_iomap_ops);
++}
++
++static void zonefs_readahead(struct readahead_control *rac)
++{
++	iomap_readahead(rac, &zonefs_read_iomap_ops);
++}
++
++/*
++ * Map blocks for page writeback. This is used only on conventional zone files,
++ * which implies that the page range can only be within the fixed inode size.
++ */
++static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc,
++				   struct inode *inode, loff_t offset)
++{
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++
++	if (WARN_ON_ONCE(zonefs_zone_is_seq(z)))
++		return -EIO;
++	if (WARN_ON_ONCE(offset >= i_size_read(inode)))
++		return -EIO;
++
++	/* If the mapping is already OK, nothing needs to be done */
++	if (offset >= wpc->iomap.offset &&
++	    offset < wpc->iomap.offset + wpc->iomap.length)
++		return 0;
++
++	return zonefs_write_iomap_begin(inode, offset,
++					z->z_capacity - offset,
++					IOMAP_WRITE, &wpc->iomap, NULL);
++}
++
++static const struct iomap_writeback_ops zonefs_writeback_ops = {
++	.map_blocks		= zonefs_write_map_blocks,
++};
++
++static int zonefs_writepages(struct address_space *mapping,
++			     struct writeback_control *wbc)
++{
++	struct iomap_writepage_ctx wpc = { };
++
++	return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops);
++}
++
++static int zonefs_swap_activate(struct swap_info_struct *sis,
++				struct file *swap_file, sector_t *span)
++{
++	struct inode *inode = file_inode(swap_file);
++
++	if (zonefs_inode_is_seq(inode)) {
++		zonefs_err(inode->i_sb,
++			   "swap file: not a conventional zone file\n");
++		return -EINVAL;
++	}
++
++	return iomap_swapfile_activate(sis, swap_file, span,
++				       &zonefs_read_iomap_ops);
++}
++
++const struct address_space_operations zonefs_file_aops = {
++	.read_folio		= zonefs_read_folio,
++	.readahead		= zonefs_readahead,
++	.writepages		= zonefs_writepages,
++	.dirty_folio		= filemap_dirty_folio,
++	.release_folio		= iomap_release_folio,
++	.invalidate_folio	= iomap_invalidate_folio,
++	.migrate_folio		= filemap_migrate_folio,
++	.is_partially_uptodate	= iomap_is_partially_uptodate,
++	.error_remove_page	= generic_error_remove_page,
++	.direct_IO		= noop_direct_IO,
++	.swap_activate		= zonefs_swap_activate,
++};
++
++int zonefs_file_truncate(struct inode *inode, loff_t isize)
++{
++	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++	loff_t old_isize;
++	enum req_op op;
++	int ret = 0;
++
++	/*
++	 * Only sequential zone files can be truncated and truncation is allowed
++	 * only down to a 0 size, which is equivalent to a zone reset, and to
++	 * the maximum file size, which is equivalent to a zone finish.
++	 */
++	if (!zonefs_zone_is_seq(z))
++		return -EPERM;
++
++	if (!isize)
++		op = REQ_OP_ZONE_RESET;
++	else if (isize == z->z_capacity)
++		op = REQ_OP_ZONE_FINISH;
++	else
++		return -EPERM;
++
++	inode_dio_wait(inode);
++
++	/* Serialize against page faults */
++	filemap_invalidate_lock(inode->i_mapping);
++
++	/* Serialize against zonefs_{read,write}_iomap_begin() */
++	mutex_lock(&zi->i_truncate_mutex);
++
++	old_isize = i_size_read(inode);
++	if (isize == old_isize)
++		goto unlock;
++
++	ret = zonefs_inode_zone_mgmt(inode, op);
++	if (ret)
++		goto unlock;
++
++	/*
++	 * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set,
++	 * take care of open zones.
++	 */
++	if (z->z_flags & ZONEFS_ZONE_OPEN) {
++		/*
++		 * Truncating a zone to EMPTY or FULL is the equivalent of
++		 * closing the zone. For a truncation to 0, we need to
++		 * re-open the zone to ensure new writes can be processed.
++		 * For a truncation to the maximum file size, the zone is
++		 * closed and writes cannot be accepted anymore, so clear
++		 * the open flag.
++		 */
++		if (!isize)
++			ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
++		else
++			z->z_flags &= ~ZONEFS_ZONE_OPEN;
++	}
++
++	zonefs_update_stats(inode, isize);
++	truncate_setsize(inode, isize);
++	z->z_wpoffset = isize;
++	zonefs_inode_account_active(inode);
++
++unlock:
++	mutex_unlock(&zi->i_truncate_mutex);
++	filemap_invalidate_unlock(inode->i_mapping);
++
++	return ret;
++}
++
++static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
++			     int datasync)
++{
++	struct inode *inode = file_inode(file);
++	int ret = 0;
++
++	if (unlikely(IS_IMMUTABLE(inode)))
++		return -EPERM;
++
++	/*
++	 * Since only direct writes are allowed in sequential files, page cache
++	 * flush is needed only for conventional zone files.
++	 */
++	if (zonefs_inode_is_cnv(inode))
++		ret = file_write_and_wait_range(file, start, end);
++	if (!ret)
++		ret = blkdev_issue_flush(inode->i_sb->s_bdev);
++
++	if (ret)
++		zonefs_io_error(inode, true);
++
++	return ret;
++}
++
++static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
++{
++	struct inode *inode = file_inode(vmf->vma->vm_file);
++	vm_fault_t ret;
++
++	if (unlikely(IS_IMMUTABLE(inode)))
++		return VM_FAULT_SIGBUS;
++
++	/*
++	 * Sanity check: only conventional zone files can have shared
++	 * writeable mappings.
++	 */
++	if (zonefs_inode_is_seq(inode))
++		return VM_FAULT_NOPAGE;
++
++	sb_start_pagefault(inode->i_sb);
++	file_update_time(vmf->vma->vm_file);
++
++	/* Serialize against truncates */
++	filemap_invalidate_lock_shared(inode->i_mapping);
++	ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops);
++	filemap_invalidate_unlock_shared(inode->i_mapping);
++
++	sb_end_pagefault(inode->i_sb);
++	return ret;
++}
++
++static const struct vm_operations_struct zonefs_file_vm_ops = {
++	.fault		= filemap_fault,
++	.map_pages	= filemap_map_pages,
++	.page_mkwrite	= zonefs_filemap_page_mkwrite,
++};
++
++static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma)
++{
++	/*
++	 * Conventional zones accept random writes, so their files can support
++	 * shared writable mappings. For sequential zone files, only read
++	 * mappings are possible since there are no guarantees for write
++	 * ordering between msync() and page cache writeback.
++	 */
++	if (zonefs_inode_is_seq(file_inode(file)) &&
++	    (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
++		return -EINVAL;
++
++	file_accessed(file);
++	vma->vm_ops = &zonefs_file_vm_ops;
++
++	return 0;
++}
++
++static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
++{
++	loff_t isize = i_size_read(file_inode(file));
++
++	/*
++	 * Seeks are limited to below the zone size for conventional zones
++	 * and below the zone write pointer for sequential zones. In both
++	 * cases, this limit is the inode size.
++	 */
++	return generic_file_llseek_size(file, offset, whence, isize, isize);
++}
++
++static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
++					int error, unsigned int flags)
++{
++	struct inode *inode = file_inode(iocb->ki_filp);
++	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++
++	if (error) {
++		zonefs_io_error(inode, true);
++		return error;
++	}
++
++	if (size && zonefs_inode_is_seq(inode)) {
++		/*
++		 * Note that we may be seeing completions out of order,
++		 * but that is not a problem since a write completed
++		 * successfully necessarily means that all preceding writes
++		 * were also successful. So we can safely increase the inode
++		 * size to the write end location.
++		 */
++		mutex_lock(&zi->i_truncate_mutex);
++		if (i_size_read(inode) < iocb->ki_pos + size) {
++			zonefs_update_stats(inode, iocb->ki_pos + size);
++			zonefs_i_size_write(inode, iocb->ki_pos + size);
++		}
++		mutex_unlock(&zi->i_truncate_mutex);
++	}
++
++	return 0;
++}
++
++static const struct iomap_dio_ops zonefs_write_dio_ops = {
++	.end_io			= zonefs_file_write_dio_end_io,
++};
++
++static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
++{
++	struct inode *inode = file_inode(iocb->ki_filp);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++	struct block_device *bdev = inode->i_sb->s_bdev;
++	unsigned int max = bdev_max_zone_append_sectors(bdev);
++	pgoff_t start, end;
++	struct bio *bio;
++	ssize_t size;
++	int nr_pages;
++	ssize_t ret;
++
++	max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
++	iov_iter_truncate(from, max);
++
++	/*
++	 * If the inode block size (zone write granularity) is smaller than the
++	 * page size, we may be appending data belonging to the last page of the
++	 * inode straddling inode->i_size, with that page already cached due to
++	 * a buffered read or readahead. So make sure to invalidate that page.
++	 * This will always be a no-op for the case where the block size is
++	 * equal to the page size.
++	 */
++	start = iocb->ki_pos >> PAGE_SHIFT;
++	end = (iocb->ki_pos + iov_iter_count(from) - 1) >> PAGE_SHIFT;
++	if (invalidate_inode_pages2_range(inode->i_mapping, start, end))
++		return -EBUSY;
++
++	nr_pages = iov_iter_npages(from, BIO_MAX_VECS);
++	if (!nr_pages)
++		return 0;
++
++	bio = bio_alloc(bdev, nr_pages,
++			REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
++	bio->bi_iter.bi_sector = z->z_sector;
++	bio->bi_ioprio = iocb->ki_ioprio;
++	if (iocb_is_dsync(iocb))
++		bio->bi_opf |= REQ_FUA;
++
++	ret = bio_iov_iter_get_pages(bio, from);
++	if (unlikely(ret))
++		goto out_release;
++
++	size = bio->bi_iter.bi_size;
++	task_io_account_write(size);
++
++	if (iocb->ki_flags & IOCB_HIPRI)
++		bio_set_polled(bio, iocb);
++
++	ret = submit_bio_wait(bio);
++
++	/*
++	 * If the file zone was written underneath the file system, the zone
++	 * write pointer may not be where we expect it to be, but the zone
++	 * append write can still succeed. So check manually that we wrote where
++	 * we intended to, that is, at z->z_wpoffset.
++	 */
++	if (!ret) {
++		sector_t wpsector =
++			z->z_sector + (z->z_wpoffset >> SECTOR_SHIFT);
++
++		if (bio->bi_iter.bi_sector != wpsector) {
++			zonefs_warn(inode->i_sb,
++				"Corrupted write pointer %llu for zone at %llu\n",
++				bio->bi_iter.bi_sector, z->z_sector);
++			ret = -EIO;
++		}
++	}
++
++	zonefs_file_write_dio_end_io(iocb, size, ret, 0);
++	trace_zonefs_file_dio_append(inode, size, ret);
++
++out_release:
++	bio_release_pages(bio, false);
++	bio_put(bio);
++
++	if (ret >= 0) {
++		iocb->ki_pos += size;
++		return size;
++	}
++
++	return ret;
++}
++
++/*
++ * Do not exceed the LFS limits nor the file zone size. If pos is under the
++ * limit it becomes a short access. If it exceeds the limit, return -EFBIG.
++ */
++static loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
++					loff_t count)
++{
++	struct inode *inode = file_inode(file);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++	loff_t limit = rlimit(RLIMIT_FSIZE);
++	loff_t max_size = z->z_capacity;
++
++	if (limit != RLIM_INFINITY) {
++		if (pos >= limit) {
++			send_sig(SIGXFSZ, current, 0);
++			return -EFBIG;
++		}
++		count = min(count, limit - pos);
++	}
++
++	if (!(file->f_flags & O_LARGEFILE))
++		max_size = min_t(loff_t, MAX_NON_LFS, max_size);
++
++	if (unlikely(pos >= max_size))
++		return -EFBIG;
++
++	return min(count, max_size - pos);
++}
++
++static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
++{
++	struct file *file = iocb->ki_filp;
++	struct inode *inode = file_inode(file);
++	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++	loff_t count;
++
++	if (IS_SWAPFILE(inode))
++		return -ETXTBSY;
++
++	if (!iov_iter_count(from))
++		return 0;
++
++	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
++		return -EINVAL;
++
++	if (iocb->ki_flags & IOCB_APPEND) {
++		if (zonefs_zone_is_cnv(z))
++			return -EINVAL;
++		mutex_lock(&zi->i_truncate_mutex);
++		iocb->ki_pos = z->z_wpoffset;
++		mutex_unlock(&zi->i_truncate_mutex);
++	}
++
++	count = zonefs_write_check_limits(file, iocb->ki_pos,
++					  iov_iter_count(from));
++	if (count < 0)
++		return count;
++
++	iov_iter_truncate(from, count);
++	return iov_iter_count(from);
++}
++
++/*
++ * Handle direct writes. For sequential zone files, this is the only possible
++ * write path. For these files, check that the user is issuing writes
++ * sequentially from the end of the file. This code assumes that the block layer
++ * delivers write requests to the device in sequential order. This is always the
++ * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE
++ * elevator feature is being used (e.g. mq-deadline). The block layer always
++ * automatically selects such an elevator for zoned block devices during the
++ * device initialization.
++ */
++static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
++{
++	struct inode *inode = file_inode(iocb->ki_filp);
++	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++	struct super_block *sb = inode->i_sb;
++	bool sync = is_sync_kiocb(iocb);
++	bool append = false;
++	ssize_t ret, count;
++
++	/*
++	 * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
++	 * as this can cause write reordering (e.g. the first aio gets EAGAIN
++	 * on the inode lock but the second goes through but is now unaligned).
++	 */
++	if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT))
++		return -EOPNOTSUPP;
++
++	if (iocb->ki_flags & IOCB_NOWAIT) {
++		if (!inode_trylock(inode))
++			return -EAGAIN;
++	} else {
++		inode_lock(inode);
++	}
++
++	count = zonefs_write_checks(iocb, from);
++	if (count <= 0) {
++		ret = count;
++		goto inode_unlock;
++	}
++
++	if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
++		ret = -EINVAL;
++		goto inode_unlock;
++	}
++
++	/* Enforce sequential writes (append only) in sequential zones */
++	if (zonefs_zone_is_seq(z)) {
++		mutex_lock(&zi->i_truncate_mutex);
++		if (iocb->ki_pos != z->z_wpoffset) {
++			mutex_unlock(&zi->i_truncate_mutex);
++			ret = -EINVAL;
++			goto inode_unlock;
++		}
++		mutex_unlock(&zi->i_truncate_mutex);
++		append = sync;
++	}
++
++	if (append) {
++		ret = zonefs_file_dio_append(iocb, from);
++	} else {
++		/*
++		 * iomap_dio_rw() may return ENOTBLK if there was an issue with
++		 * page invalidation. Overwrite that error code with EBUSY to
++		 * be consistent with zonefs_file_dio_append() return value for
++		 * similar issues.
++		 */
++		ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
++				   &zonefs_write_dio_ops, 0, NULL, 0);
++		if (ret == -ENOTBLK)
++			ret = -EBUSY;
++	}
++
++	if (zonefs_zone_is_seq(z) &&
++	    (ret > 0 || ret == -EIOCBQUEUED)) {
++		if (ret > 0)
++			count = ret;
++
++		/*
++		 * Update the zone write pointer offset assuming the write
++		 * operation succeeded. If it did not, the error recovery path
++		 * will correct it. Also do active seq file accounting.
++		 */
++		mutex_lock(&zi->i_truncate_mutex);
++		z->z_wpoffset += count;
++		zonefs_inode_account_active(inode);
++		mutex_unlock(&zi->i_truncate_mutex);
++	}
++
++inode_unlock:
++	inode_unlock(inode);
++
++	return ret;
++}
++
++static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
++					  struct iov_iter *from)
++{
++	struct inode *inode = file_inode(iocb->ki_filp);
++	ssize_t ret;
++
++	/*
++	 * Direct IO writes are mandatory for sequential zone files so that the
++	 * write IO issuing order is preserved.
++	 */
++	if (zonefs_inode_is_seq(inode))
++		return -EIO;
++
++	if (iocb->ki_flags & IOCB_NOWAIT) {
++		if (!inode_trylock(inode))
++			return -EAGAIN;
++	} else {
++		inode_lock(inode);
++	}
++
++	ret = zonefs_write_checks(iocb, from);
++	if (ret <= 0)
++		goto inode_unlock;
++
++	ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops);
++	if (ret > 0)
++		iocb->ki_pos += ret;
++	else if (ret == -EIO)
++		zonefs_io_error(inode, true);
++
++inode_unlock:
++	inode_unlock(inode);
++	if (ret > 0)
++		ret = generic_write_sync(iocb, ret);
++
++	return ret;
++}
++
++static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
++{
++	struct inode *inode = file_inode(iocb->ki_filp);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++
++	if (unlikely(IS_IMMUTABLE(inode)))
++		return -EPERM;
++
++	if (sb_rdonly(inode->i_sb))
++		return -EROFS;
++
++	/* Write operations beyond the zone capacity are not allowed */
++	if (iocb->ki_pos >= z->z_capacity)
++		return -EFBIG;
++
++	if (iocb->ki_flags & IOCB_DIRECT) {
++		ssize_t ret = zonefs_file_dio_write(iocb, from);
++
++		if (ret != -ENOTBLK)
++			return ret;
++	}
++
++	return zonefs_file_buffered_write(iocb, from);
++}
++
++static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size,
++				       int error, unsigned int flags)
++{
++	if (error) {
++		zonefs_io_error(file_inode(iocb->ki_filp), false);
++		return error;
++	}
++
++	return 0;
++}
++
++static const struct iomap_dio_ops zonefs_read_dio_ops = {
++	.end_io			= zonefs_file_read_dio_end_io,
++};
++
++static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
++{
++	struct inode *inode = file_inode(iocb->ki_filp);
++	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++	struct super_block *sb = inode->i_sb;
++	loff_t isize;
++	ssize_t ret;
++
++	/* Offline zones cannot be read */
++	if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777)))
++		return -EPERM;
++
++	if (iocb->ki_pos >= z->z_capacity)
++		return 0;
++
++	if (iocb->ki_flags & IOCB_NOWAIT) {
++		if (!inode_trylock_shared(inode))
++			return -EAGAIN;
++	} else {
++		inode_lock_shared(inode);
++	}
++
++	/* Limit read operations to written data */
++	mutex_lock(&zi->i_truncate_mutex);
++	isize = i_size_read(inode);
++	if (iocb->ki_pos >= isize) {
++		mutex_unlock(&zi->i_truncate_mutex);
++		ret = 0;
++		goto inode_unlock;
++	}
++	iov_iter_truncate(to, isize - iocb->ki_pos);
++	mutex_unlock(&zi->i_truncate_mutex);
++
++	if (iocb->ki_flags & IOCB_DIRECT) {
++		size_t count = iov_iter_count(to);
++
++		if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
++			ret = -EINVAL;
++			goto inode_unlock;
++		}
++		file_accessed(iocb->ki_filp);
++		ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops,
++				   &zonefs_read_dio_ops, 0, NULL, 0);
++	} else {
++		ret = generic_file_read_iter(iocb, to);
++		if (ret == -EIO)
++			zonefs_io_error(inode, false);
++	}
++
++inode_unlock:
++	inode_unlock_shared(inode);
++
++	return ret;
++}
++
++/*
++ * Write open accounting is done only for sequential files.
++ */
++static inline bool zonefs_seq_file_need_wro(struct inode *inode,
++					    struct file *file)
++{
++	if (zonefs_inode_is_cnv(inode))
++		return false;
++
++	if (!(file->f_mode & FMODE_WRITE))
++		return false;
++
++	return true;
++}
++
++static int zonefs_seq_file_write_open(struct inode *inode)
++{
++	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++	int ret = 0;
++
++	mutex_lock(&zi->i_truncate_mutex);
++
++	if (!zi->i_wr_refcnt) {
++		struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
++		unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files);
++
++		if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
++
++			if (sbi->s_max_wro_seq_files
++			    && wro > sbi->s_max_wro_seq_files) {
++				atomic_dec(&sbi->s_wro_seq_files);
++				ret = -EBUSY;
++				goto unlock;
++			}
++
++			if (i_size_read(inode) < z->z_capacity) {
++				ret = zonefs_inode_zone_mgmt(inode,
++							     REQ_OP_ZONE_OPEN);
++				if (ret) {
++					atomic_dec(&sbi->s_wro_seq_files);
++					goto unlock;
++				}
++				z->z_flags |= ZONEFS_ZONE_OPEN;
++				zonefs_inode_account_active(inode);
++			}
++		}
++	}
++
++	zi->i_wr_refcnt++;
++
++unlock:
++	mutex_unlock(&zi->i_truncate_mutex);
++
++	return ret;
++}
++
++static int zonefs_file_open(struct inode *inode, struct file *file)
++{
++	int ret;
++
++	ret = generic_file_open(inode, file);
++	if (ret)
++		return ret;
++
++	if (zonefs_seq_file_need_wro(inode, file))
++		return zonefs_seq_file_write_open(inode);
++
++	return 0;
++}
++
++static void zonefs_seq_file_write_close(struct inode *inode)
++{
++	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++	struct super_block *sb = inode->i_sb;
++	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
++	int ret = 0;
++
++	mutex_lock(&zi->i_truncate_mutex);
++
++	zi->i_wr_refcnt--;
++	if (zi->i_wr_refcnt)
++		goto unlock;
++
++	/*
++	 * The file zone may not be open anymore (e.g. the file was truncated to
++	 * its maximum size or it was fully written). For this case, we only
++	 * need to decrement the write open count.
++	 */
++	if (z->z_flags & ZONEFS_ZONE_OPEN) {
++		ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
++		if (ret) {
++			__zonefs_io_error(inode, false);
++			/*
++			 * Leaving zones explicitly open may lead to a state
++			 * where most zones cannot be written (zone resources
++			 * exhausted). So take preventive action by remounting
++			 * read-only.
++			 */
++			if (z->z_flags & ZONEFS_ZONE_OPEN &&
++			    !(sb->s_flags & SB_RDONLY)) {
++				zonefs_warn(sb,
++					"closing zone at %llu failed %d\n",
++					z->z_sector, ret);
++				zonefs_warn(sb,
++					"remounting filesystem read-only\n");
++				sb->s_flags |= SB_RDONLY;
++			}
++			goto unlock;
++		}
++
++		z->z_flags &= ~ZONEFS_ZONE_OPEN;
++		zonefs_inode_account_active(inode);
++	}
++
++	atomic_dec(&sbi->s_wro_seq_files);
++
++unlock:
++	mutex_unlock(&zi->i_truncate_mutex);
++}
++
++static int zonefs_file_release(struct inode *inode, struct file *file)
++{
++	/*
++	 * If we explicitly open a zone we must close it again as well, but the
++	 * zone management operation can fail (either due to an IO error or as
++	 * the zone has gone offline or read-only). Make sure we don't fail the
++	 * close(2) for user-space.
++	 */
++	if (zonefs_seq_file_need_wro(inode, file))
++		zonefs_seq_file_write_close(inode);
++
++	return 0;
++}
++
++const struct file_operations zonefs_file_operations = {
++	.open		= zonefs_file_open,
++	.release	= zonefs_file_release,
++	.fsync		= zonefs_file_fsync,
++	.mmap		= zonefs_file_mmap,
++	.llseek		= zonefs_file_llseek,
++	.read_iter	= zonefs_file_read_iter,
++	.write_iter	= zonefs_file_write_iter,
++	.splice_read	= generic_file_splice_read,
++	.splice_write	= iter_file_splice_write,
++	.iopoll		= iocb_bio_iopoll,
++};
+diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
+index a9c5c3f720adf..270ded209dde5 100644
+--- a/fs/zonefs/super.c
++++ b/fs/zonefs/super.c
+@@ -28,33 +28,47 @@
+ #include "trace.h"
+ 
+ /*
+- * Manage the active zone count. Called with zi->i_truncate_mutex held.
++ * Get the name of a zone group directory.
+  */
+-static void zonefs_account_active(struct inode *inode)
++static const char *zonefs_zgroup_name(enum zonefs_ztype ztype)
+ {
+-	struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	switch (ztype) {
++	case ZONEFS_ZTYPE_CNV:
++		return "cnv";
++	case ZONEFS_ZTYPE_SEQ:
++		return "seq";
++	default:
++		WARN_ON_ONCE(1);
++		return "???";
++	}
++}
+ 
+-	lockdep_assert_held(&zi->i_truncate_mutex);
++/*
++ * Manage the active zone count.
++ */
++static void zonefs_account_active(struct super_block *sb,
++				  struct zonefs_zone *z)
++{
++	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ 
+-	if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
++	if (zonefs_zone_is_cnv(z))
+ 		return;
+ 
+ 	/*
+ 	 * For zones that transitioned to the offline or readonly condition,
+ 	 * we only need to clear the active state.
+ 	 */
+-	if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY))
++	if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY))
+ 		goto out;
+ 
+ 	/*
+ 	 * If the zone is active, that is, if it is explicitly open or
+ 	 * partially written, check if it was already accounted as active.
+ 	 */
+-	if ((zi->i_flags & ZONEFS_ZONE_OPEN) ||
+-	    (zi->i_wpoffset > 0 && zi->i_wpoffset < zi->i_max_size)) {
+-		if (!(zi->i_flags & ZONEFS_ZONE_ACTIVE)) {
+-			zi->i_flags |= ZONEFS_ZONE_ACTIVE;
++	if ((z->z_flags & ZONEFS_ZONE_OPEN) ||
++	    (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) {
++		if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) {
++			z->z_flags |= ZONEFS_ZONE_ACTIVE;
+ 			atomic_inc(&sbi->s_active_seq_files);
+ 		}
+ 		return;
+@@ -62,18 +76,29 @@ static void zonefs_account_active(struct inode *inode)
+ 
+ out:
+ 	/* The zone is not active. If it was, update the active count */
+-	if (zi->i_flags & ZONEFS_ZONE_ACTIVE) {
+-		zi->i_flags &= ~ZONEFS_ZONE_ACTIVE;
++	if (z->z_flags & ZONEFS_ZONE_ACTIVE) {
++		z->z_flags &= ~ZONEFS_ZONE_ACTIVE;
+ 		atomic_dec(&sbi->s_active_seq_files);
+ 	}
+ }
+ 
+-static inline int zonefs_zone_mgmt(struct inode *inode, enum req_op op)
++/*
++ * Manage the active zone count. Called with zi->i_truncate_mutex held.
++ */
++void zonefs_inode_account_active(struct inode *inode)
+ {
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	int ret;
++	lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex);
+ 
+-	lockdep_assert_held(&zi->i_truncate_mutex);
++	return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode));
++}
++
++/*
++ * Execute a zone management operation.
++ */
++static int zonefs_zone_mgmt(struct super_block *sb,
++			    struct zonefs_zone *z, enum req_op op)
++{
++	int ret;
+ 
+ 	/*
+ 	 * With ZNS drives, closing an explicitly open zone that has not been
+@@ -83,201 +108,49 @@ static inline int zonefs_zone_mgmt(struct inode *inode, enum req_op op)
+ 	 * are exceeded, make sure that the zone does not remain active by
+ 	 * resetting it.
+ 	 */
+-	if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset)
++	if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset)
+ 		op = REQ_OP_ZONE_RESET;
+ 
+-	trace_zonefs_zone_mgmt(inode, op);
+-	ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
+-			       zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS);
++	trace_zonefs_zone_mgmt(sb, z, op);
++	ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector,
++			       z->z_size >> SECTOR_SHIFT, GFP_NOFS);
+ 	if (ret) {
+-		zonefs_err(inode->i_sb,
++		zonefs_err(sb,
+ 			   "Zone management operation %s at %llu failed %d\n",
+-			   blk_op_str(op), zi->i_zsector, ret);
++			   blk_op_str(op), z->z_sector, ret);
+ 		return ret;
+ 	}
+ 
+ 	return 0;
+ }
+ 
+-static inline void zonefs_i_size_write(struct inode *inode, loff_t isize)
++int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op)
+ {
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex);
+ 
+-	i_size_write(inode, isize);
+-	/*
+-	 * A full zone is no longer open/active and does not need
+-	 * explicit closing.
+-	 */
+-	if (isize >= zi->i_max_size) {
+-		struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+-
+-		if (zi->i_flags & ZONEFS_ZONE_ACTIVE)
+-			atomic_dec(&sbi->s_active_seq_files);
+-		zi->i_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE);
+-	}
++	return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op);
+ }
+ 
+-static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset,
+-				   loff_t length, unsigned int flags,
+-				   struct iomap *iomap, struct iomap *srcmap)
++void zonefs_i_size_write(struct inode *inode, loff_t isize)
+ {
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	struct super_block *sb = inode->i_sb;
+-	loff_t isize;
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
+ 
+-	/*
+-	 * All blocks are always mapped below EOF. If reading past EOF,
+-	 * act as if there is a hole up to the file maximum size.
+-	 */
+-	mutex_lock(&zi->i_truncate_mutex);
+-	iomap->bdev = inode->i_sb->s_bdev;
+-	iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
+-	isize = i_size_read(inode);
+-	if (iomap->offset >= isize) {
+-		iomap->type = IOMAP_HOLE;
+-		iomap->addr = IOMAP_NULL_ADDR;
+-		iomap->length = length;
+-	} else {
+-		iomap->type = IOMAP_MAPPED;
+-		iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+-		iomap->length = isize - iomap->offset;
+-	}
+-	mutex_unlock(&zi->i_truncate_mutex);
+-
+-	trace_zonefs_iomap_begin(inode, iomap);
+-
+-	return 0;
+-}
+-
+-static const struct iomap_ops zonefs_read_iomap_ops = {
+-	.iomap_begin	= zonefs_read_iomap_begin,
+-};
+-
+-static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset,
+-				    loff_t length, unsigned int flags,
+-				    struct iomap *iomap, struct iomap *srcmap)
+-{
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	struct super_block *sb = inode->i_sb;
+-	loff_t isize;
+-
+-	/* All write I/Os should always be within the file maximum size */
+-	if (WARN_ON_ONCE(offset + length > zi->i_max_size))
+-		return -EIO;
+-
+-	/*
+-	 * Sequential zones can only accept direct writes. This is already
+-	 * checked when writes are issued, so warn if we see a page writeback
+-	 * operation.
+-	 */
+-	if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
+-			 !(flags & IOMAP_DIRECT)))
+-		return -EIO;
++	i_size_write(inode, isize);
+ 
+ 	/*
+-	 * For conventional zones, all blocks are always mapped. For sequential
+-	 * zones, all blocks after always mapped below the inode size (zone
+-	 * write pointer) and unwriten beyond.
++	 * A full zone is no longer open/active and does not need
++	 * explicit closing.
+ 	 */
+-	mutex_lock(&zi->i_truncate_mutex);
+-	iomap->bdev = inode->i_sb->s_bdev;
+-	iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
+-	iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+-	isize = i_size_read(inode);
+-	if (iomap->offset >= isize) {
+-		iomap->type = IOMAP_UNWRITTEN;
+-		iomap->length = zi->i_max_size - iomap->offset;
+-	} else {
+-		iomap->type = IOMAP_MAPPED;
+-		iomap->length = isize - iomap->offset;
+-	}
+-	mutex_unlock(&zi->i_truncate_mutex);
+-
+-	trace_zonefs_iomap_begin(inode, iomap);
+-
+-	return 0;
+-}
+-
+-static const struct iomap_ops zonefs_write_iomap_ops = {
+-	.iomap_begin	= zonefs_write_iomap_begin,
+-};
+-
+-static int zonefs_read_folio(struct file *unused, struct folio *folio)
+-{
+-	return iomap_read_folio(folio, &zonefs_read_iomap_ops);
+-}
+-
+-static void zonefs_readahead(struct readahead_control *rac)
+-{
+-	iomap_readahead(rac, &zonefs_read_iomap_ops);
+-}
+-
+-/*
+- * Map blocks for page writeback. This is used only on conventional zone files,
+- * which implies that the page range can only be within the fixed inode size.
+- */
+-static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc,
+-				   struct inode *inode, loff_t offset)
+-{
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-
+-	if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV))
+-		return -EIO;
+-	if (WARN_ON_ONCE(offset >= i_size_read(inode)))
+-		return -EIO;
+-
+-	/* If the mapping is already OK, nothing needs to be done */
+-	if (offset >= wpc->iomap.offset &&
+-	    offset < wpc->iomap.offset + wpc->iomap.length)
+-		return 0;
+-
+-	return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset,
+-					IOMAP_WRITE, &wpc->iomap, NULL);
+-}
+-
+-static const struct iomap_writeback_ops zonefs_writeback_ops = {
+-	.map_blocks		= zonefs_write_map_blocks,
+-};
+-
+-static int zonefs_writepages(struct address_space *mapping,
+-			     struct writeback_control *wbc)
+-{
+-	struct iomap_writepage_ctx wpc = { };
+-
+-	return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops);
+-}
+-
+-static int zonefs_swap_activate(struct swap_info_struct *sis,
+-				struct file *swap_file, sector_t *span)
+-{
+-	struct inode *inode = file_inode(swap_file);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	if (isize >= z->z_capacity) {
++		struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+ 
+-	if (zi->i_ztype != ZONEFS_ZTYPE_CNV) {
+-		zonefs_err(inode->i_sb,
+-			   "swap file: not a conventional zone file\n");
+-		return -EINVAL;
++		if (z->z_flags & ZONEFS_ZONE_ACTIVE)
++			atomic_dec(&sbi->s_active_seq_files);
++		z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE);
+ 	}
+-
+-	return iomap_swapfile_activate(sis, swap_file, span,
+-				       &zonefs_read_iomap_ops);
+ }
+ 
+-static const struct address_space_operations zonefs_file_aops = {
+-	.read_folio		= zonefs_read_folio,
+-	.readahead		= zonefs_readahead,
+-	.writepages		= zonefs_writepages,
+-	.dirty_folio		= filemap_dirty_folio,
+-	.release_folio		= iomap_release_folio,
+-	.invalidate_folio	= iomap_invalidate_folio,
+-	.migrate_folio		= filemap_migrate_folio,
+-	.is_partially_uptodate	= iomap_is_partially_uptodate,
+-	.error_remove_page	= generic_error_remove_page,
+-	.direct_IO		= noop_direct_IO,
+-	.swap_activate		= zonefs_swap_activate,
+-};
+-
+-static void zonefs_update_stats(struct inode *inode, loff_t new_isize)
++void zonefs_update_stats(struct inode *inode, loff_t new_isize)
+ {
+ 	struct super_block *sb = inode->i_sb;
+ 	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+@@ -310,63 +183,68 @@ static void zonefs_update_stats(struct inode *inode, loff_t new_isize)
+ }
+ 
+ /*
+- * Check a zone condition and adjust its file inode access permissions for
+- * offline and readonly zones. Return the inode size corresponding to the
+- * amount of readable data in the zone.
++ * Check a zone condition. Return the amount of written (and still readable)
++ * data in the zone.
+  */
+-static loff_t zonefs_check_zone_condition(struct inode *inode,
+-					  struct blk_zone *zone, bool warn,
+-					  bool mount)
++static loff_t zonefs_check_zone_condition(struct super_block *sb,
++					  struct zonefs_zone *z,
++					  struct blk_zone *zone)
+ {
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-
+ 	switch (zone->cond) {
+ 	case BLK_ZONE_COND_OFFLINE:
+-		/*
+-		 * Dead zone: make the inode immutable, disable all accesses
+-		 * and set the file size to 0 (zone wp set to zone start).
+-		 */
+-		if (warn)
+-			zonefs_warn(inode->i_sb, "inode %lu: offline zone\n",
+-				    inode->i_ino);
+-		inode->i_flags |= S_IMMUTABLE;
+-		inode->i_mode &= ~0777;
+-		zone->wp = zone->start;
+-		zi->i_flags |= ZONEFS_ZONE_OFFLINE;
++		zonefs_warn(sb, "Zone %llu: offline zone\n",
++			    z->z_sector);
++		z->z_flags |= ZONEFS_ZONE_OFFLINE;
+ 		return 0;
+ 	case BLK_ZONE_COND_READONLY:
+ 		/*
+-		 * The write pointer of read-only zones is invalid. If such a
+-		 * zone is found during mount, the file size cannot be retrieved
+-		 * so we treat the zone as offline (mount == true case).
+-		 * Otherwise, keep the file size as it was when last updated
+-		 * so that the user can recover data. In both cases, writes are
+-		 * always disabled for the zone.
++		 * The write pointer of read-only zones is invalid, so we cannot
++		 * determine the zone wpoffset (inode size). We thus keep the
++		 * zone wpoffset as is, which leads to an empty file
++		 * (wpoffset == 0) on mount. For a runtime error, this keeps
++		 * the inode size as it was when last updated so that the user
++		 * can recover data.
+ 		 */
+-		if (warn)
+-			zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n",
+-				    inode->i_ino);
+-		inode->i_flags |= S_IMMUTABLE;
+-		if (mount) {
+-			zone->cond = BLK_ZONE_COND_OFFLINE;
+-			inode->i_mode &= ~0777;
+-			zone->wp = zone->start;
+-			zi->i_flags |= ZONEFS_ZONE_OFFLINE;
+-			return 0;
+-		}
+-		zi->i_flags |= ZONEFS_ZONE_READONLY;
+-		inode->i_mode &= ~0222;
+-		return i_size_read(inode);
++		zonefs_warn(sb, "Zone %llu: read-only zone\n",
++			    z->z_sector);
++		z->z_flags |= ZONEFS_ZONE_READONLY;
++		if (zonefs_zone_is_cnv(z))
++			return z->z_capacity;
++		return z->z_wpoffset;
+ 	case BLK_ZONE_COND_FULL:
+ 		/* The write pointer of full zones is invalid. */
+-		return zi->i_max_size;
++		return z->z_capacity;
+ 	default:
+-		if (zi->i_ztype == ZONEFS_ZTYPE_CNV)
+-			return zi->i_max_size;
++		if (zonefs_zone_is_cnv(z))
++			return z->z_capacity;
+ 		return (zone->wp - zone->start) << SECTOR_SHIFT;
+ 	}
+ }
+ 
++/*
++ * Check a zone condition and adjust its inode access permissions for
++ * offline and readonly zones.
++ */
++static void zonefs_inode_update_mode(struct inode *inode)
++{
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
++
++	if (z->z_flags & ZONEFS_ZONE_OFFLINE) {
++		/* Offline zones cannot be read nor written */
++		inode->i_flags |= S_IMMUTABLE;
++		inode->i_mode &= ~0777;
++	} else if (z->z_flags & ZONEFS_ZONE_READONLY) {
++		/* Readonly zones cannot be written */
++		inode->i_flags |= S_IMMUTABLE;
++		if (z->z_flags & ZONEFS_ZONE_INIT_MODE)
++			inode->i_mode &= ~0777;
++		else
++			inode->i_mode &= ~0222;
++	}
++
++	z->z_flags &= ~ZONEFS_ZONE_INIT_MODE;
++}
++
+ struct zonefs_ioerr_data {
+ 	struct inode	*inode;
+ 	bool		write;
+@@ -377,7 +255,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
+ {
+ 	struct zonefs_ioerr_data *err = data;
+ 	struct inode *inode = err->inode;
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
+ 	struct super_block *sb = inode->i_sb;
+ 	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ 	loff_t isize, data_size;
+@@ -388,10 +266,9 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
+ 	 * as there is no inconsistency between the inode size and the amount of
+ 	 * data writen in the zone (data_size).
+ 	 */
+-	data_size = zonefs_check_zone_condition(inode, zone, true, false);
++	data_size = zonefs_check_zone_condition(sb, z, zone);
+ 	isize = i_size_read(inode);
+-	if (zone->cond != BLK_ZONE_COND_OFFLINE &&
+-	    zone->cond != BLK_ZONE_COND_READONLY &&
++	if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
+ 	    !err->write && isize == data_size)
+ 		return 0;
+ 
+@@ -414,8 +291,9 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
+ 	 * In all cases, warn about inode size inconsistency and handle the
+ 	 * IO error according to the zone condition and to the mount options.
+ 	 */
+-	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && isize != data_size)
+-		zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n",
++	if (zonefs_zone_is_seq(z) && isize != data_size)
++		zonefs_warn(sb,
++			    "inode %lu: invalid size %lld (should be %lld)\n",
+ 			    inode->i_ino, isize, data_size);
+ 
+ 	/*
+@@ -424,24 +302,22 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
+ 	 * zone condition to read-only and offline respectively, as if the
+ 	 * condition was signaled by the hardware.
+ 	 */
+-	if (zone->cond == BLK_ZONE_COND_OFFLINE ||
+-	    sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) {
++	if ((z->z_flags & ZONEFS_ZONE_OFFLINE) ||
++	    (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) {
+ 		zonefs_warn(sb, "inode %lu: read/write access disabled\n",
+ 			    inode->i_ino);
+-		if (zone->cond != BLK_ZONE_COND_OFFLINE) {
+-			zone->cond = BLK_ZONE_COND_OFFLINE;
+-			data_size = zonefs_check_zone_condition(inode, zone,
+-								false, false);
+-		}
+-	} else if (zone->cond == BLK_ZONE_COND_READONLY ||
+-		   sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) {
++		if (!(z->z_flags & ZONEFS_ZONE_OFFLINE))
++			z->z_flags |= ZONEFS_ZONE_OFFLINE;
++		zonefs_inode_update_mode(inode);
++		data_size = 0;
++	} else if ((z->z_flags & ZONEFS_ZONE_READONLY) ||
++		   (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) {
+ 		zonefs_warn(sb, "inode %lu: write access disabled\n",
+ 			    inode->i_ino);
+-		if (zone->cond != BLK_ZONE_COND_READONLY) {
+-			zone->cond = BLK_ZONE_COND_READONLY;
+-			data_size = zonefs_check_zone_condition(inode, zone,
+-								false, false);
+-		}
++		if (!(z->z_flags & ZONEFS_ZONE_READONLY))
++			z->z_flags |= ZONEFS_ZONE_READONLY;
++		zonefs_inode_update_mode(inode);
++		data_size = isize;
+ 	} else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO &&
+ 		   data_size > isize) {
+ 		/* Do not expose garbage data */
+@@ -455,9 +331,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
+ 	 * close of the zone when the inode file is closed.
+ 	 */
+ 	if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) &&
+-	    (zone->cond == BLK_ZONE_COND_OFFLINE ||
+-	     zone->cond == BLK_ZONE_COND_READONLY))
+-		zi->i_flags &= ~ZONEFS_ZONE_OPEN;
++	    (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)))
++		z->z_flags &= ~ZONEFS_ZONE_OPEN;
+ 
+ 	/*
+ 	 * If error=remount-ro was specified, any error result in remounting
+@@ -474,8 +349,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
+ 	 */
+ 	zonefs_update_stats(inode, data_size);
+ 	zonefs_i_size_write(inode, data_size);
+-	zi->i_wpoffset = data_size;
+-	zonefs_account_active(inode);
++	z->z_wpoffset = data_size;
++	zonefs_inode_account_active(inode);
+ 
+ 	return 0;
+ }
+@@ -487,9 +362,9 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
+  * eventually correct the file size and zonefs inode write pointer offset
+  * (which can be out of sync with the drive due to partial write failures).
+  */
+-static void __zonefs_io_error(struct inode *inode, bool write)
++void __zonefs_io_error(struct inode *inode, bool write)
+ {
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++	struct zonefs_zone *z = zonefs_inode_zone(inode);
+ 	struct super_block *sb = inode->i_sb;
+ 	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ 	unsigned int noio_flag;
+@@ -505,8 +380,8 @@ static void __zonefs_io_error(struct inode *inode, bool write)
+ 	 * files with aggregated conventional zones, for which the inode zone
+ 	 * size is always larger than the device zone size.
+ 	 */
+-	if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev))
+-		nr_zones = zi->i_zone_size >>
++	if (z->z_size > bdev_zone_sectors(sb->s_bdev))
++		nr_zones = z->z_size >>
+ 			(sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+ 
+ 	/*
+@@ -518,7 +393,7 @@ static void __zonefs_io_error(struct inode *inode, bool write)
+ 	 * the GFP_NOIO context avoids both problems.
+ 	 */
+ 	noio_flag = memalloc_noio_save();
+-	ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones,
++	ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones,
+ 				  zonefs_io_error_cb, &err);
+ 	if (ret != nr_zones)
+ 		zonefs_err(sb, "Get inode %lu zone information failed %d\n",
+@@ -526,749 +401,6 @@ static void __zonefs_io_error(struct inode *inode, bool write)
+ 	memalloc_noio_restore(noio_flag);
+ }
+ 
+-static void zonefs_io_error(struct inode *inode, bool write)
+-{
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-
+-	mutex_lock(&zi->i_truncate_mutex);
+-	__zonefs_io_error(inode, write);
+-	mutex_unlock(&zi->i_truncate_mutex);
+-}
+-
+-static int zonefs_file_truncate(struct inode *inode, loff_t isize)
+-{
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	loff_t old_isize;
+-	enum req_op op;
+-	int ret = 0;
+-
+-	/*
+-	 * Only sequential zone files can be truncated and truncation is allowed
+-	 * only down to a 0 size, which is equivalent to a zone reset, and to
+-	 * the maximum file size, which is equivalent to a zone finish.
+-	 */
+-	if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+-		return -EPERM;
+-
+-	if (!isize)
+-		op = REQ_OP_ZONE_RESET;
+-	else if (isize == zi->i_max_size)
+-		op = REQ_OP_ZONE_FINISH;
+-	else
+-		return -EPERM;
+-
+-	inode_dio_wait(inode);
+-
+-	/* Serialize against page faults */
+-	filemap_invalidate_lock(inode->i_mapping);
+-
+-	/* Serialize against zonefs_iomap_begin() */
+-	mutex_lock(&zi->i_truncate_mutex);
+-
+-	old_isize = i_size_read(inode);
+-	if (isize == old_isize)
+-		goto unlock;
+-
+-	ret = zonefs_zone_mgmt(inode, op);
+-	if (ret)
+-		goto unlock;
+-
+-	/*
+-	 * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set,
+-	 * take care of open zones.
+-	 */
+-	if (zi->i_flags & ZONEFS_ZONE_OPEN) {
+-		/*
+-		 * Truncating a zone to EMPTY or FULL is the equivalent of
+-		 * closing the zone. For a truncation to 0, we need to
+-		 * re-open the zone to ensure new writes can be processed.
+-		 * For a truncation to the maximum file size, the zone is
+-		 * closed and writes cannot be accepted anymore, so clear
+-		 * the open flag.
+-		 */
+-		if (!isize)
+-			ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+-		else
+-			zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+-	}
+-
+-	zonefs_update_stats(inode, isize);
+-	truncate_setsize(inode, isize);
+-	zi->i_wpoffset = isize;
+-	zonefs_account_active(inode);
+-
+-unlock:
+-	mutex_unlock(&zi->i_truncate_mutex);
+-	filemap_invalidate_unlock(inode->i_mapping);
+-
+-	return ret;
+-}
+-
+-static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
+-				struct dentry *dentry, struct iattr *iattr)
+-{
+-	struct inode *inode = d_inode(dentry);
+-	int ret;
+-
+-	if (unlikely(IS_IMMUTABLE(inode)))
+-		return -EPERM;
+-
+-	ret = setattr_prepare(&init_user_ns, dentry, iattr);
+-	if (ret)
+-		return ret;
+-
+-	/*
+-	 * Since files and directories cannot be created nor deleted, do not
+-	 * allow setting any write attributes on the sub-directories grouping
+-	 * files by zone type.
+-	 */
+-	if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) &&
+-	    (iattr->ia_mode & 0222))
+-		return -EPERM;
+-
+-	if (((iattr->ia_valid & ATTR_UID) &&
+-	     !uid_eq(iattr->ia_uid, inode->i_uid)) ||
+-	    ((iattr->ia_valid & ATTR_GID) &&
+-	     !gid_eq(iattr->ia_gid, inode->i_gid))) {
+-		ret = dquot_transfer(mnt_userns, inode, iattr);
+-		if (ret)
+-			return ret;
+-	}
+-
+-	if (iattr->ia_valid & ATTR_SIZE) {
+-		ret = zonefs_file_truncate(inode, iattr->ia_size);
+-		if (ret)
+-			return ret;
+-	}
+-
+-	setattr_copy(&init_user_ns, inode, iattr);
+-
+-	return 0;
+-}
+-
+-static const struct inode_operations zonefs_file_inode_operations = {
+-	.setattr	= zonefs_inode_setattr,
+-};
+-
+-static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
+-			     int datasync)
+-{
+-	struct inode *inode = file_inode(file);
+-	int ret = 0;
+-
+-	if (unlikely(IS_IMMUTABLE(inode)))
+-		return -EPERM;
+-
+-	/*
+-	 * Since only direct writes are allowed in sequential files, page cache
+-	 * flush is needed only for conventional zone files.
+-	 */
+-	if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV)
+-		ret = file_write_and_wait_range(file, start, end);
+-	if (!ret)
+-		ret = blkdev_issue_flush(inode->i_sb->s_bdev);
+-
+-	if (ret)
+-		zonefs_io_error(inode, true);
+-
+-	return ret;
+-}
+-
+-static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
+-{
+-	struct inode *inode = file_inode(vmf->vma->vm_file);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	vm_fault_t ret;
+-
+-	if (unlikely(IS_IMMUTABLE(inode)))
+-		return VM_FAULT_SIGBUS;
+-
+-	/*
+-	 * Sanity check: only conventional zone files can have shared
+-	 * writeable mappings.
+-	 */
+-	if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV))
+-		return VM_FAULT_NOPAGE;
+-
+-	sb_start_pagefault(inode->i_sb);
+-	file_update_time(vmf->vma->vm_file);
+-
+-	/* Serialize against truncates */
+-	filemap_invalidate_lock_shared(inode->i_mapping);
+-	ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops);
+-	filemap_invalidate_unlock_shared(inode->i_mapping);
+-
+-	sb_end_pagefault(inode->i_sb);
+-	return ret;
+-}
+-
+-static const struct vm_operations_struct zonefs_file_vm_ops = {
+-	.fault		= filemap_fault,
+-	.map_pages	= filemap_map_pages,
+-	.page_mkwrite	= zonefs_filemap_page_mkwrite,
+-};
+-
+-static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma)
+-{
+-	/*
+-	 * Conventional zones accept random writes, so their files can support
+-	 * shared writable mappings. For sequential zone files, only read
+-	 * mappings are possible since there are no guarantees for write
+-	 * ordering between msync() and page cache writeback.
+-	 */
+-	if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ &&
+-	    (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+-		return -EINVAL;
+-
+-	file_accessed(file);
+-	vma->vm_ops = &zonefs_file_vm_ops;
+-
+-	return 0;
+-}
+-
+-static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
+-{
+-	loff_t isize = i_size_read(file_inode(file));
+-
+-	/*
+-	 * Seeks are limited to below the zone size for conventional zones
+-	 * and below the zone write pointer for sequential zones. In both
+-	 * cases, this limit is the inode size.
+-	 */
+-	return generic_file_llseek_size(file, offset, whence, isize, isize);
+-}
+-
+-static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
+-					int error, unsigned int flags)
+-{
+-	struct inode *inode = file_inode(iocb->ki_filp);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-
+-	if (error) {
+-		zonefs_io_error(inode, true);
+-		return error;
+-	}
+-
+-	if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) {
+-		/*
+-		 * Note that we may be seeing completions out of order,
+-		 * but that is not a problem since a write completed
+-		 * successfully necessarily means that all preceding writes
+-		 * were also successful. So we can safely increase the inode
+-		 * size to the write end location.
+-		 */
+-		mutex_lock(&zi->i_truncate_mutex);
+-		if (i_size_read(inode) < iocb->ki_pos + size) {
+-			zonefs_update_stats(inode, iocb->ki_pos + size);
+-			zonefs_i_size_write(inode, iocb->ki_pos + size);
+-		}
+-		mutex_unlock(&zi->i_truncate_mutex);
+-	}
+-
+-	return 0;
+-}
+-
+-static const struct iomap_dio_ops zonefs_write_dio_ops = {
+-	.end_io			= zonefs_file_write_dio_end_io,
+-};
+-
+-static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
+-{
+-	struct inode *inode = file_inode(iocb->ki_filp);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	struct block_device *bdev = inode->i_sb->s_bdev;
+-	unsigned int max = bdev_max_zone_append_sectors(bdev);
+-	struct bio *bio;
+-	ssize_t size;
+-	int nr_pages;
+-	ssize_t ret;
+-
+-	max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
+-	iov_iter_truncate(from, max);
+-
+-	nr_pages = iov_iter_npages(from, BIO_MAX_VECS);
+-	if (!nr_pages)
+-		return 0;
+-
+-	bio = bio_alloc(bdev, nr_pages,
+-			REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
+-	bio->bi_iter.bi_sector = zi->i_zsector;
+-	bio->bi_ioprio = iocb->ki_ioprio;
+-	if (iocb_is_dsync(iocb))
+-		bio->bi_opf |= REQ_FUA;
+-
+-	ret = bio_iov_iter_get_pages(bio, from);
+-	if (unlikely(ret))
+-		goto out_release;
+-
+-	size = bio->bi_iter.bi_size;
+-	task_io_account_write(size);
+-
+-	if (iocb->ki_flags & IOCB_HIPRI)
+-		bio_set_polled(bio, iocb);
+-
+-	ret = submit_bio_wait(bio);
+-
+-	/*
+-	 * If the file zone was written underneath the file system, the zone
+-	 * write pointer may not be where we expect it to be, but the zone
+-	 * append write can still succeed. So check manually that we wrote where
+-	 * we intended to, that is, at zi->i_wpoffset.
+-	 */
+-	if (!ret) {
+-		sector_t wpsector =
+-			zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT);
+-
+-		if (bio->bi_iter.bi_sector != wpsector) {
+-			zonefs_warn(inode->i_sb,
+-				"Corrupted write pointer %llu for zone at %llu\n",
+-				wpsector, zi->i_zsector);
+-			ret = -EIO;
+-		}
+-	}
+-
+-	zonefs_file_write_dio_end_io(iocb, size, ret, 0);
+-	trace_zonefs_file_dio_append(inode, size, ret);
+-
+-out_release:
+-	bio_release_pages(bio, false);
+-	bio_put(bio);
+-
+-	if (ret >= 0) {
+-		iocb->ki_pos += size;
+-		return size;
+-	}
+-
+-	return ret;
+-}
+-
+-/*
+- * Do not exceed the LFS limits nor the file zone size. If pos is under the
+- * limit it becomes a short access. If it exceeds the limit, return -EFBIG.
+- */
+-static loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
+-					loff_t count)
+-{
+-	struct inode *inode = file_inode(file);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	loff_t limit = rlimit(RLIMIT_FSIZE);
+-	loff_t max_size = zi->i_max_size;
+-
+-	if (limit != RLIM_INFINITY) {
+-		if (pos >= limit) {
+-			send_sig(SIGXFSZ, current, 0);
+-			return -EFBIG;
+-		}
+-		count = min(count, limit - pos);
+-	}
+-
+-	if (!(file->f_flags & O_LARGEFILE))
+-		max_size = min_t(loff_t, MAX_NON_LFS, max_size);
+-
+-	if (unlikely(pos >= max_size))
+-		return -EFBIG;
+-
+-	return min(count, max_size - pos);
+-}
+-
+-static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+-{
+-	struct file *file = iocb->ki_filp;
+-	struct inode *inode = file_inode(file);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	loff_t count;
+-
+-	if (IS_SWAPFILE(inode))
+-		return -ETXTBSY;
+-
+-	if (!iov_iter_count(from))
+-		return 0;
+-
+-	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+-		return -EINVAL;
+-
+-	if (iocb->ki_flags & IOCB_APPEND) {
+-		if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+-			return -EINVAL;
+-		mutex_lock(&zi->i_truncate_mutex);
+-		iocb->ki_pos = zi->i_wpoffset;
+-		mutex_unlock(&zi->i_truncate_mutex);
+-	}
+-
+-	count = zonefs_write_check_limits(file, iocb->ki_pos,
+-					  iov_iter_count(from));
+-	if (count < 0)
+-		return count;
+-
+-	iov_iter_truncate(from, count);
+-	return iov_iter_count(from);
+-}
+-
+-/*
+- * Handle direct writes. For sequential zone files, this is the only possible
+- * write path. For these files, check that the user is issuing writes
+- * sequentially from the end of the file. This code assumes that the block layer
+- * delivers write requests to the device in sequential order. This is always the
+- * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE
+- * elevator feature is being used (e.g. mq-deadline). The block layer always
+- * automatically select such an elevator for zoned block devices during the
+- * device initialization.
+- */
+-static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
+-{
+-	struct inode *inode = file_inode(iocb->ki_filp);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	struct super_block *sb = inode->i_sb;
+-	bool sync = is_sync_kiocb(iocb);
+-	bool append = false;
+-	ssize_t ret, count;
+-
+-	/*
+-	 * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
+-	 * as this can cause write reordering (e.g. the first aio gets EAGAIN
+-	 * on the inode lock but the second goes through but is now unaligned).
+-	 */
+-	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync &&
+-	    (iocb->ki_flags & IOCB_NOWAIT))
+-		return -EOPNOTSUPP;
+-
+-	if (iocb->ki_flags & IOCB_NOWAIT) {
+-		if (!inode_trylock(inode))
+-			return -EAGAIN;
+-	} else {
+-		inode_lock(inode);
+-	}
+-
+-	count = zonefs_write_checks(iocb, from);
+-	if (count <= 0) {
+-		ret = count;
+-		goto inode_unlock;
+-	}
+-
+-	if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
+-		ret = -EINVAL;
+-		goto inode_unlock;
+-	}
+-
+-	/* Enforce sequential writes (append only) in sequential zones */
+-	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) {
+-		mutex_lock(&zi->i_truncate_mutex);
+-		if (iocb->ki_pos != zi->i_wpoffset) {
+-			mutex_unlock(&zi->i_truncate_mutex);
+-			ret = -EINVAL;
+-			goto inode_unlock;
+-		}
+-		mutex_unlock(&zi->i_truncate_mutex);
+-		append = sync;
+-	}
+-
+-	if (append)
+-		ret = zonefs_file_dio_append(iocb, from);
+-	else
+-		ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
+-				   &zonefs_write_dio_ops, 0, NULL, 0);
+-	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
+-	    (ret > 0 || ret == -EIOCBQUEUED)) {
+-		if (ret > 0)
+-			count = ret;
+-
+-		/*
+-		 * Update the zone write pointer offset assuming the write
+-		 * operation succeeded. If it did not, the error recovery path
+-		 * will correct it. Also do active seq file accounting.
+-		 */
+-		mutex_lock(&zi->i_truncate_mutex);
+-		zi->i_wpoffset += count;
+-		zonefs_account_active(inode);
+-		mutex_unlock(&zi->i_truncate_mutex);
+-	}
+-
+-inode_unlock:
+-	inode_unlock(inode);
+-
+-	return ret;
+-}
+-
+-static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
+-					  struct iov_iter *from)
+-{
+-	struct inode *inode = file_inode(iocb->ki_filp);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	ssize_t ret;
+-
+-	/*
+-	 * Direct IO writes are mandatory for sequential zone files so that the
+-	 * write IO issuing order is preserved.
+-	 */
+-	if (zi->i_ztype != ZONEFS_ZTYPE_CNV)
+-		return -EIO;
+-
+-	if (iocb->ki_flags & IOCB_NOWAIT) {
+-		if (!inode_trylock(inode))
+-			return -EAGAIN;
+-	} else {
+-		inode_lock(inode);
+-	}
+-
+-	ret = zonefs_write_checks(iocb, from);
+-	if (ret <= 0)
+-		goto inode_unlock;
+-
+-	ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops);
+-	if (ret > 0)
+-		iocb->ki_pos += ret;
+-	else if (ret == -EIO)
+-		zonefs_io_error(inode, true);
+-
+-inode_unlock:
+-	inode_unlock(inode);
+-	if (ret > 0)
+-		ret = generic_write_sync(iocb, ret);
+-
+-	return ret;
+-}
+-
+-static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+-{
+-	struct inode *inode = file_inode(iocb->ki_filp);
+-
+-	if (unlikely(IS_IMMUTABLE(inode)))
+-		return -EPERM;
+-
+-	if (sb_rdonly(inode->i_sb))
+-		return -EROFS;
+-
+-	/* Write operations beyond the zone size are not allowed */
+-	if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size)
+-		return -EFBIG;
+-
+-	if (iocb->ki_flags & IOCB_DIRECT) {
+-		ssize_t ret = zonefs_file_dio_write(iocb, from);
+-		if (ret != -ENOTBLK)
+-			return ret;
+-	}
+-
+-	return zonefs_file_buffered_write(iocb, from);
+-}
+-
+-static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size,
+-				       int error, unsigned int flags)
+-{
+-	if (error) {
+-		zonefs_io_error(file_inode(iocb->ki_filp), false);
+-		return error;
+-	}
+-
+-	return 0;
+-}
+-
+-static const struct iomap_dio_ops zonefs_read_dio_ops = {
+-	.end_io			= zonefs_file_read_dio_end_io,
+-};
+-
+-static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+-{
+-	struct inode *inode = file_inode(iocb->ki_filp);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	struct super_block *sb = inode->i_sb;
+-	loff_t isize;
+-	ssize_t ret;
+-
+-	/* Offline zones cannot be read */
+-	if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777)))
+-		return -EPERM;
+-
+-	if (iocb->ki_pos >= zi->i_max_size)
+-		return 0;
+-
+-	if (iocb->ki_flags & IOCB_NOWAIT) {
+-		if (!inode_trylock_shared(inode))
+-			return -EAGAIN;
+-	} else {
+-		inode_lock_shared(inode);
+-	}
+-
+-	/* Limit read operations to written data */
+-	mutex_lock(&zi->i_truncate_mutex);
+-	isize = i_size_read(inode);
+-	if (iocb->ki_pos >= isize) {
+-		mutex_unlock(&zi->i_truncate_mutex);
+-		ret = 0;
+-		goto inode_unlock;
+-	}
+-	iov_iter_truncate(to, isize - iocb->ki_pos);
+-	mutex_unlock(&zi->i_truncate_mutex);
+-
+-	if (iocb->ki_flags & IOCB_DIRECT) {
+-		size_t count = iov_iter_count(to);
+-
+-		if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
+-			ret = -EINVAL;
+-			goto inode_unlock;
+-		}
+-		file_accessed(iocb->ki_filp);
+-		ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops,
+-				   &zonefs_read_dio_ops, 0, NULL, 0);
+-	} else {
+-		ret = generic_file_read_iter(iocb, to);
+-		if (ret == -EIO)
+-			zonefs_io_error(inode, false);
+-	}
+-
+-inode_unlock:
+-	inode_unlock_shared(inode);
+-
+-	return ret;
+-}
+-
+-/*
+- * Write open accounting is done only for sequential files.
+- */
+-static inline bool zonefs_seq_file_need_wro(struct inode *inode,
+-					    struct file *file)
+-{
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-
+-	if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+-		return false;
+-
+-	if (!(file->f_mode & FMODE_WRITE))
+-		return false;
+-
+-	return true;
+-}
+-
+-static int zonefs_seq_file_write_open(struct inode *inode)
+-{
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	int ret = 0;
+-
+-	mutex_lock(&zi->i_truncate_mutex);
+-
+-	if (!zi->i_wr_refcnt) {
+-		struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+-		unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files);
+-
+-		if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
+-
+-			if (sbi->s_max_wro_seq_files
+-			    && wro > sbi->s_max_wro_seq_files) {
+-				atomic_dec(&sbi->s_wro_seq_files);
+-				ret = -EBUSY;
+-				goto unlock;
+-			}
+-
+-			if (i_size_read(inode) < zi->i_max_size) {
+-				ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+-				if (ret) {
+-					atomic_dec(&sbi->s_wro_seq_files);
+-					goto unlock;
+-				}
+-				zi->i_flags |= ZONEFS_ZONE_OPEN;
+-				zonefs_account_active(inode);
+-			}
+-		}
+-	}
+-
+-	zi->i_wr_refcnt++;
+-
+-unlock:
+-	mutex_unlock(&zi->i_truncate_mutex);
+-
+-	return ret;
+-}
+-
+-static int zonefs_file_open(struct inode *inode, struct file *file)
+-{
+-	int ret;
+-
+-	ret = generic_file_open(inode, file);
+-	if (ret)
+-		return ret;
+-
+-	if (zonefs_seq_file_need_wro(inode, file))
+-		return zonefs_seq_file_write_open(inode);
+-
+-	return 0;
+-}
+-
+-static void zonefs_seq_file_write_close(struct inode *inode)
+-{
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	struct super_block *sb = inode->i_sb;
+-	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+-	int ret = 0;
+-
+-	mutex_lock(&zi->i_truncate_mutex);
+-
+-	zi->i_wr_refcnt--;
+-	if (zi->i_wr_refcnt)
+-		goto unlock;
+-
+-	/*
+-	 * The file zone may not be open anymore (e.g. the file was truncated to
+-	 * its maximum size or it was fully written). For this case, we only
+-	 * need to decrement the write open count.
+-	 */
+-	if (zi->i_flags & ZONEFS_ZONE_OPEN) {
+-		ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
+-		if (ret) {
+-			__zonefs_io_error(inode, false);
+-			/*
+-			 * Leaving zones explicitly open may lead to a state
+-			 * where most zones cannot be written (zone resources
+-			 * exhausted). So take preventive action by remounting
+-			 * read-only.
+-			 */
+-			if (zi->i_flags & ZONEFS_ZONE_OPEN &&
+-			    !(sb->s_flags & SB_RDONLY)) {
+-				zonefs_warn(sb,
+-					"closing zone at %llu failed %d\n",
+-					zi->i_zsector, ret);
+-				zonefs_warn(sb,
+-					"remounting filesystem read-only\n");
+-				sb->s_flags |= SB_RDONLY;
+-			}
+-			goto unlock;
+-		}
+-
+-		zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+-		zonefs_account_active(inode);
+-	}
+-
+-	atomic_dec(&sbi->s_wro_seq_files);
+-
+-unlock:
+-	mutex_unlock(&zi->i_truncate_mutex);
+-}
+-
+-static int zonefs_file_release(struct inode *inode, struct file *file)
+-{
+-	/*
+-	 * If we explicitly open a zone we must close it again as well, but the
+-	 * zone management operation can fail (either due to an IO error or as
+-	 * the zone has gone offline or read-only). Make sure we don't fail the
+-	 * close(2) for user-space.
+-	 */
+-	if (zonefs_seq_file_need_wro(inode, file))
+-		zonefs_seq_file_write_close(inode);
+-
+-	return 0;
+-}
+-
+-static const struct file_operations zonefs_file_operations = {
+-	.open		= zonefs_file_open,
+-	.release	= zonefs_file_release,
+-	.fsync		= zonefs_file_fsync,
+-	.mmap		= zonefs_file_mmap,
+-	.llseek		= zonefs_file_llseek,
+-	.read_iter	= zonefs_file_read_iter,
+-	.write_iter	= zonefs_file_write_iter,
+-	.splice_read	= generic_file_splice_read,
+-	.splice_write	= iter_file_splice_write,
+-	.iopoll		= iocb_bio_iopoll,
+-};
+-
+ static struct kmem_cache *zonefs_inode_cachep;
+ 
+ static struct inode *zonefs_alloc_inode(struct super_block *sb)
+@@ -1282,7 +414,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
+ 	inode_init_once(&zi->i_vnode);
+ 	mutex_init(&zi->i_truncate_mutex);
+ 	zi->i_wr_refcnt = 0;
+-	zi->i_flags = 0;
+ 
+ 	return &zi->i_vnode;
+ }
+@@ -1315,8 +446,8 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
+ 	buf->f_bavail = buf->f_bfree;
+ 
+ 	for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
+-		if (sbi->s_nr_files[t])
+-			buf->f_files += sbi->s_nr_files[t] + 1;
++		if (sbi->s_zgroup[t].g_nr_zones)
++			buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1;
+ 	}
+ 	buf->f_ffree = 0;
+ 
+@@ -1382,51 +513,85 @@ static int zonefs_parse_options(struct super_block *sb, char *options)
+ 		}
+ 	}
+ 
+-	return 0;
+-}
++	return 0;
++}
++
++static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
++{
++	struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
++
++	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
++		seq_puts(seq, ",errors=remount-ro");
++	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
++		seq_puts(seq, ",errors=zone-ro");
++	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
++		seq_puts(seq, ",errors=zone-offline");
++	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
++		seq_puts(seq, ",errors=repair");
++
++	return 0;
++}
++
++static int zonefs_remount(struct super_block *sb, int *flags, char *data)
++{
++	sync_filesystem(sb);
++
++	return zonefs_parse_options(sb, data);
++}
++
++static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
++				struct dentry *dentry, struct iattr *iattr)
++{
++	struct inode *inode = d_inode(dentry);
++	int ret;
++
++	if (unlikely(IS_IMMUTABLE(inode)))
++		return -EPERM;
++
++	ret = setattr_prepare(&init_user_ns, dentry, iattr);
++	if (ret)
++		return ret;
++
++	/*
++	 * Since files and directories cannot be created nor deleted, do not
++	 * allow setting any write attributes on the sub-directories grouping
++	 * files by zone type.
++	 */
++	if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) &&
++	    (iattr->ia_mode & 0222))
++		return -EPERM;
++
++	if (((iattr->ia_valid & ATTR_UID) &&
++	     !uid_eq(iattr->ia_uid, inode->i_uid)) ||
++	    ((iattr->ia_valid & ATTR_GID) &&
++	     !gid_eq(iattr->ia_gid, inode->i_gid))) {
++		ret = dquot_transfer(mnt_userns, inode, iattr);
++		if (ret)
++			return ret;
++	}
+ 
+-static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
+-{
+-	struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
++	if (iattr->ia_valid & ATTR_SIZE) {
++		ret = zonefs_file_truncate(inode, iattr->ia_size);
++		if (ret)
++			return ret;
++	}
+ 
+-	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
+-		seq_puts(seq, ",errors=remount-ro");
+-	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
+-		seq_puts(seq, ",errors=zone-ro");
+-	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
+-		seq_puts(seq, ",errors=zone-offline");
+-	if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
+-		seq_puts(seq, ",errors=repair");
++	setattr_copy(&init_user_ns, inode, iattr);
+ 
+ 	return 0;
+ }
+ 
+-static int zonefs_remount(struct super_block *sb, int *flags, char *data)
+-{
+-	sync_filesystem(sb);
+-
+-	return zonefs_parse_options(sb, data);
+-}
+-
+-static const struct super_operations zonefs_sops = {
+-	.alloc_inode	= zonefs_alloc_inode,
+-	.free_inode	= zonefs_free_inode,
+-	.statfs		= zonefs_statfs,
+-	.remount_fs	= zonefs_remount,
+-	.show_options	= zonefs_show_options,
+-};
+-
+ static const struct inode_operations zonefs_dir_inode_operations = {
+ 	.lookup		= simple_lookup,
+ 	.setattr	= zonefs_inode_setattr,
+ };
+ 
+ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
+-				  enum zonefs_ztype type)
++				  enum zonefs_ztype ztype)
+ {
+ 	struct super_block *sb = parent->i_sb;
+ 
+-	inode->i_ino = bdev_nr_zones(sb->s_bdev) + type + 1;
++	inode->i_ino = bdev_nr_zones(sb->s_bdev) + ztype + 1;
+ 	inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
+ 	inode->i_op = &zonefs_dir_inode_operations;
+ 	inode->i_fop = &simple_dir_operations;
+@@ -1434,73 +599,38 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
+ 	inc_nlink(parent);
+ }
+ 
+-static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
+-				  enum zonefs_ztype type)
++static const struct inode_operations zonefs_file_inode_operations = {
++	.setattr	= zonefs_inode_setattr,
++};
++
++static void zonefs_init_file_inode(struct inode *inode,
++				   struct zonefs_zone *z)
+ {
+ 	struct super_block *sb = inode->i_sb;
+ 	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+-	int ret = 0;
+-
+-	inode->i_ino = zone->start >> sbi->s_zone_sectors_shift;
+-	inode->i_mode = S_IFREG | sbi->s_perm;
+-
+-	zi->i_ztype = type;
+-	zi->i_zsector = zone->start;
+-	zi->i_zone_size = zone->len << SECTOR_SHIFT;
+-	if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
+-	    !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
+-		zonefs_err(sb,
+-			   "zone size %llu doesn't match device's zone sectors %llu\n",
+-			   zi->i_zone_size,
+-			   bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
+-		return -EINVAL;
+-	}
+ 
+-	zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE,
+-			       zone->capacity << SECTOR_SHIFT);
+-	zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true, true);
++	inode->i_private = z;
+ 
++	inode->i_ino = z->z_sector >> sbi->s_zone_sectors_shift;
++	inode->i_mode = S_IFREG | sbi->s_perm;
+ 	inode->i_uid = sbi->s_uid;
+ 	inode->i_gid = sbi->s_gid;
+-	inode->i_size = zi->i_wpoffset;
+-	inode->i_blocks = zi->i_max_size >> SECTOR_SHIFT;
++	inode->i_size = z->z_wpoffset;
++	inode->i_blocks = z->z_capacity >> SECTOR_SHIFT;
+ 
+ 	inode->i_op = &zonefs_file_inode_operations;
+ 	inode->i_fop = &zonefs_file_operations;
+ 	inode->i_mapping->a_ops = &zonefs_file_aops;
+ 
+-	sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes);
+-	sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits;
+-	sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits;
+-
+-	mutex_lock(&zi->i_truncate_mutex);
+-
+-	/*
+-	 * For sequential zones, make sure that any open zone is closed first
+-	 * to ensure that the initial number of open zones is 0, in sync with
+-	 * the open zone accounting done when the mount option
+-	 * ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
+-	 */
+-	if (type == ZONEFS_ZTYPE_SEQ &&
+-	    (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
+-	     zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
+-		ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
+-		if (ret)
+-			goto unlock;
+-	}
+-
+-	zonefs_account_active(inode);
+-
+-unlock:
+-	mutex_unlock(&zi->i_truncate_mutex);
+-
+-	return ret;
++	/* Update the inode access rights depending on the zone condition */
++	z->z_flags |= ZONEFS_ZONE_INIT_MODE;
++	zonefs_inode_update_mode(inode);
+ }
+ 
+ static struct dentry *zonefs_create_inode(struct dentry *parent,
+-					const char *name, struct blk_zone *zone,
+-					enum zonefs_ztype type)
++					  const char *name,
++					  struct zonefs_zone *z,
++					  enum zonefs_ztype ztype)
+ {
+ 	struct inode *dir = d_inode(parent);
+ 	struct dentry *dentry;
+@@ -1516,15 +646,10 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
+ 		goto dput;
+ 
+ 	inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
+-	if (zone) {
+-		ret = zonefs_init_file_inode(inode, zone, type);
+-		if (ret) {
+-			iput(inode);
+-			goto dput;
+-		}
+-	} else {
+-		zonefs_init_dir_inode(dir, inode, type);
+-	}
++	if (z)
++		zonefs_init_file_inode(inode, z);
++	else
++		zonefs_init_dir_inode(dir, inode, ztype);
+ 
+ 	d_add(dentry, inode);
+ 	dir->i_size++;
+@@ -1540,100 +665,51 @@ dput:
+ struct zonefs_zone_data {
+ 	struct super_block	*sb;
+ 	unsigned int		nr_zones[ZONEFS_ZTYPE_MAX];
++	sector_t		cnv_zone_start;
+ 	struct blk_zone		*zones;
+ };
+ 
+ /*
+- * Create a zone group and populate it with zone files.
++ * Create the inodes for a zone group.
+  */
+-static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
+-				enum zonefs_ztype type)
++static int zonefs_create_zgroup_inodes(struct super_block *sb,
++				       enum zonefs_ztype ztype)
+ {
+-	struct super_block *sb = zd->sb;
+ 	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+-	struct blk_zone *zone, *next, *end;
+-	const char *zgroup_name;
+-	char *file_name;
++	struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype];
+ 	struct dentry *dir, *dent;
+-	unsigned int n = 0;
+-	int ret;
++	char *file_name;
++	int i, ret = 0;
++
++	if (!zgroup)
++		return -ENOMEM;
+ 
+ 	/* If the group is empty, there is nothing to do */
+-	if (!zd->nr_zones[type])
++	if (!zgroup->g_nr_zones)
+ 		return 0;
+ 
+ 	file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL);
+ 	if (!file_name)
+ 		return -ENOMEM;
+ 
+-	if (type == ZONEFS_ZTYPE_CNV)
+-		zgroup_name = "cnv";
+-	else
+-		zgroup_name = "seq";
+-
+-	dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type);
++	dir = zonefs_create_inode(sb->s_root, zonefs_zgroup_name(ztype),
++				  NULL, ztype);
+ 	if (IS_ERR(dir)) {
+ 		ret = PTR_ERR(dir);
+ 		goto free;
+ 	}
+ 
+-	/*
+-	 * The first zone contains the super block: skip it.
+-	 */
+-	end = zd->zones + bdev_nr_zones(sb->s_bdev);
+-	for (zone = &zd->zones[1]; zone < end; zone = next) {
+-
+-		next = zone + 1;
+-		if (zonefs_zone_type(zone) != type)
+-			continue;
+-
+-		/*
+-		 * For conventional zones, contiguous zones can be aggregated
+-		 * together to form larger files. Note that this overwrites the
+-		 * length of the first zone of the set of contiguous zones
+-		 * aggregated together. If one offline or read-only zone is
+-		 * found, assume that all zones aggregated have the same
+-		 * condition.
+-		 */
+-		if (type == ZONEFS_ZTYPE_CNV &&
+-		    (sbi->s_features & ZONEFS_F_AGGRCNV)) {
+-			for (; next < end; next++) {
+-				if (zonefs_zone_type(next) != type)
+-					break;
+-				zone->len += next->len;
+-				zone->capacity += next->capacity;
+-				if (next->cond == BLK_ZONE_COND_READONLY &&
+-				    zone->cond != BLK_ZONE_COND_OFFLINE)
+-					zone->cond = BLK_ZONE_COND_READONLY;
+-				else if (next->cond == BLK_ZONE_COND_OFFLINE)
+-					zone->cond = BLK_ZONE_COND_OFFLINE;
+-			}
+-			if (zone->capacity != zone->len) {
+-				zonefs_err(sb, "Invalid conventional zone capacity\n");
+-				ret = -EINVAL;
+-				goto free;
+-			}
+-		}
+-
+-		/*
+-		 * Use the file number within its group as file name.
+-		 */
+-		snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n);
+-		dent = zonefs_create_inode(dir, file_name, zone, type);
++	for (i = 0; i < zgroup->g_nr_zones; i++) {
++		/* Use the zone number within its group as the file name */
++		snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", i);
++		dent = zonefs_create_inode(dir, file_name,
++					   &zgroup->g_zones[i], ztype);
+ 		if (IS_ERR(dent)) {
+ 			ret = PTR_ERR(dent);
+-			goto free;
++			break;
+ 		}
+-
+-		n++;
+ 	}
+ 
+-	zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
+-		    zgroup_name, n, n > 1 ? "s" : "");
+-
+-	sbi->s_nr_files[type] = n;
+-	ret = 0;
+-
+ free:
+ 	kfree(file_name);
+ 
+@@ -1644,21 +720,38 @@ static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx,
+ 				   void *data)
+ {
+ 	struct zonefs_zone_data *zd = data;
++	struct super_block *sb = zd->sb;
++	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
++
++	/*
++	 * We do not care about the first zone: it contains the super block
++	 * and is not exposed as a file.
++	 */
++	if (!idx)
++		return 0;
+ 
+ 	/*
+-	 * Count the number of usable zones: the first zone at index 0 contains
+-	 * the super block and is ignored.
++	 * Count the number of zones that will be exposed as files.
++	 * For sequential zones, we always have as many files as zones.
++	 * For conventional zones, the number of files depends on whether
++	 * conventional zone aggregation is enabled.
+ 	 */
+ 	switch (zone->type) {
+ 	case BLK_ZONE_TYPE_CONVENTIONAL:
+-		zone->wp = zone->start + zone->len;
+-		if (idx)
+-			zd->nr_zones[ZONEFS_ZTYPE_CNV]++;
++		if (sbi->s_features & ZONEFS_F_AGGRCNV) {
++			/* One file per set of contiguous conventional zones */
++			if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) ||
++			    zone->start != zd->cnv_zone_start)
++				sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++;
++			zd->cnv_zone_start = zone->start + zone->len;
++		} else {
++			/* One file per zone */
++			sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++;
++		}
+ 		break;
+ 	case BLK_ZONE_TYPE_SEQWRITE_REQ:
+ 	case BLK_ZONE_TYPE_SEQWRITE_PREF:
+-		if (idx)
+-			zd->nr_zones[ZONEFS_ZTYPE_SEQ]++;
++		sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++;
+ 		break;
+ 	default:
+ 		zonefs_err(zd->sb, "Unsupported zone type 0x%x\n",
+@@ -1698,11 +791,173 @@ static int zonefs_get_zone_info(struct zonefs_zone_data *zd)
+ 	return 0;
+ }
+ 
+-static inline void zonefs_cleanup_zone_info(struct zonefs_zone_data *zd)
++static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd)
+ {
+ 	kvfree(zd->zones);
+ }
+ 
++/*
++ * Allocate and initialize a zone group from the device zone information.
++ */
++static int zonefs_init_zgroup(struct super_block *sb,
++			      struct zonefs_zone_data *zd,
++			      enum zonefs_ztype ztype)
++{
++	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
++	struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype];
++	struct blk_zone *zone, *next, *end;
++	struct zonefs_zone *z;
++	unsigned int n = 0;
++	int ret;
++
++	/* Allocate the zone group. If it is empty, we have nothing to do. */
++	if (!zgroup->g_nr_zones)
++		return 0;
++
++	zgroup->g_zones = kvcalloc(zgroup->g_nr_zones,
++				   sizeof(struct zonefs_zone), GFP_KERNEL);
++	if (!zgroup->g_zones)
++		return -ENOMEM;
++
++	/*
++	 * Initialize the zone groups using the device zone information.
++	 * We always skip the first zone as it contains the super block
++	 * and is not used to back a file.
++	 */
++	end = zd->zones + bdev_nr_zones(sb->s_bdev);
++	for (zone = &zd->zones[1]; zone < end; zone = next) {
++
++		next = zone + 1;
++		if (zonefs_zone_type(zone) != ztype)
++			continue;
++
++		if (WARN_ON_ONCE(n >= zgroup->g_nr_zones))
++			return -EINVAL;
++
++		/*
++		 * For conventional zones, contiguous zones can be aggregated
++		 * together to form larger files. Note that this overwrites the
++		 * length of the first zone of the set of contiguous zones
++		 * aggregated together. If one offline or read-only zone is
++		 * found, assume that all zones aggregated have the same
++		 * condition.
++		 */
++		if (ztype == ZONEFS_ZTYPE_CNV &&
++		    (sbi->s_features & ZONEFS_F_AGGRCNV)) {
++			for (; next < end; next++) {
++				if (zonefs_zone_type(next) != ztype)
++					break;
++				zone->len += next->len;
++				zone->capacity += next->capacity;
++				if (next->cond == BLK_ZONE_COND_READONLY &&
++				    zone->cond != BLK_ZONE_COND_OFFLINE)
++					zone->cond = BLK_ZONE_COND_READONLY;
++				else if (next->cond == BLK_ZONE_COND_OFFLINE)
++					zone->cond = BLK_ZONE_COND_OFFLINE;
++			}
++		}
++
++		z = &zgroup->g_zones[n];
++		if (ztype == ZONEFS_ZTYPE_CNV)
++			z->z_flags |= ZONEFS_ZONE_CNV;
++		z->z_sector = zone->start;
++		z->z_size = zone->len << SECTOR_SHIFT;
++		if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
++		    !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
++			zonefs_err(sb,
++				"Invalid zone size %llu (device zone sectors %llu)\n",
++				z->z_size,
++				bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
++			return -EINVAL;
++		}
++
++		z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE,
++				      zone->capacity << SECTOR_SHIFT);
++		z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone);
++
++		sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes);
++		sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits;
++		sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits;
++
++		/*
++		 * For sequential zones, make sure that any open zone is closed
++		 * first to ensure that the initial number of open zones is 0,
++		 * in sync with the open zone accounting done when the mount
++		 * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
++		 */
++		if (ztype == ZONEFS_ZTYPE_SEQ &&
++		    (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
++		     zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
++			ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE);
++			if (ret)
++				return ret;
++		}
++
++		zonefs_account_active(sb, z);
++
++		n++;
++	}
++
++	if (WARN_ON_ONCE(n != zgroup->g_nr_zones))
++		return -EINVAL;
++
++	zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
++		    zonefs_zgroup_name(ztype),
++		    zgroup->g_nr_zones,
++		    zgroup->g_nr_zones > 1 ? "s" : "");
++
++	return 0;
++}
++
++static void zonefs_free_zgroups(struct super_block *sb)
++{
++	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
++	enum zonefs_ztype ztype;
++
++	if (!sbi)
++		return;
++
++	for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
++		kvfree(sbi->s_zgroup[ztype].g_zones);
++		sbi->s_zgroup[ztype].g_zones = NULL;
++	}
++}
++
++/*
++ * Get the device zone information and initialize all zone groups.
++ */
++static int zonefs_init_zgroups(struct super_block *sb)
++{
++	struct zonefs_zone_data zd;
++	enum zonefs_ztype ztype;
++	int ret;
++
++	/* First get the device zone information */
++	memset(&zd, 0, sizeof(struct zonefs_zone_data));
++	zd.sb = sb;
++	ret = zonefs_get_zone_info(&zd);
++	if (ret)
++		goto cleanup;
++
++	/* Allocate and initialize the zone groups */
++	for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
++		ret = zonefs_init_zgroup(sb, &zd, ztype);
++		if (ret) {
++			zonefs_info(sb,
++				    "Zone group \"%s\" initialization failed\n",
++				    zonefs_zgroup_name(ztype));
++			break;
++		}
++	}
++
++cleanup:
++	zonefs_free_zone_info(&zd);
++	if (ret)
++		zonefs_free_zgroups(sb);
++
++	return ret;
++}
++
+ /*
+  * Read super block information from the device.
+  */
+@@ -1785,6 +1040,14 @@ free_page:
+ 	return ret;
+ }
+ 
++static const struct super_operations zonefs_sops = {
++	.alloc_inode	= zonefs_alloc_inode,
++	.free_inode	= zonefs_free_inode,
++	.statfs		= zonefs_statfs,
++	.remount_fs	= zonefs_remount,
++	.show_options	= zonefs_show_options,
++};
++
+ /*
+  * Check that the device is zoned. If it is, get the list of zones and create
+  * sub-directories and files according to the device zone configuration and
+@@ -1792,7 +1055,6 @@ free_page:
+  */
+ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
+ {
+-	struct zonefs_zone_data zd;
+ 	struct zonefs_sb_info *sbi;
+ 	struct inode *inode;
+ 	enum zonefs_ztype t;
+@@ -1845,16 +1107,6 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
+ 	if (ret)
+ 		return ret;
+ 
+-	memset(&zd, 0, sizeof(struct zonefs_zone_data));
+-	zd.sb = sb;
+-	ret = zonefs_get_zone_info(&zd);
+-	if (ret)
+-		goto cleanup;
+-
+-	ret = zonefs_sysfs_register(sb);
+-	if (ret)
+-		goto cleanup;
+-
+ 	zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev));
+ 
+ 	if (!sbi->s_max_wro_seq_files &&
+@@ -1865,6 +1117,11 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
+ 		sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
+ 	}
+ 
++	/* Initialize the zone groups */
++	ret = zonefs_init_zgroups(sb);
++	if (ret)
++		goto cleanup;
++
+ 	/* Create root directory inode */
+ 	ret = -ENOMEM;
+ 	inode = new_inode(sb);
+@@ -1884,13 +1141,19 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
+ 
+ 	/* Create and populate files in zone groups directories */
+ 	for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
+-		ret = zonefs_create_zgroup(&zd, t);
++		ret = zonefs_create_zgroup_inodes(sb, t);
+ 		if (ret)
+-			break;
++			goto cleanup;
+ 	}
+ 
++	ret = zonefs_sysfs_register(sb);
++	if (ret)
++		goto cleanup;
++
++	return 0;
++
+ cleanup:
+-	zonefs_cleanup_zone_info(&zd);
++	zonefs_free_zgroups(sb);
+ 
+ 	return ret;
+ }
+@@ -1909,6 +1172,7 @@ static void zonefs_kill_super(struct super_block *sb)
+ 		d_genocide(sb->s_root);
+ 
+ 	zonefs_sysfs_unregister(sb);
++	zonefs_free_zgroups(sb);
+ 	kill_block_super(sb);
+ 	kfree(sbi);
+ }
+diff --git a/fs/zonefs/trace.h b/fs/zonefs/trace.h
+index 42edcfd393ed2..9969db3a9c7dc 100644
+--- a/fs/zonefs/trace.h
++++ b/fs/zonefs/trace.h
+@@ -20,8 +20,9 @@
+ #define show_dev(dev) MAJOR(dev), MINOR(dev)
+ 
+ TRACE_EVENT(zonefs_zone_mgmt,
+-	    TP_PROTO(struct inode *inode, enum req_op op),
+-	    TP_ARGS(inode, op),
++	    TP_PROTO(struct super_block *sb, struct zonefs_zone *z,
++		     enum req_op op),
++	    TP_ARGS(sb, z, op),
+ 	    TP_STRUCT__entry(
+ 			     __field(dev_t, dev)
+ 			     __field(ino_t, ino)
+@@ -30,12 +31,12 @@ TRACE_EVENT(zonefs_zone_mgmt,
+ 			     __field(sector_t, nr_sectors)
+ 	    ),
+ 	    TP_fast_assign(
+-			   __entry->dev = inode->i_sb->s_dev;
+-			   __entry->ino = inode->i_ino;
++			   __entry->dev = sb->s_dev;
++			   __entry->ino =
++				z->z_sector >> ZONEFS_SB(sb)->s_zone_sectors_shift;
+ 			   __entry->op = op;
+-			   __entry->sector = ZONEFS_I(inode)->i_zsector;
+-			   __entry->nr_sectors =
+-				   ZONEFS_I(inode)->i_zone_size >> SECTOR_SHIFT;
++			   __entry->sector = z->z_sector;
++			   __entry->nr_sectors = z->z_size >> SECTOR_SHIFT;
+ 	    ),
+ 	    TP_printk("bdev=(%d,%d), ino=%lu op=%s, sector=%llu, nr_sectors=%llu",
+ 		      show_dev(__entry->dev), (unsigned long)__entry->ino,
+@@ -58,9 +59,10 @@ TRACE_EVENT(zonefs_file_dio_append,
+ 	    TP_fast_assign(
+ 			   __entry->dev = inode->i_sb->s_dev;
+ 			   __entry->ino = inode->i_ino;
+-			   __entry->sector = ZONEFS_I(inode)->i_zsector;
++			   __entry->sector = zonefs_inode_zone(inode)->z_sector;
+ 			   __entry->size = size;
+-			   __entry->wpoffset = ZONEFS_I(inode)->i_wpoffset;
++			   __entry->wpoffset =
++				zonefs_inode_zone(inode)->z_wpoffset;
+ 			   __entry->ret = ret;
+ 	    ),
+ 	    TP_printk("bdev=(%d, %d), ino=%lu, sector=%llu, size=%zu, wpoffset=%llu, ret=%zu",
+diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
+index 1dbe78119ff16..2d626e18b1411 100644
+--- a/fs/zonefs/zonefs.h
++++ b/fs/zonefs/zonefs.h
+@@ -39,31 +39,47 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone)
+ 	return ZONEFS_ZTYPE_SEQ;
+ }
+ 
+-#define ZONEFS_ZONE_OPEN	(1U << 0)
+-#define ZONEFS_ZONE_ACTIVE	(1U << 1)
+-#define ZONEFS_ZONE_OFFLINE	(1U << 2)
+-#define ZONEFS_ZONE_READONLY	(1U << 3)
++#define ZONEFS_ZONE_INIT_MODE	(1U << 0)
++#define ZONEFS_ZONE_OPEN	(1U << 1)
++#define ZONEFS_ZONE_ACTIVE	(1U << 2)
++#define ZONEFS_ZONE_OFFLINE	(1U << 3)
++#define ZONEFS_ZONE_READONLY	(1U << 4)
++#define ZONEFS_ZONE_CNV		(1U << 31)
+ 
+ /*
+- * In-memory inode data.
++ * In-memory per-file inode zone data.
+  */
+-struct zonefs_inode_info {
+-	struct inode		i_vnode;
++struct zonefs_zone {
++	/* Zone state flags */
++	unsigned int		z_flags;
+ 
+-	/* File zone type */
+-	enum zonefs_ztype	i_ztype;
++	/* Zone start sector (512B unit) */
++	sector_t		z_sector;
+ 
+-	/* File zone start sector (512B unit) */
+-	sector_t		i_zsector;
++	/* Zone size (bytes) */
++	loff_t			z_size;
+ 
+-	/* File zone write pointer position (sequential zones only) */
+-	loff_t			i_wpoffset;
++	/* Zone capacity (file maximum size, bytes) */
++	loff_t			z_capacity;
+ 
+-	/* File maximum size */
+-	loff_t			i_max_size;
++	/* Write pointer offset in the zone (sequential zones only, bytes) */
++	loff_t			z_wpoffset;
++};
+ 
+-	/* File zone size */
+-	loff_t			i_zone_size;
++/*
++ * In-memory zone group information: all zones of a group are exposed
++ * as files, one file per zone.
++ */
++struct zonefs_zone_group {
++	unsigned int		g_nr_zones;
++	struct zonefs_zone	*g_zones;
++};
++
++/*
++ * In-memory inode data.
++ */
++struct zonefs_inode_info {
++	struct inode		i_vnode;
+ 
+ 	/*
+ 	 * To serialise fully against both syscall and mmap based IO and
+@@ -82,7 +98,6 @@ struct zonefs_inode_info {
+ 
+ 	/* guarded by i_truncate_mutex */
+ 	unsigned int		i_wr_refcnt;
+-	unsigned int		i_flags;
+ };
+ 
+ static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode)
+@@ -90,6 +105,31 @@ static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode)
+ 	return container_of(inode, struct zonefs_inode_info, i_vnode);
+ }
+ 
++static inline bool zonefs_zone_is_cnv(struct zonefs_zone *z)
++{
++	return z->z_flags & ZONEFS_ZONE_CNV;
++}
++
++static inline bool zonefs_zone_is_seq(struct zonefs_zone *z)
++{
++	return !zonefs_zone_is_cnv(z);
++}
++
++static inline struct zonefs_zone *zonefs_inode_zone(struct inode *inode)
++{
++	return inode->i_private;
++}
++
++static inline bool zonefs_inode_is_cnv(struct inode *inode)
++{
++	return zonefs_zone_is_cnv(zonefs_inode_zone(inode));
++}
++
++static inline bool zonefs_inode_is_seq(struct inode *inode)
++{
++	return zonefs_zone_is_seq(zonefs_inode_zone(inode));
++}
++
+ /*
+  * On-disk super block (block 0).
+  */
+@@ -181,7 +221,7 @@ struct zonefs_sb_info {
+ 	uuid_t			s_uuid;
+ 	unsigned int		s_zone_sectors_shift;
+ 
+-	unsigned int		s_nr_files[ZONEFS_ZTYPE_MAX];
++	struct zonefs_zone_group s_zgroup[ZONEFS_ZTYPE_MAX];
+ 
+ 	loff_t			s_blocks;
+ 	loff_t			s_used_blocks;
+@@ -209,6 +249,28 @@ static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb)
+ #define zonefs_warn(sb, format, args...)	\
+ 	pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args)
+ 
++/* In super.c */
++void zonefs_inode_account_active(struct inode *inode);
++int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op);
++void zonefs_i_size_write(struct inode *inode, loff_t isize);
++void zonefs_update_stats(struct inode *inode, loff_t new_isize);
++void __zonefs_io_error(struct inode *inode, bool write);
++
++static inline void zonefs_io_error(struct inode *inode, bool write)
++{
++	struct zonefs_inode_info *zi = ZONEFS_I(inode);
++
++	mutex_lock(&zi->i_truncate_mutex);
++	__zonefs_io_error(inode, write);
++	mutex_unlock(&zi->i_truncate_mutex);
++}
++
++/* In file.c */
++extern const struct address_space_operations zonefs_file_aops;
++extern const struct file_operations zonefs_file_operations;
++int zonefs_file_truncate(struct inode *inode, loff_t isize);
++
++/* In sysfs.c */
+ int zonefs_sysfs_register(struct super_block *sb);
+ void zonefs_sysfs_unregister(struct super_block *sb);
+ int zonefs_sysfs_init(void);
+diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
+index 0584e9f6e3397..57acb895c0381 100644
+--- a/include/acpi/acpi_bus.h
++++ b/include/acpi/acpi_bus.h
+@@ -657,6 +657,7 @@ static inline bool acpi_quirk_skip_acpi_ac_and_battery(void)
+ #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS)
+ bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev);
+ int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip);
++bool acpi_quirk_skip_gpio_event_handlers(void);
+ #else
+ static inline bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev)
+ {
+@@ -668,6 +669,10 @@ acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip)
+ 	*skip = false;
+ 	return 0;
+ }
++static inline bool acpi_quirk_skip_gpio_event_handlers(void)
++{
++	return false;
++}
+ #endif
+ 
+ #ifdef CONFIG_PM
+diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
+index 90b2fb0292cb1..012fa0d171b27 100644
+--- a/include/trace/events/rcu.h
++++ b/include/trace/events/rcu.h
+@@ -768,7 +768,7 @@ TRACE_EVENT_RCU(rcu_torture_read,
+ 	TP_ARGS(rcutorturename, rhp, secs, c_old, c),
+ 
+ 	TP_STRUCT__entry(
+-		__field(char, rcutorturename[RCUTORTURENAME_LEN])
++		__array(char, rcutorturename, RCUTORTURENAME_LEN)
+ 		__field(struct rcu_head *, rhp)
+ 		__field(unsigned long, secs)
+ 		__field(unsigned long, c_old)
+diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
+index 655d92e803e14..79a443c65ea93 100644
+--- a/include/xen/interface/platform.h
++++ b/include/xen/interface/platform.h
+@@ -483,6 +483,8 @@ struct xenpf_symdata {
+ };
+ DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata);
+ 
++#define XENPF_get_dom0_console 64
++
+ struct xen_platform_op {
+ 	uint32_t cmd;
+ 	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
+@@ -506,6 +508,7 @@ struct xen_platform_op {
+ 		struct xenpf_mem_hotadd        mem_add;
+ 		struct xenpf_core_parking      core_parking;
+ 		struct xenpf_symdata           symdata;
++		struct dom0_vga_console_info   dom0_console;
+ 		uint8_t                        pad[128];
+ 	} u;
+ };
+diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
+index 729793ae97127..c2cde88aeed53 100644
+--- a/io_uring/alloc_cache.h
++++ b/io_uring/alloc_cache.h
+@@ -27,6 +27,7 @@ static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *c
+ 		struct hlist_node *node = cache->list.first;
+ 
+ 		hlist_del(node);
++		cache->nr_cached--;
+ 		return container_of(node, struct io_cache_entry, node);
+ 	}
+ 
+diff --git a/io_uring/poll.c b/io_uring/poll.c
+index fea739eef56f4..666666ab2e73d 100644
+--- a/io_uring/poll.c
++++ b/io_uring/poll.c
+@@ -724,6 +724,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
+ 	apoll = io_req_alloc_apoll(req, issue_flags);
+ 	if (!apoll)
+ 		return IO_APOLL_ABORTED;
++	req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL);
+ 	req->flags |= REQ_F_POLLED;
+ 	ipt.pt._qproc = io_async_queue_proc;
+ 
+diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
+index 2b8743645efcb..f27f4975217d9 100644
+--- a/io_uring/rsrc.h
++++ b/io_uring/rsrc.h
+@@ -144,15 +144,13 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
+ 					unsigned int issue_flags)
+ {
+ 	if (!req->rsrc_node) {
+-		req->rsrc_node = ctx->rsrc_node;
++		io_ring_submit_lock(ctx, issue_flags);
+ 
+-		if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+-			lockdep_assert_held(&ctx->uring_lock);
++		lockdep_assert_held(&ctx->uring_lock);
+ 
+-			io_charge_rsrc_node(ctx);
+-		} else {
+-			percpu_ref_get(&req->rsrc_node->refs);
+-		}
++		req->rsrc_node = ctx->rsrc_node;
++		io_charge_rsrc_node(ctx);
++		io_ring_submit_unlock(ctx, issue_flags);
+ 	}
+ }
+ 
+diff --git a/kernel/compat.c b/kernel/compat.c
+index 55551989d9da5..fb50f29d9b361 100644
+--- a/kernel/compat.c
++++ b/kernel/compat.c
+@@ -152,7 +152,7 @@ COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t,  pid, unsigned int, len,
+ 	if (len & (sizeof(compat_ulong_t)-1))
+ 		return -EINVAL;
+ 
+-	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
++	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ 		return -ENOMEM;
+ 
+ 	ret = sched_getaffinity(pid, mask);
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 9a0698353d60f..57d84b534cdea 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8404,14 +8404,14 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
+ 	if (len & (sizeof(unsigned long)-1))
+ 		return -EINVAL;
+ 
+-	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
++	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ 		return -ENOMEM;
+ 
+ 	ret = sched_getaffinity(pid, mask);
+ 	if (ret == 0) {
+ 		unsigned int retlen = min(len, cpumask_size());
+ 
+-		if (copy_to_user(user_mask_ptr, mask, retlen))
++		if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen))
+ 			ret = -EFAULT;
+ 		else
+ 			ret = retlen;
+diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c
+index c736487fc0e48..e0c420eb0b2b4 100644
+--- a/kernel/trace/kprobe_event_gen_test.c
++++ b/kernel/trace/kprobe_event_gen_test.c
+@@ -146,7 +146,7 @@ static int __init test_gen_kprobe_cmd(void)
+ 	if (trace_event_file_is_valid(gen_kprobe_test))
+ 		gen_kprobe_test = NULL;
+ 	/* We got an error after creating the event, delete it */
+-	ret = kprobe_event_delete("gen_kprobe_test");
++	kprobe_event_delete("gen_kprobe_test");
+ 	goto out;
+ }
+ 
+@@ -211,7 +211,7 @@ static int __init test_gen_kretprobe_cmd(void)
+ 	if (trace_event_file_is_valid(gen_kretprobe_test))
+ 		gen_kretprobe_test = NULL;
+ 	/* We got an error after creating the event, delete it */
+-	ret = kprobe_event_delete("gen_kretprobe_test");
++	kprobe_event_delete("gen_kretprobe_test");
+ 	goto out;
+ }
+ 
+diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h
+index 7a5bf44839c9c..f06df065dec01 100644
+--- a/lib/zstd/common/zstd_deps.h
++++ b/lib/zstd/common/zstd_deps.h
+@@ -84,7 +84,7 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
+ 
+ #include <linux/kernel.h>
+ 
+-#define assert(x) WARN_ON((x))
++#define assert(x) WARN_ON(!(x))
+ 
+ #endif /* ZSTD_DEPS_ASSERT */
+ #endif /* ZSTD_DEPS_NEED_ASSERT */
+diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
+index b9b935a9f5c0d..6b3177c947114 100644
+--- a/lib/zstd/decompress/zstd_decompress.c
++++ b/lib/zstd/decompress/zstd_decompress.c
+@@ -798,7 +798,7 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
+         if (srcSize == 0) return 0;
+         RETURN_ERROR(dstBuffer_null, "");
+     }
+-    ZSTD_memcpy(dst, src, srcSize);
++    ZSTD_memmove(dst, src, srcSize);
+     return srcSize;
+ }
+ 
+@@ -858,6 +858,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+ 
+     /* Loop on each block */
+     while (1) {
++        BYTE* oBlockEnd = oend;
+         size_t decodedSize;
+         blockProperties_t blockProperties;
+         size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
+@@ -867,16 +868,34 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+         remainingSrcSize -= ZSTD_blockHeaderSize;
+         RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
+ 
++        if (ip >= op && ip < oBlockEnd) {
++            /* We are decompressing in-place. Limit the output pointer so that we
++             * don't overwrite the block that we are currently reading. This will
++             * fail decompression if the input & output pointers aren't spaced
++             * far enough apart.
++             *
++             * This is important to set, even when the pointers are far enough
++             * apart, because ZSTD_decompressBlock_internal() can decide to store
++             * literals in the output buffer, after the block it is decompressing.
++             * Since we don't want anything to overwrite our input, we have to tell
++             * ZSTD_decompressBlock_internal to never write past ip.
++             *
++             * See ZSTD_allocateLiteralsBuffer() for reference.
++             */
++            oBlockEnd = op + (ip - op);
++        }
++
+         switch(blockProperties.blockType)
+         {
+         case bt_compressed:
+-            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
++            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming);
+             break;
+         case bt_raw :
++            /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
+             decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
+             break;
+         case bt_rle :
+-            decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
++            decodedSize = ZSTD_setRleBlock(op, (size_t)(oBlockEnd-op), *ip, blockProperties.origSize);
+             break;
+         case bt_reserved :
+         default:
+diff --git a/net/can/bcm.c b/net/can/bcm.c
+index 27706f6ace34a..a962ec2b8ba5b 100644
+--- a/net/can/bcm.c
++++ b/net/can/bcm.c
+@@ -941,6 +941,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
+ 
+ 			cf = op->frames + op->cfsiz * i;
+ 			err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz);
++			if (err < 0)
++				goto free_op;
+ 
+ 			if (op->flags & CAN_FD_FRAME) {
+ 				if (cf->len > 64)
+@@ -950,12 +952,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
+ 					err = -EINVAL;
+ 			}
+ 
+-			if (err < 0) {
+-				if (op->frames != &op->sframe)
+-					kfree(op->frames);
+-				kfree(op);
+-				return err;
+-			}
++			if (err < 0)
++				goto free_op;
+ 
+ 			if (msg_head->flags & TX_CP_CAN_ID) {
+ 				/* copy can_id into frame */
+@@ -1026,6 +1024,12 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
+ 		bcm_tx_start_timer(op);
+ 
+ 	return msg_head->nframes * op->cfsiz + MHSIZ;
++
++free_op:
++	if (op->frames != &op->sframe)
++		kfree(op->frames);
++	kfree(op);
++	return err;
+ }
+ 
+ /*
+diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
+index fce9b9ebf13f6..fb92c3609e172 100644
+--- a/net/can/j1939/transport.c
++++ b/net/can/j1939/transport.c
+@@ -1124,8 +1124,6 @@ static void __j1939_session_cancel(struct j1939_session *session,
+ 
+ 	if (session->sk)
+ 		j1939_sk_send_loop_abort(session->sk, session->err);
+-	else
+-		j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+ }
+ 
+ static void j1939_session_cancel(struct j1939_session *session,
+@@ -1140,6 +1138,9 @@ static void j1939_session_cancel(struct j1939_session *session,
+ 	}
+ 
+ 	j1939_session_list_unlock(session->priv);
++
++	if (!session->sk)
++		j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+ }
+ 
+ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
+@@ -1253,6 +1254,9 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
+ 			__j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
+ 		}
+ 		j1939_session_list_unlock(session->priv);
++
++		if (!session->sk)
++			j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+ 	}
+ 
+ 	j1939_session_put(session);
+diff --git a/net/dsa/slave.c b/net/dsa/slave.c
+index 6711ddc0a3c7d..df8b16c741a40 100644
+--- a/net/dsa/slave.c
++++ b/net/dsa/slave.c
+@@ -57,6 +57,12 @@ struct dsa_standalone_event_work {
+ 	u16 vid;
+ };
+ 
++struct dsa_host_vlan_rx_filtering_ctx {
++	struct net_device *dev;
++	const unsigned char *addr;
++	enum dsa_standalone_event event;
++};
++
+ static bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds)
+ {
+ 	return ds->ops->port_fdb_add && ds->ops->port_fdb_del &&
+@@ -155,18 +161,37 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev,
+ 	return 0;
+ }
+ 
++static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid,
++					    void *arg)
++{
++	struct dsa_host_vlan_rx_filtering_ctx *ctx = arg;
++
++	return dsa_slave_schedule_standalone_work(ctx->dev, ctx->event,
++						  ctx->addr, vid);
++}
++
+ static int dsa_slave_sync_uc(struct net_device *dev,
+ 			     const unsigned char *addr)
+ {
+ 	struct net_device *master = dsa_slave_to_master(dev);
+ 	struct dsa_port *dp = dsa_slave_to_port(dev);
++	struct dsa_host_vlan_rx_filtering_ctx ctx = {
++		.dev = dev,
++		.addr = addr,
++		.event = DSA_UC_ADD,
++	};
++	int err;
+ 
+ 	dev_uc_add(master, addr);
+ 
+ 	if (!dsa_switch_supports_uc_filtering(dp->ds))
+ 		return 0;
+ 
+-	return dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0);
++	err = dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0);
++	if (err)
++		return err;
++
++	return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
+ }
+ 
+ static int dsa_slave_unsync_uc(struct net_device *dev,
+@@ -174,13 +199,23 @@ static int dsa_slave_unsync_uc(struct net_device *dev,
+ {
+ 	struct net_device *master = dsa_slave_to_master(dev);
+ 	struct dsa_port *dp = dsa_slave_to_port(dev);
++	struct dsa_host_vlan_rx_filtering_ctx ctx = {
++		.dev = dev,
++		.addr = addr,
++		.event = DSA_UC_DEL,
++	};
++	int err;
+ 
+ 	dev_uc_del(master, addr);
+ 
+ 	if (!dsa_switch_supports_uc_filtering(dp->ds))
+ 		return 0;
+ 
+-	return dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0);
++	err = dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0);
++	if (err)
++		return err;
++
++	return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
+ }
+ 
+ static int dsa_slave_sync_mc(struct net_device *dev,
+@@ -188,13 +223,23 @@ static int dsa_slave_sync_mc(struct net_device *dev,
+ {
+ 	struct net_device *master = dsa_slave_to_master(dev);
+ 	struct dsa_port *dp = dsa_slave_to_port(dev);
++	struct dsa_host_vlan_rx_filtering_ctx ctx = {
++		.dev = dev,
++		.addr = addr,
++		.event = DSA_MC_ADD,
++	};
++	int err;
+ 
+ 	dev_mc_add(master, addr);
+ 
+ 	if (!dsa_switch_supports_mc_filtering(dp->ds))
+ 		return 0;
+ 
+-	return dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0);
++	err = dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0);
++	if (err)
++		return err;
++
++	return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
+ }
+ 
+ static int dsa_slave_unsync_mc(struct net_device *dev,
+@@ -202,13 +247,23 @@ static int dsa_slave_unsync_mc(struct net_device *dev,
+ {
+ 	struct net_device *master = dsa_slave_to_master(dev);
+ 	struct dsa_port *dp = dsa_slave_to_port(dev);
++	struct dsa_host_vlan_rx_filtering_ctx ctx = {
++		.dev = dev,
++		.addr = addr,
++		.event = DSA_MC_DEL,
++	};
++	int err;
+ 
+ 	dev_mc_del(master, addr);
+ 
+ 	if (!dsa_switch_supports_mc_filtering(dp->ds))
+ 		return 0;
+ 
+-	return dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0);
++	err = dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0);
++	if (err)
++		return err;
++
++	return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
+ }
+ 
+ void dsa_slave_sync_ha(struct net_device *dev)
+@@ -1668,6 +1723,8 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
+ 		.flags = 0,
+ 	};
+ 	struct netlink_ext_ack extack = {0};
++	struct dsa_switch *ds = dp->ds;
++	struct netdev_hw_addr *ha;
+ 	int ret;
+ 
+ 	/* User port... */
+@@ -1687,6 +1744,30 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
+ 		return ret;
+ 	}
+ 
++	if (!dsa_switch_supports_uc_filtering(ds) &&
++	    !dsa_switch_supports_mc_filtering(ds))
++		return 0;
++
++	netif_addr_lock_bh(dev);
++
++	if (dsa_switch_supports_mc_filtering(ds)) {
++		netdev_for_each_synced_mc_addr(ha, dev) {
++			dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD,
++							   ha->addr, vid);
++		}
++	}
++
++	if (dsa_switch_supports_uc_filtering(ds)) {
++		netdev_for_each_synced_uc_addr(ha, dev) {
++			dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD,
++							   ha->addr, vid);
++		}
++	}
++
++	netif_addr_unlock_bh(dev);
++
++	dsa_flush_workqueue();
++
+ 	return 0;
+ }
+ 
+@@ -1699,13 +1780,43 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
+ 		/* This API only allows programming tagged, non-PVID VIDs */
+ 		.flags = 0,
+ 	};
++	struct dsa_switch *ds = dp->ds;
++	struct netdev_hw_addr *ha;
+ 	int err;
+ 
+ 	err = dsa_port_vlan_del(dp, &vlan);
+ 	if (err)
+ 		return err;
+ 
+-	return dsa_port_host_vlan_del(dp, &vlan);
++	err = dsa_port_host_vlan_del(dp, &vlan);
++	if (err)
++		return err;
++
++	if (!dsa_switch_supports_uc_filtering(ds) &&
++	    !dsa_switch_supports_mc_filtering(ds))
++		return 0;
++
++	netif_addr_lock_bh(dev);
++
++	if (dsa_switch_supports_mc_filtering(ds)) {
++		netdev_for_each_synced_mc_addr(ha, dev) {
++			dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL,
++							   ha->addr, vid);
++		}
++	}
++
++	if (dsa_switch_supports_uc_filtering(ds)) {
++		netdev_for_each_synced_uc_addr(ha, dev) {
++			dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL,
++							   ha->addr, vid);
++		}
++	}
++
++	netif_addr_unlock_bh(dev);
++
++	dsa_flush_workqueue();
++
++	return 0;
+ }
+ 
+ static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg)
+diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
+index 00db74d96583d..b77f1189d19d1 100644
+--- a/net/hsr/hsr_framereg.c
++++ b/net/hsr/hsr_framereg.c
+@@ -415,7 +415,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
+ 	node_dst = find_node_by_addr_A(&port->hsr->node_db,
+ 				       eth_hdr(skb)->h_dest);
+ 	if (!node_dst) {
+-		if (net_ratelimit())
++		if (port->hsr->prot_version != PRP_V1 && net_ratelimit())
+ 			netdev_err(skb->dev, "%s: Unknown node\n", __func__);
+ 		return;
+ 	}
+diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
+index d611e15301839..e24d2d5b04ad0 100644
+--- a/net/mac80211/cfg.c
++++ b/net/mac80211/cfg.c
+@@ -2576,6 +2576,17 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
+ 	if (!sband)
+ 		return -EINVAL;
+ 
++	if (params->basic_rates) {
++		if (!ieee80211_parse_bitrates(link->conf->chandef.width,
++					      wiphy->bands[sband->band],
++					      params->basic_rates,
++					      params->basic_rates_len,
++					      &link->conf->basic_rates))
++			return -EINVAL;
++		changed |= BSS_CHANGED_BASIC_RATES;
++		ieee80211_check_rate_mask(link);
++	}
++
+ 	if (params->use_cts_prot >= 0) {
+ 		link->conf->use_cts_prot = params->use_cts_prot;
+ 		changed |= BSS_CHANGED_ERP_CTS_PROT;
+@@ -2597,16 +2608,6 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
+ 		changed |= BSS_CHANGED_ERP_SLOT;
+ 	}
+ 
+-	if (params->basic_rates) {
+-		ieee80211_parse_bitrates(link->conf->chandef.width,
+-					 wiphy->bands[sband->band],
+-					 params->basic_rates,
+-					 params->basic_rates_len,
+-					 &link->conf->basic_rates);
+-		changed |= BSS_CHANGED_BASIC_RATES;
+-		ieee80211_check_rate_mask(link);
+-	}
+-
+ 	if (params->ap_isolate >= 0) {
+ 		if (params->ap_isolate)
+ 			sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS;
+diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
+index aaa5b2741b79d..1b9465b43997c 100644
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -2155,6 +2155,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
+ 	switch (skst) {
+ 	case TCP_FIN_WAIT1:
+ 	case TCP_FIN_WAIT2:
++	case TCP_LAST_ACK:
+ 		break;
+ 	case TCP_ESTABLISHED:
+ 	case TCP_CLOSE_WAIT:
+diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
+index cf5172d4ce68c..103af2b3e986f 100644
+--- a/net/xfrm/xfrm_user.c
++++ b/net/xfrm/xfrm_user.c
+@@ -1012,7 +1012,9 @@ static int copy_to_user_aead(struct xfrm_algo_aead *aead, struct sk_buff *skb)
+ 		return -EMSGSIZE;
+ 
+ 	ap = nla_data(nla);
+-	memcpy(ap, aead, sizeof(*aead));
++	strscpy_pad(ap->alg_name, aead->alg_name, sizeof(ap->alg_name));
++	ap->alg_key_len = aead->alg_key_len;
++	ap->alg_icv_len = aead->alg_icv_len;
+ 
+ 	if (redact_secret && aead->alg_key_len)
+ 		memset(ap->alg_key, 0, (aead->alg_key_len + 7) / 8);
+@@ -1032,7 +1034,8 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb)
+ 		return -EMSGSIZE;
+ 
+ 	ap = nla_data(nla);
+-	memcpy(ap, ealg, sizeof(*ealg));
++	strscpy_pad(ap->alg_name, ealg->alg_name, sizeof(ap->alg_name));
++	ap->alg_key_len = ealg->alg_key_len;
+ 
+ 	if (redact_secret && ealg->alg_key_len)
+ 		memset(ap->alg_key, 0, (ealg->alg_key_len + 7) / 8);
+@@ -1043,6 +1046,40 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb)
+ 	return 0;
+ }
+ 
++static int copy_to_user_calg(struct xfrm_algo *calg, struct sk_buff *skb)
++{
++	struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_COMP, sizeof(*calg));
++	struct xfrm_algo *ap;
++
++	if (!nla)
++		return -EMSGSIZE;
++
++	ap = nla_data(nla);
++	strscpy_pad(ap->alg_name, calg->alg_name, sizeof(ap->alg_name));
++	ap->alg_key_len = 0;
++
++	return 0;
++}
++
++static int copy_to_user_encap(struct xfrm_encap_tmpl *ep, struct sk_buff *skb)
++{
++	struct nlattr *nla = nla_reserve(skb, XFRMA_ENCAP, sizeof(*ep));
++	struct xfrm_encap_tmpl *uep;
++
++	if (!nla)
++		return -EMSGSIZE;
++
++	uep = nla_data(nla);
++	memset(uep, 0, sizeof(*uep));
++
++	uep->encap_type = ep->encap_type;
++	uep->encap_sport = ep->encap_sport;
++	uep->encap_dport = ep->encap_dport;
++	uep->encap_oa = ep->encap_oa;
++
++	return 0;
++}
++
+ static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m)
+ {
+ 	int ret = 0;
+@@ -1098,12 +1135,12 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
+ 			goto out;
+ 	}
+ 	if (x->calg) {
+-		ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
++		ret = copy_to_user_calg(x->calg, skb);
+ 		if (ret)
+ 			goto out;
+ 	}
+ 	if (x->encap) {
+-		ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
++		ret = copy_to_user_encap(x->encap, skb);
+ 		if (ret)
+ 			goto out;
+ 	}
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
+index efff8078e3958..9466b6a2abae4 100644
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -1733,7 +1733,7 @@ static void extract_crcs_for_object(const char *object, struct module *mod)
+ 		if (!isdigit(*p))
+ 			continue;	/* skip this line */
+ 
+-		crc = strtol(p, &p, 0);
++		crc = strtoul(p, &p, 0);
+ 		if (*p != '\n')
+ 			continue;	/* skip this line */
+ 
+diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
+index 8b6aeb8a78f7d..02fd65993e7e5 100644
+--- a/sound/core/pcm_lib.c
++++ b/sound/core/pcm_lib.c
+@@ -2155,6 +2155,8 @@ int pcm_lib_apply_appl_ptr(struct snd_pcm_substream *substream,
+ 		ret = substream->ops->ack(substream);
+ 		if (ret < 0) {
+ 			runtime->control->appl_ptr = old_appl_ptr;
++			if (ret == -EPIPE)
++				__snd_pcm_xrun(substream);
+ 			return ret;
+ 		}
+ 	}
+diff --git a/sound/pci/asihpi/hpi6205.c b/sound/pci/asihpi/hpi6205.c
+index 27e11b5f70b97..c7d7eff86727f 100644
+--- a/sound/pci/asihpi/hpi6205.c
++++ b/sound/pci/asihpi/hpi6205.c
+@@ -430,7 +430,7 @@ void HPI_6205(struct hpi_message *phm, struct hpi_response *phr)
+ 		pao = hpi_find_adapter(phm->adapter_index);
+ 	} else {
+ 		/* subsys messages don't address an adapter */
+-		_HPI_6205(NULL, phm, phr);
++		phr->error = HPI_ERROR_INVALID_OBJ_INDEX;
+ 		return;
+ 	}
+ 
+diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
+index acde4cd58785e..099722ebaed83 100644
+--- a/sound/pci/hda/patch_ca0132.c
++++ b/sound/pci/hda/patch_ca0132.c
+@@ -4228,8 +4228,10 @@ static int tuning_ctl_set(struct hda_codec *codec, hda_nid_t nid,
+ 
+ 	for (i = 0; i < TUNING_CTLS_COUNT; i++)
+ 		if (nid == ca0132_tuning_ctls[i].nid)
+-			break;
++			goto found;
+ 
++	return -EINVAL;
++found:
+ 	snd_hda_power_up(codec);
+ 	dspio_set_param(codec, ca0132_tuning_ctls[i].mid, 0x20,
+ 			ca0132_tuning_ctls[i].req,
+diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
+index 75e1d00074b9f..a889cccdd607c 100644
+--- a/sound/pci/hda/patch_conexant.c
++++ b/sound/pci/hda/patch_conexant.c
+@@ -980,7 +980,10 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
+ 	SND_PCI_QUIRK(0x17aa, 0x3905, "Lenovo G50-30", CXT_FIXUP_STEREO_DMIC),
+ 	SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC),
+ 	SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC),
+-	SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_PINCFG_LENOVO_NOTEBOOK),
++	/* NOTE: we'd need to extend the quirk for 17aa:3977 as the same
++	 * PCI SSID is used on multiple Lenovo models
++	 */
++	SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC),
+ 	SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo G50-70", CXT_FIXUP_STEREO_DMIC),
+ 	SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC),
+ 	SND_PCI_QUIRK_VENDOR(0x17aa, "Thinkpad", CXT_FIXUP_THINKPAD_ACPI),
+@@ -1003,6 +1006,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = {
+ 	{ .id = CXT_FIXUP_MUTE_LED_GPIO, .name = "mute-led-gpio" },
+ 	{ .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" },
+ 	{ .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" },
++	{ .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" },
+ 	{}
+ };
+ 
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 28ac6c159b2a2..070150bbd3559 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -2631,6 +2631,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
+ 	SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ 	SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ 	SND_PCI_QUIRK(0x1558, 0x65f5, "Clevo PD50PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
++	SND_PCI_QUIRK(0x1558, 0x66a2, "Clevo PE60RNE", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ 	SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ 	SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ 	SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+@@ -2651,6 +2652,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
+ 	SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950),
+ 	SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950),
+ 	SND_PCI_QUIRK(0x1558, 0x97e2, "Clevo P970RC-M", ALC1220_FIXUP_CLEVO_P950),
++	SND_PCI_QUIRK(0x1558, 0xd502, "Clevo PD50SNE", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ 	SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
+ 	SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
+ 	SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530),
+@@ -9574,6 +9576,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ 	SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++	SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+@@ -9608,6 +9611,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ 	SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL5[03]RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL50NU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++	SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1558, 0xb022, "Clevo NH77D[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+@@ -9708,6 +9712,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ 	SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ 	SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+ 	SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
++	SND_PCI_QUIRK(0x17aa, 0x9e56, "Lenovo ZhaoYang CF4620Z", ALC286_FIXUP_SONY_MIC_NO_PRESENCE),
+ 	SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK),
+ 	SND_PCI_QUIRK(0x1849, 0xa233, "Positivo Master C6300", ALC269_FIXUP_HEADSET_MIC),
+ 	SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS),
+diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c
+index 1e198e4d57b8d..82d4e0fda91be 100644
+--- a/sound/pci/ymfpci/ymfpci.c
++++ b/sound/pci/ymfpci/ymfpci.c
+@@ -170,7 +170,7 @@ static int snd_card_ymfpci_probe(struct pci_dev *pci,
+ 		return -ENOENT;
+ 	}
+ 
+-	err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
++	err = snd_devm_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
+ 			   sizeof(*chip), &card);
+ 	if (err < 0)
+ 		return err;
+diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c
+index c80114c0ad7bf..b492c32ce0704 100644
+--- a/sound/pci/ymfpci/ymfpci_main.c
++++ b/sound/pci/ymfpci/ymfpci_main.c
+@@ -2165,7 +2165,7 @@ static int snd_ymfpci_memalloc(struct snd_ymfpci *chip)
+ 	chip->work_base = ptr;
+ 	chip->work_base_addr = ptr_addr;
+ 	
+-	snd_BUG_ON(ptr + chip->work_size !=
++	snd_BUG_ON(ptr + PAGE_ALIGN(chip->work_size) !=
+ 		   chip->work_ptr->area + chip->work_ptr->bytes);
+ 
+ 	snd_ymfpci_writel(chip, YDSXGR_PLAYCTRLBASE, chip->bank_base_playback_addr);
+diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
+index 74cbbe16f9aec..a22f2ec95901f 100644
+--- a/sound/soc/codecs/hdmi-codec.c
++++ b/sound/soc/codecs/hdmi-codec.c
+@@ -428,8 +428,13 @@ static int hdmi_codec_startup(struct snd_pcm_substream *substream,
+ {
+ 	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
+ 	bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
++	bool has_capture = !hcp->hcd.no_i2s_capture;
++	bool has_playback = !hcp->hcd.no_i2s_playback;
+ 	int ret = 0;
+ 
++	if (!((has_playback && tx) || (has_capture && !tx)))
++		return 0;
++
+ 	mutex_lock(&hcp->lock);
+ 	if (hcp->busy) {
+ 		dev_err(dai->dev, "Only one simultaneous stream supported!\n");
+@@ -468,6 +473,12 @@ static void hdmi_codec_shutdown(struct snd_pcm_substream *substream,
+ 				struct snd_soc_dai *dai)
+ {
+ 	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
++	bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
++	bool has_capture = !hcp->hcd.no_i2s_capture;
++	bool has_playback = !hcp->hcd.no_i2s_playback;
++
++	if (!((has_playback && tx) || (has_capture && !tx)))
++		return;
+ 
+ 	hcp->chmap_idx = HDMI_CODEC_CHMAP_IDX_UNKNOWN;
+ 	hcp->hcd.ops->audio_shutdown(dai->dev->parent, hcp->hcd.data);
+diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c
+index 2449a2df66df0..8facdb922f076 100644
+--- a/sound/soc/codecs/lpass-tx-macro.c
++++ b/sound/soc/codecs/lpass-tx-macro.c
+@@ -242,7 +242,7 @@ enum {
+ 
+ struct tx_mute_work {
+ 	struct tx_macro *tx;
+-	u32 decimator;
++	u8 decimator;
+ 	struct delayed_work dwork;
+ };
+ 
+@@ -635,7 +635,7 @@ exit:
+ 	return 0;
+ }
+ 
+-static bool is_amic_enabled(struct snd_soc_component *component, int decimator)
++static bool is_amic_enabled(struct snd_soc_component *component, u8 decimator)
+ {
+ 	u16 adc_mux_reg, adc_reg, adc_n;
+ 
+@@ -849,7 +849,7 @@ static int tx_macro_enable_dec(struct snd_soc_dapm_widget *w,
+ 			       struct snd_kcontrol *kcontrol, int event)
+ {
+ 	struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+-	unsigned int decimator;
++	u8 decimator;
+ 	u16 tx_vol_ctl_reg, dec_cfg_reg, hpf_gate_reg, tx_gain_ctl_reg;
+ 	u8 hpf_cut_off_freq;
+ 	int hpf_delay = TX_MACRO_DMIC_HPF_DELAY_MS;
+@@ -1064,7 +1064,8 @@ static int tx_macro_hw_params(struct snd_pcm_substream *substream,
+ 			      struct snd_soc_dai *dai)
+ {
+ 	struct snd_soc_component *component = dai->component;
+-	u32 decimator, sample_rate;
++	u32 sample_rate;
++	u8 decimator;
+ 	int tx_fs_rate;
+ 	struct tx_macro *tx = snd_soc_component_get_drvdata(component);
+ 
+@@ -1128,7 +1129,7 @@ static int tx_macro_digital_mute(struct snd_soc_dai *dai, int mute, int stream)
+ {
+ 	struct snd_soc_component *component = dai->component;
+ 	struct tx_macro *tx = snd_soc_component_get_drvdata(component);
+-	u16 decimator;
++	u8 decimator;
+ 
+ 	/* active decimator not set yet */
+ 	if (tx->active_decimator[dai->id] == -1)
+diff --git a/sound/soc/intel/avs/boards/da7219.c b/sound/soc/intel/avs/boards/da7219.c
+index acd43b6108e99..1a1d572cc1d02 100644
+--- a/sound/soc/intel/avs/boards/da7219.c
++++ b/sound/soc/intel/avs/boards/da7219.c
+@@ -117,6 +117,26 @@ static void avs_da7219_codec_exit(struct snd_soc_pcm_runtime *rtd)
+ 	snd_soc_component_set_jack(asoc_rtd_to_codec(rtd, 0)->component, NULL, NULL);
+ }
+ 
++static int
++avs_da7219_be_fixup(struct snd_soc_pcm_runtime *runrime, struct snd_pcm_hw_params *params)
++{
++	struct snd_interval *rate, *channels;
++	struct snd_mask *fmt;
++
++	rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
++	channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS);
++	fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
++
++	/* The ADSP will convert the FE rate to 48k, stereo */
++	rate->min = rate->max = 48000;
++	channels->min = channels->max = 2;
++
++	/* set SSP0 to 24 bit */
++	snd_mask_none(fmt);
++	snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S24_LE);
++	return 0;
++}
++
+ static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port,
+ 			       struct snd_soc_dai_link **dai_link)
+ {
+@@ -148,6 +168,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in
+ 	dl->num_platforms = 1;
+ 	dl->id = 0;
+ 	dl->dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS;
++	dl->be_hw_params_fixup = avs_da7219_be_fixup;
+ 	dl->init = avs_da7219_codec_init;
+ 	dl->exit = avs_da7219_codec_exit;
+ 	dl->nonatomic = 1;
+diff --git a/sound/soc/intel/avs/boards/max98357a.c b/sound/soc/intel/avs/boards/max98357a.c
+index 921f42caf7e09..183123d08c5a3 100644
+--- a/sound/soc/intel/avs/boards/max98357a.c
++++ b/sound/soc/intel/avs/boards/max98357a.c
+@@ -8,6 +8,7 @@
+ 
+ #include <linux/module.h>
+ #include <linux/platform_device.h>
++#include <sound/pcm_params.h>
+ #include <sound/soc.h>
+ #include <sound/soc-acpi.h>
+ #include <sound/soc-dapm.h>
+@@ -24,6 +25,26 @@ static const struct snd_soc_dapm_route card_base_routes[] = {
+ 	{ "Spk", NULL, "Speaker" },
+ };
+ 
++static int
++avs_max98357a_be_fixup(struct snd_soc_pcm_runtime *runrime, struct snd_pcm_hw_params *params)
++{
++	struct snd_interval *rate, *channels;
++	struct snd_mask *fmt;
++
++	rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
++	channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS);
++	fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
++
++	/* The ADSP will convert the FE rate to 48k, stereo */
++	rate->min = rate->max = 48000;
++	channels->min = channels->max = 2;
++
++	/* set SSP0 to 16 bit */
++	snd_mask_none(fmt);
++	snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S16_LE);
++	return 0;
++}
++
+ static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port,
+ 			       struct snd_soc_dai_link **dai_link)
+ {
+@@ -55,6 +76,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in
+ 	dl->num_platforms = 1;
+ 	dl->id = 0;
+ 	dl->dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS;
++	dl->be_hw_params_fixup = avs_max98357a_be_fixup;
+ 	dl->nonatomic = 1;
+ 	dl->no_pcm = 1;
+ 	dl->dpcm_playback = 1;
+diff --git a/sound/soc/intel/avs/boards/nau8825.c b/sound/soc/intel/avs/boards/nau8825.c
+index 6731d8a490767..49438a67a77c6 100644
+--- a/sound/soc/intel/avs/boards/nau8825.c
++++ b/sound/soc/intel/avs/boards/nau8825.c
+@@ -33,15 +33,15 @@ avs_nau8825_clock_control(struct snd_soc_dapm_widget *w, struct snd_kcontrol *co
+ 		return -EINVAL;
+ 	}
+ 
+-	if (!SND_SOC_DAPM_EVENT_ON(event)) {
++	if (SND_SOC_DAPM_EVENT_ON(event))
++		ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_MCLK, 24000000,
++					     SND_SOC_CLOCK_IN);
++	else
+ 		ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_INTERNAL, 0, SND_SOC_CLOCK_IN);
+-		if (ret < 0) {
+-			dev_err(card->dev, "set sysclk err = %d\n", ret);
+-			return ret;
+-		}
+-	}
++	if (ret < 0)
++		dev_err(card->dev, "Set sysclk failed: %d\n", ret);
+ 
+-	return 0;
++	return ret;
+ }
+ 
+ static const struct snd_kcontrol_new card_controls[] = {
+diff --git a/sound/soc/intel/avs/boards/rt5682.c b/sound/soc/intel/avs/boards/rt5682.c
+index 473e9fe5d0bf7..b2c2ba93dcb56 100644
+--- a/sound/soc/intel/avs/boards/rt5682.c
++++ b/sound/soc/intel/avs/boards/rt5682.c
+@@ -169,6 +169,27 @@ static const struct snd_soc_ops avs_rt5682_ops = {
+ 	.hw_params = avs_rt5682_hw_params,
+ };
+ 
++static int
++avs_rt5682_be_fixup(struct snd_soc_pcm_runtime *runtime, struct snd_pcm_hw_params *params)
++{
++	struct snd_interval *rate, *channels;
++	struct snd_mask *fmt;
++
++	rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
++	channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS);
++	fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
++
++	/* The ADSP will convert the FE rate to 48k, stereo */
++	rate->min = rate->max = 48000;
++	channels->min = channels->max = 2;
++
++	/* set SSPN to 24 bit */
++	snd_mask_none(fmt);
++	snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S24_LE);
++
++	return 0;
++}
++
+ static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port,
+ 			       struct snd_soc_dai_link **dai_link)
+ {
+@@ -201,6 +222,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in
+ 	dl->id = 0;
+ 	dl->init = avs_rt5682_codec_init;
+ 	dl->exit = avs_rt5682_codec_exit;
++	dl->be_hw_params_fixup = avs_rt5682_be_fixup;
+ 	dl->ops = &avs_rt5682_ops;
+ 	dl->nonatomic = 1;
+ 	dl->no_pcm = 1;
+diff --git a/sound/soc/intel/avs/boards/ssm4567.c b/sound/soc/intel/avs/boards/ssm4567.c
+index c5db696127624..2b7f5ad92aca7 100644
+--- a/sound/soc/intel/avs/boards/ssm4567.c
++++ b/sound/soc/intel/avs/boards/ssm4567.c
+@@ -15,7 +15,6 @@
+ #include <sound/soc-acpi.h>
+ #include "../../../codecs/nau8825.h"
+ 
+-#define SKL_NUVOTON_CODEC_DAI	"nau8825-hifi"
+ #define SKL_SSM_CODEC_DAI	"ssm4567-hifi"
+ 
+ static struct snd_soc_codec_conf card_codec_conf[] = {
+@@ -34,41 +33,11 @@ static const struct snd_kcontrol_new card_controls[] = {
+ 	SOC_DAPM_PIN_SWITCH("Right Speaker"),
+ };
+ 
+-static int
+-platform_clock_control(struct snd_soc_dapm_widget *w, struct snd_kcontrol *control, int event)
+-{
+-	struct snd_soc_dapm_context *dapm = w->dapm;
+-	struct snd_soc_card *card = dapm->card;
+-	struct snd_soc_dai *codec_dai;
+-	int ret;
+-
+-	codec_dai = snd_soc_card_get_codec_dai(card, SKL_NUVOTON_CODEC_DAI);
+-	if (!codec_dai) {
+-		dev_err(card->dev, "Codec dai not found\n");
+-		return -EINVAL;
+-	}
+-
+-	if (SND_SOC_DAPM_EVENT_ON(event)) {
+-		ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_MCLK, 24000000,
+-					     SND_SOC_CLOCK_IN);
+-		if (ret < 0)
+-			dev_err(card->dev, "set sysclk err = %d\n", ret);
+-	} else {
+-		ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_INTERNAL, 0, SND_SOC_CLOCK_IN);
+-		if (ret < 0)
+-			dev_err(card->dev, "set sysclk err = %d\n", ret);
+-	}
+-
+-	return ret;
+-}
+-
+ static const struct snd_soc_dapm_widget card_widgets[] = {
+ 	SND_SOC_DAPM_SPK("Left Speaker", NULL),
+ 	SND_SOC_DAPM_SPK("Right Speaker", NULL),
+ 	SND_SOC_DAPM_SPK("DP1", NULL),
+ 	SND_SOC_DAPM_SPK("DP2", NULL),
+-	SND_SOC_DAPM_SUPPLY("Platform Clock", SND_SOC_NOPM, 0, 0, platform_clock_control,
+-			    SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+ };
+ 
+ static const struct snd_soc_dapm_route card_base_routes[] = {
+diff --git a/sound/soc/sof/intel/hda-ctrl.c b/sound/soc/sof/intel/hda-ctrl.c
+index 3aea36c077c9d..f3bdeba284122 100644
+--- a/sound/soc/sof/intel/hda-ctrl.c
++++ b/sound/soc/sof/intel/hda-ctrl.c
+@@ -196,12 +196,15 @@ int hda_dsp_ctrl_init_chip(struct snd_sof_dev *sdev)
+ 		goto err;
+ 	}
+ 
++	usleep_range(500, 1000);
++
+ 	/* exit HDA controller reset */
+ 	ret = hda_dsp_ctrl_link_reset(sdev, false);
+ 	if (ret < 0) {
+ 		dev_err(sdev->dev, "error: failed to exit HDA controller reset\n");
+ 		goto err;
+ 	}
++	usleep_range(1000, 1200);
+ 
+ 	hda_codec_detect_mask(sdev);
+ 
+diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c
+index b4eacae8564c8..6b2094f74c9c0 100644
+--- a/sound/soc/sof/intel/hda-dsp.c
++++ b/sound/soc/sof/intel/hda-dsp.c
+@@ -399,6 +399,12 @@ static int hda_dsp_update_d0i3c_register(struct snd_sof_dev *sdev, u8 value)
+ 	snd_sof_dsp_update8(sdev, HDA_DSP_HDA_BAR, chip->d0i3_offset,
+ 			    SOF_HDA_VS_D0I3C_I3, value);
+ 
++	/*
++	 * The value written to the D0I3C::I3 bit may not be taken into account immediately.
++	 * A delay is recommended before checking if D0I3C::CIP is cleared
++	 */
++	usleep_range(30, 40);
++
+ 	/* Wait for cmd in progress to be cleared before exiting the function */
+ 	ret = hda_dsp_wait_d0i3c_done(sdev);
+ 	if (ret < 0) {
+@@ -407,6 +413,12 @@ static int hda_dsp_update_d0i3c_register(struct snd_sof_dev *sdev, u8 value)
+ 	}
+ 
+ 	reg = snd_sof_dsp_read8(sdev, HDA_DSP_HDA_BAR, chip->d0i3_offset);
++	/* Confirm d0i3 state changed with paranoia check */
++	if ((reg ^ value) & SOF_HDA_VS_D0I3C_I3) {
++		dev_err(sdev->dev, "failed to update D0I3C!\n");
++		return -EIO;
++	}
++
+ 	trace_sof_intel_D0I3C_updated(sdev, reg);
+ 
+ 	return 0;
+diff --git a/sound/soc/sof/intel/pci-tng.c b/sound/soc/sof/intel/pci-tng.c
+index 5b2b409752c58..8c22a00266c06 100644
+--- a/sound/soc/sof/intel/pci-tng.c
++++ b/sound/soc/sof/intel/pci-tng.c
+@@ -75,11 +75,7 @@ static int tangier_pci_probe(struct snd_sof_dev *sdev)
+ 
+ 	/* LPE base */
+ 	base = pci_resource_start(pci, desc->resindex_lpe_base) - IRAM_OFFSET;
+-	size = pci_resource_len(pci, desc->resindex_lpe_base);
+-	if (size < PCI_BAR_SIZE) {
+-		dev_err(sdev->dev, "error: I/O region is too small.\n");
+-		return -ENODEV;
+-	}
++	size = PCI_BAR_SIZE;
+ 
+ 	dev_dbg(sdev->dev, "LPE PHY base at 0x%x size 0x%x", base, size);
+ 	sdev->bar[DSP_BAR] = devm_ioremap(sdev->dev, base, size);
+diff --git a/sound/soc/sof/ipc3.c b/sound/soc/sof/ipc3.c
+index 1fef4dcc09368..fde8af5a1f485 100644
+--- a/sound/soc/sof/ipc3.c
++++ b/sound/soc/sof/ipc3.c
+@@ -970,8 +970,9 @@ static void sof_ipc3_rx_msg(struct snd_sof_dev *sdev)
+ 		return;
+ 	}
+ 
+-	if (hdr.size < sizeof(hdr)) {
+-		dev_err(sdev->dev, "The received message size is invalid\n");
++	if (hdr.size < sizeof(hdr) || hdr.size > SOF_IPC_MSG_MAX_SIZE) {
++		dev_err(sdev->dev, "The received message size is invalid: %u\n",
++			hdr.size);
+ 		return;
+ 	}
+ 
+diff --git a/sound/soc/sof/ipc4-control.c b/sound/soc/sof/ipc4-control.c
+index 0d5a578c34962..7442ec1c5a4d4 100644
+--- a/sound/soc/sof/ipc4-control.c
++++ b/sound/soc/sof/ipc4-control.c
+@@ -84,7 +84,8 @@ sof_ipc4_set_volume_data(struct snd_sof_dev *sdev, struct snd_sof_widget *swidge
+ 		}
+ 
+ 		/* set curve type and duration from topology */
+-		data.curve_duration = gain->data.curve_duration;
++		data.curve_duration_l = gain->data.curve_duration_l;
++		data.curve_duration_h = gain->data.curve_duration_h;
+ 		data.curve_type = gain->data.curve_type;
+ 
+ 		msg->data_ptr = &data;
+diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
+index 59f4d42f9011e..6da6137fa2cbc 100644
+--- a/sound/soc/sof/ipc4-topology.c
++++ b/sound/soc/sof/ipc4-topology.c
+@@ -107,7 +107,7 @@ static const struct sof_topology_token gain_tokens[] = {
+ 		get_token_u32, offsetof(struct sof_ipc4_gain_data, curve_type)},
+ 	{SOF_TKN_GAIN_RAMP_DURATION,
+ 		SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
+-		offsetof(struct sof_ipc4_gain_data, curve_duration)},
++		offsetof(struct sof_ipc4_gain_data, curve_duration_l)},
+ 	{SOF_TKN_GAIN_VAL, SND_SOC_TPLG_TUPLE_TYPE_WORD,
+ 		get_token_u32, offsetof(struct sof_ipc4_gain_data, init_val)},
+ };
+@@ -155,7 +155,7 @@ static void sof_ipc4_dbg_audio_format(struct device *dev,
+ 	for (i = 0; i < num_format; i++, ptr = (u8 *)ptr + object_size) {
+ 		fmt = ptr;
+ 		dev_dbg(dev,
+-			" #%d: %uKHz, %ubit (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x)\n",
++			" #%d: %uHz, %ubit (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x)\n",
+ 			i, fmt->sampling_frequency, fmt->bit_depth, fmt->ch_map,
+ 			fmt->ch_cfg, fmt->interleaving_style, fmt->fmt_cfg);
+ 	}
+@@ -670,7 +670,7 @@ static int sof_ipc4_widget_setup_comp_pga(struct snd_sof_widget *swidget)
+ 
+ 	dev_dbg(scomp->dev,
+ 		"pga widget %s: ramp type: %d, ramp duration %d, initial gain value: %#x, cpc %d\n",
+-		swidget->widget->name, gain->data.curve_type, gain->data.curve_duration,
++		swidget->widget->name, gain->data.curve_type, gain->data.curve_duration_l,
+ 		gain->data.init_val, gain->base_config.cpc);
+ 
+ 	ret = sof_ipc4_widget_setup_msg(swidget, &gain->msg);
+diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h
+index 2363a7cc0b57d..cf9d278524572 100644
+--- a/sound/soc/sof/ipc4-topology.h
++++ b/sound/soc/sof/ipc4-topology.h
+@@ -217,14 +217,16 @@ struct sof_ipc4_control_data {
+  * @init_val: Initial value
+  * @curve_type: Curve type
+  * @reserved: reserved for future use
+- * @curve_duration: Curve duration
++ * @curve_duration_l: Curve duration low part
++ * @curve_duration_h: Curve duration high part
+  */
+ struct sof_ipc4_gain_data {
+ 	uint32_t channels;
+ 	uint32_t init_val;
+ 	uint32_t curve_type;
+ 	uint32_t reserved;
+-	uint32_t curve_duration;
++	uint32_t curve_duration_l;
++	uint32_t curve_duration_h;
+ } __aligned(8);
+ 
+ /**
+diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
+index 419302e2057e8..647fa054d8b1d 100644
+--- a/sound/usb/endpoint.c
++++ b/sound/usb/endpoint.c
+@@ -455,8 +455,8 @@ static void push_back_to_ready_list(struct snd_usb_endpoint *ep,
+  * This function is used both for implicit feedback endpoints and in low-
+  * latency playback mode.
+  */
+-void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
+-				       bool in_stream_lock)
++int snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
++				      bool in_stream_lock)
+ {
+ 	bool implicit_fb = snd_usb_endpoint_implicit_feedback_sink(ep);
+ 
+@@ -480,7 +480,7 @@ void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
+ 		spin_unlock_irqrestore(&ep->lock, flags);
+ 
+ 		if (ctx == NULL)
+-			return;
++			break;
+ 
+ 		/* copy over the length information */
+ 		if (implicit_fb) {
+@@ -495,11 +495,14 @@ void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
+ 			break;
+ 		if (err < 0) {
+ 			/* push back to ready list again for -EAGAIN */
+-			if (err == -EAGAIN)
++			if (err == -EAGAIN) {
+ 				push_back_to_ready_list(ep, ctx);
+-			else
++				break;
++			}
++
++			if (!in_stream_lock)
+ 				notify_xrun(ep);
+-			return;
++			return -EPIPE;
+ 		}
+ 
+ 		err = usb_submit_urb(ctx->urb, GFP_ATOMIC);
+@@ -507,13 +510,16 @@ void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
+ 			usb_audio_err(ep->chip,
+ 				      "Unable to submit urb #%d: %d at %s\n",
+ 				      ctx->index, err, __func__);
+-			notify_xrun(ep);
+-			return;
++			if (!in_stream_lock)
++				notify_xrun(ep);
++			return -EPIPE;
+ 		}
+ 
+ 		set_bit(ctx->index, &ep->active_mask);
+ 		atomic_inc(&ep->submitted_urbs);
+ 	}
++
++	return 0;
+ }
+ 
+ /*
+diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h
+index 924f4351588ce..c09f68ce08b18 100644
+--- a/sound/usb/endpoint.h
++++ b/sound/usb/endpoint.h
+@@ -52,7 +52,7 @@ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep);
+ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep,
+ 				      struct snd_urb_ctx *ctx, int idx,
+ 				      unsigned int avail);
+-void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
+-				       bool in_stream_lock);
++int snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
++				      bool in_stream_lock);
+ 
+ #endif /* __USBAUDIO_ENDPOINT_H */
+diff --git a/sound/usb/format.c b/sound/usb/format.c
+index 405dc0bf6678c..4b1c5ba121f39 100644
+--- a/sound/usb/format.c
++++ b/sound/usb/format.c
+@@ -39,8 +39,12 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip,
+ 	case UAC_VERSION_1:
+ 	default: {
+ 		struct uac_format_type_i_discrete_descriptor *fmt = _fmt;
+-		if (format >= 64)
+-			return 0; /* invalid format */
++		if (format >= 64) {
++			usb_audio_info(chip,
++				       "%u:%d: invalid format type 0x%llx is detected, processed as PCM\n",
++				       fp->iface, fp->altsetting, format);
++			format = UAC_FORMAT_TYPE_I_PCM;
++		}
+ 		sample_width = fmt->bBitResolution;
+ 		sample_bytes = fmt->bSubframeSize;
+ 		format = 1ULL << format;
+diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
+index d959da7a1afba..eec5232f9fb29 100644
+--- a/sound/usb/pcm.c
++++ b/sound/usb/pcm.c
+@@ -1639,7 +1639,7 @@ static int snd_usb_pcm_playback_ack(struct snd_pcm_substream *substream)
+ 	 * outputs here
+ 	 */
+ 	if (!ep->active_mask)
+-		snd_usb_queue_pending_output_urbs(ep, true);
++		return snd_usb_queue_pending_output_urbs(ep, true);
+ 	return 0;
+ }
+ 
+diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
+index 69e80ee5f70e2..cfbec31e115cc 100644
+--- a/tools/lib/bpf/btf_dump.c
++++ b/tools/lib/bpf/btf_dump.c
+@@ -833,14 +833,9 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+ 				 const struct btf_type *t)
+ {
+ 	const struct btf_member *m;
+-	int align, i, bit_sz;
++	int max_align = 1, align, i, bit_sz;
+ 	__u16 vlen;
+ 
+-	align = btf__align_of(btf, id);
+-	/* size of a non-packed struct has to be a multiple of its alignment*/
+-	if (align && t->size % align)
+-		return true;
+-
+ 	m = btf_members(t);
+ 	vlen = btf_vlen(t);
+ 	/* all non-bitfield fields have to be naturally aligned */
+@@ -849,8 +844,11 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+ 		bit_sz = btf_member_bitfield_size(t, i);
+ 		if (align && bit_sz == 0 && m->offset % (8 * align) != 0)
+ 			return true;
++		max_align = max(align, max_align);
+ 	}
+-
++	/* size of a non-packed struct has to be a multiple of its alignment */
++	if (t->size % max_align != 0)
++		return true;
+ 	/*
+ 	 * if original struct was marked as packed, but its layout is
+ 	 * naturally aligned, we'll detect that it's not packed
+@@ -858,44 +856,97 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+ 	return false;
+ }
+ 
+-static int chip_away_bits(int total, int at_most)
+-{
+-	return total % at_most ? : at_most;
+-}
+-
+ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
+-				      int cur_off, int m_off, int m_bit_sz,
+-				      int align, int lvl)
++				      int cur_off, int next_off, int next_align,
++				      bool in_bitfield, int lvl)
+ {
+-	int off_diff = m_off - cur_off;
+-	int ptr_bits = d->ptr_sz * 8;
++	const struct {
++		const char *name;
++		int bits;
++	} pads[] = {
++		{"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8}
++	};
++	int new_off, pad_bits, bits, i;
++	const char *pad_type;
++
++	if (cur_off >= next_off)
++		return; /* no gap */
++
++	/* For filling out padding we want to take advantage of
++	 * natural alignment rules to minimize unnecessary explicit
++	 * padding. First, we find the largest type (among long, int,
++	 * short, or char) that can be used to force naturally aligned
++	 * boundary. Once determined, we'll use such type to fill in
++	 * the remaining padding gap. In some cases we can rely on
++	 * compiler filling some gaps, but sometimes we need to force
++	 * alignment to close natural alignment with markers like
++	 * `long: 0` (this is always the case for bitfields).  Note
++	 * that even if struct itself has, let's say 4-byte alignment
++	 * (i.e., it only uses up to int-aligned types), using `long:
++	 * X;` explicit padding doesn't actually change struct's
++	 * overall alignment requirements, but compiler does take into
++	 * account that type's (long, in this example) natural
++	 * alignment requirements when adding implicit padding. We use
++	 * this fact heavily and don't worry about ruining correct
++	 * struct alignment requirement.
++	 */
++	for (i = 0; i < ARRAY_SIZE(pads); i++) {
++		pad_bits = pads[i].bits;
++		pad_type = pads[i].name;
+ 
+-	if (off_diff <= 0)
+-		/* no gap */
+-		return;
+-	if (m_bit_sz == 0 && off_diff < align * 8)
+-		/* natural padding will take care of a gap */
+-		return;
++		new_off = roundup(cur_off, pad_bits);
++		if (new_off <= next_off)
++			break;
++	}
+ 
+-	while (off_diff > 0) {
+-		const char *pad_type;
+-		int pad_bits;
+-
+-		if (ptr_bits > 32 && off_diff > 32) {
+-			pad_type = "long";
+-			pad_bits = chip_away_bits(off_diff, ptr_bits);
+-		} else if (off_diff > 16) {
+-			pad_type = "int";
+-			pad_bits = chip_away_bits(off_diff, 32);
+-		} else if (off_diff > 8) {
+-			pad_type = "short";
+-			pad_bits = chip_away_bits(off_diff, 16);
+-		} else {
+-			pad_type = "char";
+-			pad_bits = chip_away_bits(off_diff, 8);
++	if (new_off > cur_off && new_off <= next_off) {
++		/* We need explicit `<type>: 0` aligning mark if next
++		 * field is right on alignment offset and its
++		 * alignment requirement is less strict than <type>'s
++		 * alignment (so compiler won't naturally align to the
++		 * offset we expect), or if subsequent `<type>: X`,
++		 * will actually completely fit in the remaining hole,
++		 * making compiler basically ignore `<type>: X`
++		 * completely.
++		 */
++		if (in_bitfield ||
++		    (new_off == next_off && roundup(cur_off, next_align * 8) != new_off) ||
++		    (new_off != next_off && next_off - new_off <= new_off - cur_off))
++			/* but for bitfields we'll emit explicit bit count */
++			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type,
++					in_bitfield ? new_off - cur_off : 0);
++		cur_off = new_off;
++	}
++
++	/* Now we know we start at naturally aligned offset for a chosen
++	 * padding type (long, int, short, or char), and so the rest is just
++	 * a straightforward filling of remaining padding gap with full
++	 * `<type>: sizeof(<type>);` markers, except for the last one, which
++	 * might need smaller than sizeof(<type>) padding.
++	 */
++	while (cur_off != next_off) {
++		bits = min(next_off - cur_off, pad_bits);
++		if (bits == pad_bits) {
++			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
++			cur_off += bits;
++			continue;
++		}
++		/* For the remainder padding that doesn't cover entire
++		 * pad_type bit length, we pick the smallest necessary type.
++		 * This is pure aesthetics, we could have just used `long`,
++		 * but having smallest necessary one communicates better the
++		 * scale of the padding gap.
++		 */
++		for (i = ARRAY_SIZE(pads) - 1; i >= 0; i--) {
++			pad_type = pads[i].name;
++			pad_bits = pads[i].bits;
++			if (pad_bits < bits)
++				continue;
++
++			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, bits);
++			cur_off += bits;
++			break;
+ 		}
+-		btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
+-		off_diff -= pad_bits;
+ 	}
+ }
+ 
+@@ -915,9 +966,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
+ {
+ 	const struct btf_member *m = btf_members(t);
+ 	bool is_struct = btf_is_struct(t);
+-	int align, i, packed, off = 0;
++	bool packed, prev_bitfield = false;
++	int align, i, off = 0;
+ 	__u16 vlen = btf_vlen(t);
+ 
++	align = btf__align_of(d->btf, id);
+ 	packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
+ 
+ 	btf_dump_printf(d, "%s%s%s {",
+@@ -927,33 +980,36 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
+ 
+ 	for (i = 0; i < vlen; i++, m++) {
+ 		const char *fname;
+-		int m_off, m_sz;
++		int m_off, m_sz, m_align;
++		bool in_bitfield;
+ 
+ 		fname = btf_name_of(d, m->name_off);
+ 		m_sz = btf_member_bitfield_size(t, i);
+ 		m_off = btf_member_bit_offset(t, i);
+-		align = packed ? 1 : btf__align_of(d->btf, m->type);
++		m_align = packed ? 1 : btf__align_of(d->btf, m->type);
+ 
+-		btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
++		in_bitfield = prev_bitfield && m_sz != 0;
++
++		btf_dump_emit_bit_padding(d, off, m_off, m_align, in_bitfield, lvl + 1);
+ 		btf_dump_printf(d, "\n%s", pfx(lvl + 1));
+ 		btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);
+ 
+ 		if (m_sz) {
+ 			btf_dump_printf(d, ": %d", m_sz);
+ 			off = m_off + m_sz;
++			prev_bitfield = true;
+ 		} else {
+ 			m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
+ 			off = m_off + m_sz * 8;
++			prev_bitfield = false;
+ 		}
++
+ 		btf_dump_printf(d, ";");
+ 	}
+ 
+ 	/* pad at the end, if necessary */
+-	if (is_struct) {
+-		align = packed ? 1 : btf__align_of(d->btf, id);
+-		btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
+-					  lvl + 1);
+-	}
++	if (is_struct)
++		btf_dump_emit_bit_padding(d, off, t->size * 8, align, false, lvl + 1);
+ 
+ 	/*
+ 	 * Keep `struct empty {}` on a single line,
+diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c
+index 52aa0351533c3..388c9e3ad0407 100644
+--- a/tools/power/acpi/tools/pfrut/pfrut.c
++++ b/tools/power/acpi/tools/pfrut/pfrut.c
+@@ -97,7 +97,7 @@ static struct option long_options[] = {
+ static void parse_options(int argc, char **argv)
+ {
+ 	int option_index = 0;
+-	char *pathname;
++	char *pathname, *endptr;
+ 	int opt;
+ 
+ 	pathname = strdup(argv[0]);
+@@ -125,11 +125,23 @@ static void parse_options(int argc, char **argv)
+ 			log_getinfo = 1;
+ 			break;
+ 		case 'T':
+-			log_type = atoi(optarg);
++			log_type = strtol(optarg, &endptr, 0);
++			if (*endptr || (log_type != 0 && log_type != 1)) {
++				printf("Number expected: type(0:execution, 1:history) - Quit.\n");
++				exit(1);
++			}
++
+ 			set_log_type = 1;
+ 			break;
+ 		case 'L':
+-			log_level = atoi(optarg);
++			log_level = strtol(optarg, &endptr, 0);
++			if (*endptr ||
++			    (log_level != 0 && log_level != 1 &&
++			     log_level != 2 && log_level != 4)) {
++				printf("Number expected: level(0, 1, 2, 4) - Quit.\n");
++				exit(1);
++			}
++
+ 			set_log_level = 1;
+ 			break;
+ 		case 'R':
+diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
+index c7b26a3603afe..3e1a4c4be001a 100644
+--- a/tools/power/x86/turbostat/turbostat.8
++++ b/tools/power/x86/turbostat/turbostat.8
+@@ -344,6 +344,8 @@ Alternatively, non-root users can be enabled to run turbostat this way:
+ 
+ # chmod +r /dev/cpu/*/msr
+ 
++# chmod +r /dev/cpu_dma_latency
++
+ .B "turbostat "
+ reads hardware counters, but doesn't write them.
+ So it will not interfere with the OS or other programs, including
+diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
+index aba460410dbd1..c61c6c704fbe6 100644
+--- a/tools/power/x86/turbostat/turbostat.c
++++ b/tools/power/x86/turbostat/turbostat.c
+@@ -4426,7 +4426,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+ 
+ 	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
+ 		"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
+-		cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-");
++		cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");
+ 
+ 	return 0;
+ }
+@@ -5482,7 +5482,7 @@ void print_dev_latency(void)
+ 
+ 	retval = read(fd, (void *)&value, sizeof(int));
+ 	if (retval != sizeof(int)) {
+-		warn("read %s\n", path);
++		warn("read failed %s\n", path);
+ 		close(fd);
+ 		return;
+ 	}
+diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
+index e5560a6560309..e01690618e1ee 100644
+--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
++++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
+@@ -53,7 +53,7 @@ struct bitfields_only_mixed_types {
+  */
+ /* ------ END-EXPECTED-OUTPUT ------ */
+ struct bitfield_mixed_with_others {
+-	long: 4; /* char is enough as a backing field */
++	char: 4; /* char is enough as a backing field */
+ 	int a: 4;
+ 	/* 8-bit implicit padding */
+ 	short b; /* combined with previous bitfield */
+diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
+index e304b6204bd9d..7998f27df7ddd 100644
+--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
++++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
+@@ -58,7 +58,81 @@ union jump_code_union {
+ 	} __attribute__((packed));
+ };
+ 
+-/*------ END-EXPECTED-OUTPUT ------ */
++/* ----- START-EXPECTED-OUTPUT ----- */
++/*
++ *struct nested_packed_but_aligned_struct {
++ *	int x1;
++ *	int x2;
++ *};
++ *
++ *struct outer_implicitly_packed_struct {
++ *	char y1;
++ *	struct nested_packed_but_aligned_struct y2;
++ *} __attribute__((packed));
++ *
++ */
++/* ------ END-EXPECTED-OUTPUT ------ */
++
++struct nested_packed_but_aligned_struct {
++	int x1;
++	int x2;
++} __attribute__((packed));
++
++struct outer_implicitly_packed_struct {
++	char y1;
++	struct nested_packed_but_aligned_struct y2;
++};
++/* ----- START-EXPECTED-OUTPUT ----- */
++/*
++ *struct usb_ss_ep_comp_descriptor {
++ *	char: 8;
++ *	char bDescriptorType;
++ *	char bMaxBurst;
++ *	short wBytesPerInterval;
++ *};
++ *
++ *struct usb_host_endpoint {
++ *	long: 64;
++ *	char: 8;
++ *	struct usb_ss_ep_comp_descriptor ss_ep_comp;
++ *	long: 0;
++ *} __attribute__((packed));
++ *
++ */
++/* ------ END-EXPECTED-OUTPUT ------ */
++
++struct usb_ss_ep_comp_descriptor {
++	char: 8;
++	char bDescriptorType;
++	char bMaxBurst;
++	int: 0;
++	short wBytesPerInterval;
++} __attribute__((packed));
++
++struct usb_host_endpoint {
++	long: 64;
++	char: 8;
++	struct usb_ss_ep_comp_descriptor ss_ep_comp;
++	long: 0;
++};
++
++/* ----- START-EXPECTED-OUTPUT ----- */
++struct nested_packed_struct {
++	int a;
++	char b;
++} __attribute__((packed));
++
++struct outer_nonpacked_struct {
++	short a;
++	struct nested_packed_struct b;
++};
++
++struct outer_packed_struct {
++	short a;
++	struct nested_packed_struct b;
++} __attribute__((packed));
++
++/* ------ END-EXPECTED-OUTPUT ------ */
+ 
+ int f(struct {
+ 	struct packed_trailing_space _1;
+@@ -69,6 +143,10 @@ int f(struct {
+ 	union union_is_never_packed _6;
+ 	union union_does_not_need_packing _7;
+ 	union jump_code_union _8;
++	struct outer_implicitly_packed_struct _9;
++	struct usb_host_endpoint _10;
++	struct outer_nonpacked_struct _11;
++	struct outer_packed_struct _12;
+ } *_)
+ {
+ 	return 0;
+diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
+index 7cb522d22a664..79276fbe454a8 100644
+--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
++++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
+@@ -19,7 +19,7 @@ struct padded_implicitly {
+ /*
+  *struct padded_explicitly {
+  *	int a;
+- *	int: 32;
++ *	long: 0;
+  *	int b;
+  *};
+  *
+@@ -28,41 +28,28 @@ struct padded_implicitly {
+ 
+ struct padded_explicitly {
+ 	int a;
+-	int: 1; /* algo will explicitly pad with full 32 bits here */
++	int: 1; /* algo will emit aligning `long: 0;` here */
+ 	int b;
+ };
+ 
+ /* ----- START-EXPECTED-OUTPUT ----- */
+-/*
+- *struct padded_a_lot {
+- *	int a;
+- *	long: 32;
+- *	long: 64;
+- *	long: 64;
+- *	int b;
+- *};
+- *
+- */
+-/* ------ END-EXPECTED-OUTPUT ------ */
+-
+ struct padded_a_lot {
+ 	int a;
+-	/* 32 bit of implicit padding here, which algo will make explicit */
+ 	long: 64;
+ 	long: 64;
+ 	int b;
+ };
+ 
++/* ------ END-EXPECTED-OUTPUT ------ */
++
+ /* ----- START-EXPECTED-OUTPUT ----- */
+ /*
+  *struct padded_cache_line {
+  *	int a;
+- *	long: 32;
+  *	long: 64;
+  *	long: 64;
+  *	long: 64;
+  *	int b;
+- *	long: 32;
+  *	long: 64;
+  *	long: 64;
+  *	long: 64;
+@@ -85,7 +72,7 @@ struct padded_cache_line {
+  *struct zone {
+  *	int a;
+  *	short b;
+- *	short: 16;
++ *	long: 0;
+  *	struct zone_padding __pad__;
+  *};
+  *
+@@ -108,6 +95,131 @@ struct padding_wo_named_members {
+ 	long: 64;
+ };
+ 
++struct padding_weird_1 {
++	int a;
++	long: 64;
++	short: 16;
++	short b;
++};
++
++/* ------ END-EXPECTED-OUTPUT ------ */
++
++/* ----- START-EXPECTED-OUTPUT ----- */
++/*
++ *struct padding_weird_2 {
++ *	long: 56;
++ *	char a;
++ *	long: 56;
++ *	char b;
++ *	char: 8;
++ *};
++ *
++ */
++/* ------ END-EXPECTED-OUTPUT ------ */
++struct padding_weird_2 {
++	int: 32;	/* these paddings will be collapsed into `long: 56;` */
++	short: 16;
++	char: 8;
++	char a;
++	int: 32;	/* these paddings will be collapsed into `long: 56;` */
++	short: 16;
++	char: 8;
++	char b;
++	char: 8;
++};
++
++/* ----- START-EXPECTED-OUTPUT ----- */
++struct exact_1byte {
++	char x;
++};
++
++struct padded_1byte {
++	char: 8;
++};
++
++struct exact_2bytes {
++	short x;
++};
++
++struct padded_2bytes {
++	short: 16;
++};
++
++struct exact_4bytes {
++	int x;
++};
++
++struct padded_4bytes {
++	int: 32;
++};
++
++struct exact_8bytes {
++	long x;
++};
++
++struct padded_8bytes {
++	long: 64;
++};
++
++struct ff_periodic_effect {
++	int: 32;
++	short magnitude;
++	long: 0;
++	short phase;
++	long: 0;
++	int: 32;
++	int custom_len;
++	short *custom_data;
++};
++
++struct ib_wc {
++	long: 64;
++	long: 64;
++	int: 32;
++	int byte_len;
++	void *qp;
++	union {} ex;
++	long: 64;
++	int slid;
++	int wc_flags;
++	long: 64;
++	char smac[6];
++	long: 0;
++	char network_hdr_type;
++};
++
++struct acpi_object_method {
++	long: 64;
++	char: 8;
++	char type;
++	short reference_count;
++	char flags;
++	short: 0;
++	char: 8;
++	char sync_level;
++	long: 64;
++	void *node;
++	void *aml_start;
++	union {} dispatch;
++	long: 64;
++	int aml_length;
++};
++
++struct nested_unpacked {
++	int x;
++};
++
++struct nested_packed {
++	struct nested_unpacked a;
++	char c;
++} __attribute__((packed));
++
++struct outer_mixed_but_unpacked {
++	struct nested_packed b1;
++	short a1;
++	struct nested_packed b2;
++};
++
+ /* ------ END-EXPECTED-OUTPUT ------ */
+ 
+ int f(struct {
+@@ -117,6 +229,20 @@ int f(struct {
+ 	struct padded_cache_line _4;
+ 	struct zone _5;
+ 	struct padding_wo_named_members _6;
++	struct padding_weird_1 _7;
++	struct padding_weird_2 _8;
++	struct exact_1byte _100;
++	struct padded_1byte _101;
++	struct exact_2bytes _102;
++	struct padded_2bytes _103;
++	struct exact_4bytes _104;
++	struct padded_4bytes _105;
++	struct exact_8bytes _106;
++	struct padded_8bytes _107;
++	struct ff_periodic_effect _200;
++	struct ib_wc _201;
++	struct acpi_object_method _202;
++	struct outer_mixed_but_unpacked _203;
+ } *_)
+ {
+ 	return 0;


             reply	other threads:[~2023-04-06 10:40 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-06 10:40 Alice Ferrazzi [this message]
  -- strict thread matches above, loose matches on Subject: below --
2023-05-17 13:17 [gentoo-commits] proj/linux-patches:6.2 commit in: / Mike Pagano
2023-05-11 16:11 Mike Pagano
2023-05-11 14:48 Mike Pagano
2023-05-10 17:52 Mike Pagano
2023-05-10 16:08 Mike Pagano
2023-04-30 23:50 Alice Ferrazzi
2023-04-26 13:21 Mike Pagano
2023-04-20 11:15 Alice Ferrazzi
2023-04-13 16:08 Mike Pagano
2023-03-30 21:52 Mike Pagano
2023-03-30 11:20 Alice Ferrazzi
2023-03-29 23:09 Mike Pagano
2023-03-22 16:10 Alice Ferrazzi
2023-03-22 12:44 Mike Pagano
2023-03-21 13:32 Mike Pagano
2023-03-17 10:42 Mike Pagano
2023-03-13 11:30 Alice Ferrazzi
2023-03-11 14:08 Mike Pagano
2023-03-11 11:19 Mike Pagano
2023-03-10 12:37 Mike Pagano
2023-03-03 13:02 Mike Pagano
2023-03-03 12:27 Mike Pagano
2023-02-27 18:45 Mike Pagano
2023-02-27  3:48 [gentoo-commits] proj/linux-patches:6.2-2 " Alice Ferrazzi
2023-02-25 11:14 ` [gentoo-commits] proj/linux-patches:6.2 " Alice Ferrazzi
2023-02-26 17:30 Mike Pagano
2023-02-26 17:26 Mike Pagano
2023-02-25 11:02 Alice Ferrazzi
2023-02-19 22:41 Mike Pagano
2023-02-19 22:39 Mike Pagano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1680777626.552322bbd8665a864a089b06ed41c97e413562b9.alicef@gentoo \
    --to=alicef@gentoo.org \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.