* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-07-01 0:32 Mike Pagano
From: Mike Pagano @ 2016-07-01 0:32 UTC
To: gentoo-commits
commit: 67931769cae3714473bee3739ebc04d917bfd909
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Fri Jul 1 00:32:44 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Fri Jul 1 00:32:44 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=67931769
Update gcc 4.9+ optimization patch. Bug #587578
...-additional-cpu-optimizations-for-gcc-4.9.patch | 90 ++++++++++++++--------
1 file changed, 57 insertions(+), 33 deletions(-)
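The pattern of the change is visible in the diff below: each new processor option is wired up in three places, an entry in the arch/x86/Kconfig.cpu choice menu, a MODULE_PROC_FAMILY string in arch/x86/include/asm/module.h, and a cc-option-guarded -march= flag in arch/x86/Makefile and arch/x86/Makefile_32.cpu. As a sketch of the end result for a user (the option and flag names are taken from the patch; the .config fragment itself is illustrative):

    # Processor family ---> "Intel Skylake"
    CONFIG_MSKYLAKE=y
    # compile flag applied if the compiler accepts it:
    #   -march=skylake (64-bit) or -march=i686 -mtune=skylake (32-bit)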
diff --git a/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch b/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
index 418201d..d9729b2 100644
--- a/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
+++ b/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
@@ -21,11 +21,12 @@ bug report to see if I'm right: https://bugzilla.kernel.org/show_bug.cgi?id=7746
This patch will expand the number of microarchitectures to include newer
processors including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
-Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 1.5 Gen Core
-i3/i5/i7 (Westmere), Intel 2nd Gen Core i3/i5/i7 (Sandybridge), Intel 3rd Gen
-Core i3/i5/i7 (Ivybridge), Intel 4th Gen Core i3/i5/i7 (Haswell), Intel 5th
-Gen Core i3/i5/i7 (Broadwell), and the low power Silvermont series of Atom
-processors (Silvermont). It also offers the compiler the 'native' flag.
+Family 15h (Steamroller), Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7
+(Nehalem), Intel 1.5 Gen Core i3/i5/i7 (Westmere), Intel 2nd Gen Core i3/i5/i7
+(Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), Intel 4th Gen Core
+i3/i5/i7 (Haswell), Intel 5th Gen Core i3/i5/i7 (Broadwell), and the low power
+Silvermont series of Atom processors (Silvermont). It also offers the compiler
+the 'native' flag.
Small but real speed increases are measurable using a make endpoint comparing
a generic kernel to one built with one of the respective microarchs.
@@ -37,9 +38,9 @@ REQUIREMENTS
linux version >=3.15
gcc version >=4.9
---- a/arch/x86/include/asm/module.h 2014-06-16 16:44:27.000000000 -0400
-+++ b/arch/x86/include/asm/module.h 2015-03-07 03:27:32.556672424 -0500
-@@ -15,6 +15,22 @@
+--- a/arch/x86/include/asm/module.h 2015-08-30 14:34:09.000000000 -0400
++++ b/arch/x86/include/asm/module.h 2015-11-06 14:18:24.234941036 -0500
+@@ -15,6 +15,24 @@
#define MODULE_PROC_FAMILY "586MMX "
#elif defined CONFIG_MCORE2
#define MODULE_PROC_FAMILY "CORE2 "
@@ -59,10 +60,12 @@ gcc version >=4.9
+#define MODULE_PROC_FAMILY "HASWELL "
+#elif defined CONFIG_MBROADWELL
+#define MODULE_PROC_FAMILY "BROADWELL "
++#elif defined CONFIG_MSKYLAKE
++#define MODULE_PROC_FAMILY "SKYLAKE "
#elif defined CONFIG_MATOM
#define MODULE_PROC_FAMILY "ATOM "
#elif defined CONFIG_M686
-@@ -33,6 +49,20 @@
+@@ -33,6 +51,22 @@
#define MODULE_PROC_FAMILY "K7 "
#elif defined CONFIG_MK8
#define MODULE_PROC_FAMILY "K8 "
@@ -77,14 +80,16 @@ gcc version >=4.9
+#elif defined CONFIG_MBULLDOZER
+#define MODULE_PROC_FAMILY "BULLDOZER "
+#elif defined CONFIG_MPILEDRIVER
+#define MODULE_PROC_FAMILY "PILEDRIVER "
++#elif defined CONFIG_MSTEAMROLLER
++#define MODULE_PROC_FAMILY "STEAMROLLER "
+#elif defined CONFIG_MJAGUAR
+#define MODULE_PROC_FAMILY "JAGUAR "
#elif defined CONFIG_MELAN
#define MODULE_PROC_FAMILY "ELAN "
#elif defined CONFIG_MCRUSOE
---- a/arch/x86/Kconfig.cpu 2014-06-16 16:44:27.000000000 -0400
-+++ b/arch/x86/Kconfig.cpu 2015-03-07 03:32:14.337713226 -0500
+--- a/arch/x86/Kconfig.cpu 2015-08-30 14:34:09.000000000 -0400
++++ b/arch/x86/Kconfig.cpu 2015-11-06 14:20:14.948369244 -0500
@@ -137,9 +137,8 @@ config MPENTIUM4
-Paxville
-Dempsey
@@ -105,7 +110,7 @@ gcc version >=4.9
depends on X86_32
---help---
Select this for an AMD Athlon K7-family processor. Enables use of
-@@ -155,12 +154,62 @@ config MK7
+@@ -155,12 +154,69 @@ config MK7
flags to GCC.
config MK8
@@ -159,6 +164,13 @@ gcc version >=4.9
+
+ Enables -march=bdver2
+
++config MSTEAMROLLER
++ bool "AMD Steamroller"
++ ---help---
++ Select this for AMD Steamroller processors.
++
++ Enables -march=bdver3
++
+config MJAGUAR
+ bool "AMD Jaguar"
+ ---help---
@@ -169,7 +181,7 @@ gcc version >=4.9
config MCRUSOE
bool "Crusoe"
depends on X86_32
-@@ -251,8 +300,17 @@ config MPSC
+@@ -251,8 +307,17 @@ config MPSC
using the cpu family field
in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
@@ -188,7 +200,7 @@ gcc version >=4.9
---help---
Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
-@@ -260,14 +318,63 @@ config MCORE2
+@@ -260,14 +325,71 @@ config MCORE2
family in /proc/cpuinfo. Newer ones have 6 and older ones 15
(not a typo)
@@ -255,10 +267,18 @@ gcc version >=4.9
+ Select this for 5th Gen Core processors in the Broadwell family.
+
+ Enables -march=broadwell
++
++config MSKYLAKE
++ bool "Intel Skylake"
++ ---help---
++
++ Select this for 6th Gen Core processors in the Skylake family.
++
++ Enables -march=skylake
config GENERIC_CPU
bool "Generic-x86-64"
-@@ -276,6 +383,19 @@ config GENERIC_CPU
+@@ -276,6 +398,19 @@ config GENERIC_CPU
Generic x86-64 CPU.
Run equally well on all x86-64 CPUs.
@@ -278,54 +298,54 @@ gcc version >=4.9
endchoice
config X86_GENERIC
-@@ -300,7 +420,7 @@ config X86_INTERNODE_CACHE_SHIFT
+@@ -300,7 +435,7 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-+ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
++ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
default "4" if MELAN || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-@@ -331,11 +451,11 @@ config X86_ALIGNMENT_16
+@@ -331,11 +466,11 @@ config X86_ALIGNMENT_16
config X86_INTEL_USERCOPY
def_bool y
- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
-+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE
++ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE
config X86_USE_PPRO_CHECKSUM
def_bool y
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
-+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MATOM || MNATIVE
++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MATOM || MNATIVE
config X86_USE_3DNOW
def_bool y
-@@ -359,17 +479,17 @@ config X86_P6_NOP
+@@ -359,17 +494,17 @@ config X86_P6_NOP
config X86_TSC
def_bool y
- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
-+ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE || MATOM) || X86_64
++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE || MATOM) || X86_64
config X86_CMPXCHG64
def_bool y
- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
-+ depends on X86_PAE || X86_64 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
++ depends on X86_PAE || X86_64 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
# this should be set for all -march=.. options where the compiler
# generates cmov.
config X86_CMOV
def_bool y
- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
-+ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
++ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
config X86_MINIMUM_CPU_FAMILY
int
---- a/arch/x86/Makefile 2014-06-16 16:44:27.000000000 -0400
-+++ b/arch/x86/Makefile 2015-03-07 03:33:27.650843211 -0500
-@@ -92,13 +92,35 @@ else
- KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
+--- a/arch/x86/Makefile 2015-08-30 14:34:09.000000000 -0400
++++ b/arch/x86/Makefile 2015-11-06 14:21:05.708983344 -0500
+@@ -94,13 +94,38 @@ else
+ KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
+ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
@@ -336,6 +356,7 @@ gcc version >=4.9
+ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
+ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
+ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
++ cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3)
+ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
@@ -358,14 +379,16 @@ gcc version >=4.9
+ $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell))
+ cflags-$(CONFIG_MBROADWELL) += \
+ $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell))
++ cflags-$(CONFIG_MSKYLAKE) += \
++ $(call cc-option,-march=skylake,$(call cc-option,-mtune=skylake))
+ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \
+ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
KBUILD_CFLAGS += $(cflags-y)
---- a/arch/x86/Makefile_32.cpu 2014-06-16 16:44:27.000000000 -0400
-+++ b/arch/x86/Makefile_32.cpu 2015-03-07 03:34:15.203586024 -0500
-@@ -23,7 +23,15 @@ cflags-$(CONFIG_MK6) += -march=k6
+--- a/arch/x86/Makefile_32.cpu 2015-08-30 14:34:09.000000000 -0400
++++ b/arch/x86/Makefile_32.cpu 2015-11-06 14:21:43.604429077 -0500
+@@ -23,7 +23,16 @@ cflags-$(CONFIG_MK6) += -march=k6
# Please note, that patches that add -march=athlon-xp and friends are pointless.
# They make zero difference whatsosever to performance at this time.
cflags-$(CONFIG_MK7) += -march=athlon
@@ -377,11 +400,12 @@ gcc version >=4.9
+cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
+cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
+cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
++cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3,-march=athlon)
+cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
-@@ -32,8 +40,15 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
+@@ -32,8 +41,16 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
cflags-$(CONFIG_MVIAC7) += -march=i686
cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
@@ -394,9 +418,9 @@ gcc version >=4.9
+cflags-$(CONFIG_MIVYBRIDGE) += -march=i686 $(call tune,ivybridge)
+cflags-$(CONFIG_MHASWELL) += -march=i686 $(call tune,haswell)
+cflags-$(CONFIG_MBROADWELL) += -march=i686 $(call tune,broadwell)
++cflags-$(CONFIG_MSKYLAKE) += -march=i686 $(call tune,skylake)
+cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \
+ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
# AMD Elan support
cflags-$(CONFIG_MELAN) += -march=i486
-
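A note on the Makefile hunks above: cc-option is Kbuild's compiler feature probe. It expands to its first argument if $(CC) accepts the flag and to the second argument otherwise, which is why entries such as $(call cc-option,-march=bdver3,-march=athlon) degrade gracefully on compilers too old to know the microarchitecture instead of breaking the build. Simplified sketch of the helper from scripts/Kbuild.include (the real definition runs the compiler against /dev/null via try-run):

    # cc-option: expand to $(1) if the compiler accepts it, else $(2)
    cc-option = $(call try-run,\
            $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))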
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-07-02 15:24 Mike Pagano
From: Mike Pagano @ 2016-07-02 15:24 UTC
To: gentoo-commits
commit: 29c01f454805f39d9d02a91f785a67b6593bf914
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Jul 2 15:24:02 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Jul 2 15:24:02 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=29c01f45
Select SYSVIPC when GENTOO_LINUX_PORTAGE is selected, since it is a dependency of IPC_NS. See bug #587736.
4567_distro-Gentoo-Kconfig.patch | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/4567_distro-Gentoo-Kconfig.patch b/4567_distro-Gentoo-Kconfig.patch
index c7af596..499b21f 100644
--- a/4567_distro-Gentoo-Kconfig.patch
+++ b/4567_distro-Gentoo-Kconfig.patch
@@ -1,5 +1,5 @@
---- a/Kconfig
-+++ b/Kconfig
+--- a/Kconfig 2016-07-01 19:22:17.117439707 -0400
++++ b/Kconfig 2016-07-01 19:21:54.371440596 -0400
@@ -8,4 +8,6 @@ config SRCARCH
string
option env="SRCARCH"
@@ -7,9 +7,9 @@
+source "distro/Kconfig"
+
source "arch/$SRCARCH/Kconfig"
---- /dev/null
-+++ b/distro/Kconfig
-@@ -0,0 +1,131 @@
+--- /dev/null 2016-07-01 11:23:26.087932647 -0400
++++ b/distro/Kconfig 2016-07-01 19:32:35.581415519 -0400
+@@ -0,0 +1,134 @@
+menu "Gentoo Linux"
+
+config GENTOO_LINUX
@@ -63,6 +63,7 @@
+ select NAMESPACES
+ select IPC_NS
+ select NET_NS
++ select SYSVIPC
+
+ help
+ This enables options required by various Portage FEATURES.
@@ -71,6 +72,8 @@
+ CGROUPS (required for FEATURES=cgroup)
+ IPC_NS (required for FEATURES=ipc-sandbox)
+ NET_NS (required for FEATURES=network-sandbox)
++ SYSVIPC (required by IPC_NS)
++
+
+ It is highly recommended that you leave this enabled as these FEATURES
+ are, or will soon be, enabled by default.
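The underlying Kconfig rule here: select is a hard reverse dependency and does not recursively satisfy the selected symbol's own dependencies. IPC_NS depends on SYSVIPC (or POSIX_MQUEUE), so selecting IPC_NS by itself could yield a configuration with that dependency unmet; adding select SYSVIPC closes the gap. In sketch form (the selects are from the patch above; the prompt text and the IPC_NS stanza are paraphrased):

    config GENTOO_LINUX_PORTAGE
            bool "Select options required by Portage FEATURES"
            select IPC_NS
            select SYSVIPC          # required by IPC_NS; select alone will not pull it in

    config IPC_NS
            bool "IPC namespace"
            depends on (SYSVIPC || POSIX_MQUEUE)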
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-07-25 0:25 Mike Pagano
From: Mike Pagano @ 2016-07-25 0:25 UTC
To: gentoo-commits
commit: d918813821549a0562d38158eca9254c4251fd74
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Mon Jul 25 00:25:25 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Mon Jul 25 00:25:25 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=d9188138
Removal of incompatible patch for ThinkPad brightness control.
2700_ThinkPad-30-brightness-control-fix.patch | 67 ---------------------------
1 file changed, 67 deletions(-)
diff --git a/2700_ThinkPad-30-brightness-control-fix.patch b/2700_ThinkPad-30-brightness-control-fix.patch
deleted file mode 100644
index b548c6d..0000000
--- a/2700_ThinkPad-30-brightness-control-fix.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
-index cb96296..6c242ed 100644
---- a/drivers/acpi/blacklist.c
-+++ b/drivers/acpi/blacklist.c
-@@ -269,6 +276,61 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
- },
-
- /*
-+ * The following Lenovo models have a broken workaround in the
-+ * acpi_video backlight implementation to meet the Windows 8
-+ * requirement of 101 backlight levels. Reverting to pre-Win8
-+ * behavior fixes the problem.
-+ */
-+ {
-+ .callback = dmi_disable_osi_win8,
-+ .ident = "Lenovo ThinkPad L430",
-+ .matches = {
-+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L430"),
-+ },
-+ },
-+ {
-+ .callback = dmi_disable_osi_win8,
-+ .ident = "Lenovo ThinkPad T430s",
-+ .matches = {
-+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T430s"),
-+ },
-+ },
-+ {
-+ .callback = dmi_disable_osi_win8,
-+ .ident = "Lenovo ThinkPad T530",
-+ .matches = {
-+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T530"),
-+ },
-+ },
-+ {
-+ .callback = dmi_disable_osi_win8,
-+ .ident = "Lenovo ThinkPad W530",
-+ .matches = {
-+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W530"),
-+ },
-+ },
-+ {
-+ .callback = dmi_disable_osi_win8,
-+ .ident = "Lenovo ThinkPad X1 Carbon",
-+ .matches = {
-+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X1 Carbon"),
-+ },
-+ },
-+ {
-+ .callback = dmi_disable_osi_win8,
-+ .ident = "Lenovo ThinkPad X230",
-+ .matches = {
-+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X230"),
-+ },
-+ },
-+
-+ /*
- * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
- * Linux ignores it, except for the machines enumerated below.
- */
-
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-07-27 16:21 Mike Pagano
From: Mike Pagano @ 2016-07-27 16:21 UTC
To: gentoo-commits
commit: cde941178e64137a1b2757b279aaf56f70ede934
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Wed Jul 27 16:21:13 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Wed Jul 27 16:21:13 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=cde94117
Remove reference to ThinkPad patch.
0000_README | 4 ----
1 file changed, 4 deletions(-)
diff --git a/0000_README b/0000_README
index 8e70e78..0530209 100644
--- a/0000_README
+++ b/0000_README
@@ -51,10 +51,6 @@ Patch: 1510_fs-enable-link-security-restrictions-by-default.patch
From: http://sources.debian.net/src/linux/3.16.7-ckt4-3/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch/
Desc: Enable link security restrictions by default.
-Patch: 2700_ThinkPad-30-brightness-control-fix.patch
-From: Seth Forshee <seth.forshee@canonical.com>
-Desc: ACPI: Disable Windows 8 compatibility for some Lenovo ThinkPads.
-
Patch: 2900_dev-root-proc-mount-fix.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=438380
Desc: Ensure that /dev/root doesn't appear in /proc/mounts when booting without an initramfs.
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-07-28 0:03 Mike Pagano
From: Mike Pagano @ 2016-07-28 0:03 UTC
To: gentoo-commits
commit: 543cfadc9443b9cfdbfea73dfcd2b7eb82dec66e
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Thu Jul 28 00:03:33 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Thu Jul 28 00:03:33 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=543cfadc
BFQ patches for 4.7. See http://algogroup.unimore.it/people/paolo/disk_sched/patches/4.7.0-v8/
0000_README | 16 +
...oups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch | 103 +
...ntroduce-the-BFQ-v7r11-I-O-sched-for-4.7.patch1 | 7097 ++++++++++++++++++++
...arly-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch | 1101 +++
...rn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1 | 6361 ++++++++++++++++++
5 files changed, 14678 insertions(+)
diff --git a/0000_README b/0000_README
index 0530209..1b5179e 100644
--- a/0000_README
+++ b/0000_README
@@ -67,6 +67,22 @@ Patch: 5000_enable-additional-cpu-optimizations-for-gcc.patch
From: https://github.com/graysky2/kernel_gcc_patch/
Desc: Kernel patch enables gcc < v4.9 optimizations for additional CPUs.
+Patch: 5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch
+From: http://algo.ing.unimo.it/people/paolo/disk_sched/
+Desc: BFQ v7r11 patch 1 for 4.7: Build, cgroups and kconfig bits
+
+Patch: 5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.patch1
+From: http://algo.ing.unimo.it/people/paolo/disk_sched/
+Desc: BFQ v7r11 patch 2 for 4.7: BFQ Scheduler
+
+Patch: 5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch
+From: http://algo.ing.unimo.it/people/paolo/disk_sched/
+Desc: BFQ v7r11 patch 3 for 4.7: Early Queue Merge (EQM)
+
+Patch: 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1
+From: http://algo.ing.unimo.it/people/paolo/disk_sched/
+Desc: BFQ patch 4 for 4.7: Turn BFQ v7r11 into BFQ v8
+
Patch: 5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
From: https://github.com/graysky2/kernel_gcc_patch/
Desc: Kernel patch enables gcc >= v4.9 optimizations for additional CPUs.
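With these patches applied and IOSCHED_BFQ enabled, BFQ appears in the legacy elevator interface and can be tried per device at runtime before committing to it as the default (illustrative session; sda is an example device):

    # cat /sys/block/sda/queue/scheduler
    noop deadline [cfq] bfq
    # echo bfq > /sys/block/sda/queue/scheduler
    # cat /sys/block/sda/queue/scheduler
    noop deadline cfq [bfq]

Making it the compiled-in default instead goes through the DEFAULT_BFQ option added below, or the elevator=bfq kernel command line.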
diff --git a/5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch b/5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch
new file mode 100644
index 0000000..45d0b07
--- /dev/null
+++ b/5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch
@@ -0,0 +1,103 @@
+From feb58b4dd1e8fd895f28ba4c759e92febe316cb2 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@unimore.it>
+Date: Tue, 7 Apr 2015 13:39:12 +0200
+Subject: [PATCH 1/4] block: cgroups, kconfig, build bits for BFQ-v7r11-4.7.0
+
+Update Kconfig.iosched and do the related Makefile changes to include
+kernel configuration options for BFQ. Also increase the number of
+policies supported by the blkio controller so that BFQ can add its
+own.
+
+Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
+Signed-off-by: Arianna Avanzini <avanzini@google.com>
+---
+ block/Kconfig.iosched | 32 ++++++++++++++++++++++++++++++++
+ block/Makefile | 1 +
+ include/linux/blkdev.h | 2 +-
+ 3 files changed, 34 insertions(+), 1 deletion(-)
+
+diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
+index 421bef9..0ee5f0f 100644
+--- a/block/Kconfig.iosched
++++ b/block/Kconfig.iosched
+@@ -39,6 +39,27 @@ config CFQ_GROUP_IOSCHED
+ ---help---
+ Enable group IO scheduling in CFQ.
+
++config IOSCHED_BFQ
++ tristate "BFQ I/O scheduler"
++ default n
++ ---help---
++ The BFQ I/O scheduler tries to distribute bandwidth among
++ all processes according to their weights.
++ It aims at distributing the bandwidth as desired, independently of
++ the disk parameters and with any workload. It also tries to
++ guarantee low latency to interactive and soft real-time
++ applications. If compiled built-in (saying Y here), BFQ can
++ be configured to support hierarchical scheduling.
++
++config CGROUP_BFQIO
++ bool "BFQ hierarchical scheduling support"
++ depends on CGROUPS && IOSCHED_BFQ=y
++ default n
++ ---help---
++ Enable hierarchical scheduling in BFQ, using the cgroups
++ filesystem interface. The name of the subsystem will be
++ bfqio.
++
+ choice
+ prompt "Default I/O scheduler"
+ default DEFAULT_CFQ
+@@ -52,6 +73,16 @@ choice
+ config DEFAULT_CFQ
+ bool "CFQ" if IOSCHED_CFQ=y
+
++ config DEFAULT_BFQ
++ bool "BFQ" if IOSCHED_BFQ=y
++ help
++ Selects BFQ as the default I/O scheduler which will be
++ used by default for all block devices.
++ The BFQ I/O scheduler aims at distributing the bandwidth
++ as desired, independently of the disk parameters and with
++ any workload. It also tries to guarantee low latency to
++ interactive and soft real-time applications.
++
+ config DEFAULT_NOOP
+ bool "No-op"
+
+@@ -61,6 +92,7 @@ config DEFAULT_IOSCHED
+ string
+ default "deadline" if DEFAULT_DEADLINE
+ default "cfq" if DEFAULT_CFQ
++ default "bfq" if DEFAULT_BFQ
+ default "noop" if DEFAULT_NOOP
+
+ endmenu
+diff --git a/block/Makefile b/block/Makefile
+index 9eda232..4a36683 100644
+--- a/block/Makefile
++++ b/block/Makefile
+@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
+ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
+ obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
+ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
++obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o
+
+ obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
+ obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 3d9cf32..8d862a0 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -45,7 +45,7 @@ struct pr_ops;
+ * Maximum number of blkcg policies allowed to be registered concurrently.
+ * Defined here to simplify include dependency.
+ */
+-#define BLKCG_MAX_POLS 2
++#define BLKCG_MAX_POLS 3
+
+ struct request;
+ typedef void (rq_end_io_fn)(struct request *, int);
+--
+1.9.1
+
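The one-line BLKCG_MAX_POLS bump at the end of the patch above matters: blkcg policies occupy slots in a fixed-size table, and policy registration fails once the table is full. With the CFQ group policy and blk-throttle already registered, BFQ needs a third slot. A rough sketch of the registration logic being accommodated (simplified from block/blk-cgroup.c of this era; locking and error handling trimmed):

    int blkcg_policy_register(struct blkcg_policy *pol)
    {
            int i;

            /* find a free slot in the global blkcg_policy[] table */
            for (i = 0; i < BLKCG_MAX_POLS; i++)
                    if (!blkcg_policy[i])
                            break;
            if (i >= BLKCG_MAX_POLS)
                    return -ENOSPC; /* table full: what the bump to 3 avoids */

            pol->plid = i;
            blkcg_policy[i] = pol;
            return 0;
    }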
diff --git a/5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.patch1 b/5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.patch1
new file mode 100644
index 0000000..8a67a4b
--- /dev/null
+++ b/5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.patch1
@@ -0,0 +1,7097 @@
+From 1f07b3f666e6da78d10e62cfb9696242e5b3005e Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@unimore.it>
+Date: Thu, 9 May 2013 19:10:02 +0200
+Subject: [PATCH 2/4] block: introduce the BFQ-v7r11 I/O sched for 4.7.0
+
+The general structure is borrowed from CFQ, as much of the code for
+handling I/O contexts. Over time, several useful features have been
+ported from CFQ as well (details in the changelog in README.BFQ). A
+(bfq_)queue is associated to each task doing I/O on a device, and each
+time a scheduling decision has to be made a queue is selected and served
+until it expires.
+
+ - Slices are given in the service domain: tasks are assigned
+ budgets, measured in number of sectors. Once got the disk, a task
+ must however consume its assigned budget within a configurable
+ maximum time (by default, the maximum possible value of the
+ budgets is automatically computed to comply with this timeout).
+ This allows the desired latency vs "throughput boosting" tradeoff
+ to be set.
+
+ - Budgets are scheduled according to a variant of WF2Q+, implemented
+ using an augmented rb-tree to take eligibility into account while
+ preserving an O(log N) overall complexity.
+
+ - A low-latency tunable is provided; if enabled, both interactive
+ and soft real-time applications are guaranteed a very low latency.
+
+ - Latency guarantees are preserved also in the presence of NCQ.
+
+ - Also with flash-based devices, a high throughput is achieved
+ while still preserving latency guarantees.
+
+ - BFQ features Early Queue Merge (EQM), a sort of fusion of the
+ cooperating-queue-merging and the preemption mechanisms present
+ in CFQ. EQM is in fact a unified mechanism that tries to get a
+ sequential read pattern, and hence a high throughput, with any
+ set of processes performing interleaved I/O over a contiguous
+ sequence of sectors.
+
+ - BFQ supports full hierarchical scheduling, exporting a cgroups
+ interface. Since each node has a full scheduler, each group can
+ be assigned its own weight.
+
+ - If the cgroups interface is not used, only I/O priorities can be
+ assigned to processes, with ioprio values mapped to weights
+ with the relation weight = IOPRIO_BE_NR - ioprio.
+
+ - ioprio classes are served in strict priority order, i.e., lower
+ priority queues are not served as long as there are higher
+ priority queues. Among queues in the same class the bandwidth is
+ distributed in proportion to the weight of each queue. A very
+ thin extra bandwidth is however guaranteed to the Idle class, to
+ prevent it from starving.
+
+Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
+Signed-off-by: Arianna Avanzini <avanzini@google.com>
+---
+ block/Kconfig.iosched | 6 +-
+ block/bfq-cgroup.c | 1182 ++++++++++++++++
+ block/bfq-ioc.c | 36 +
+ block/bfq-iosched.c | 3754 +++++++++++++++++++++++++++++++++++++++++++++++++
+ block/bfq-sched.c | 1200 ++++++++++++++++
+ block/bfq.h | 801 +++++++++++
+ 6 files changed, 6975 insertions(+), 4 deletions(-)
+ create mode 100644 block/bfq-cgroup.c
+ create mode 100644 block/bfq-ioc.c
+ create mode 100644 block/bfq-iosched.c
+ create mode 100644 block/bfq-sched.c
+ create mode 100644 block/bfq.h
+
+diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
+index 0ee5f0f..f78cd1a 100644
+--- a/block/Kconfig.iosched
++++ b/block/Kconfig.iosched
+@@ -51,14 +51,12 @@ config IOSCHED_BFQ
+ applications. If compiled built-in (saying Y here), BFQ can
+ be configured to support hierarchical scheduling.
+
+-config CGROUP_BFQIO
++config BFQ_GROUP_IOSCHED
+ bool "BFQ hierarchical scheduling support"
+ depends on CGROUPS && IOSCHED_BFQ=y
+ default n
+ ---help---
+- Enable hierarchical scheduling in BFQ, using the cgroups
+- filesystem interface. The name of the subsystem will be
+- bfqio.
++ Enable hierarchical scheduling in BFQ, using the blkio controller.
+
+ choice
+ prompt "Default I/O scheduler"
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+new file mode 100644
+index 0000000..8610cd6
+--- /dev/null
++++ b/block/bfq-cgroup.c
+@@ -0,0 +1,1182 @@
++/*
++ * BFQ: CGROUPS support.
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
++ *
++ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
++ * Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
++ * file.
++ */
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++
++/* bfqg stats flags */
++enum bfqg_stats_flags {
++ BFQG_stats_waiting = 0,
++ BFQG_stats_idling,
++ BFQG_stats_empty,
++};
++
++#define BFQG_FLAG_FNS(name) \
++static void bfqg_stats_mark_##name(struct bfqg_stats *stats) \
++{ \
++ stats->flags |= (1 << BFQG_stats_##name); \
++} \
++static void bfqg_stats_clear_##name(struct bfqg_stats *stats) \
++{ \
++ stats->flags &= ~(1 << BFQG_stats_##name); \
++} \
++static int bfqg_stats_##name(struct bfqg_stats *stats) \
++{ \
++ return (stats->flags & (1 << BFQG_stats_##name)) != 0; \
++} \
++
++BFQG_FLAG_FNS(waiting)
++BFQG_FLAG_FNS(idling)
++BFQG_FLAG_FNS(empty)
++#undef BFQG_FLAG_FNS
++
++/* This should be called with the queue_lock held. */
++static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
++{
++ unsigned long long now;
++
++ if (!bfqg_stats_waiting(stats))
++ return;
++
++ now = sched_clock();
++ if (time_after64(now, stats->start_group_wait_time))
++ blkg_stat_add(&stats->group_wait_time,
++ now - stats->start_group_wait_time);
++ bfqg_stats_clear_waiting(stats);
++}
++
++/* This should be called with the queue_lock held. */
++static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
++ struct bfq_group *curr_bfqg)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++
++ if (bfqg_stats_waiting(stats))
++ return;
++ if (bfqg == curr_bfqg)
++ return;
++ stats->start_group_wait_time = sched_clock();
++ bfqg_stats_mark_waiting(stats);
++}
++
++/* This should be called with the queue_lock held. */
++static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
++{
++ unsigned long long now;
++
++ if (!bfqg_stats_empty(stats))
++ return;
++
++ now = sched_clock();
++ if (time_after64(now, stats->start_empty_time))
++ blkg_stat_add(&stats->empty_time,
++ now - stats->start_empty_time);
++ bfqg_stats_clear_empty(stats);
++}
++
++static void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
++{
++ blkg_stat_add(&bfqg->stats.dequeue, 1);
++}
++
++static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++
++ if (blkg_rwstat_total(&stats->queued))
++ return;
++
++ /*
++ * group is already marked empty. This can happen if bfqq got new
++ * request in parent group and moved to this group while being added
++ * to service tree. Just ignore the event and move on.
++ */
++ if (bfqg_stats_empty(stats))
++ return;
++
++ stats->start_empty_time = sched_clock();
++ bfqg_stats_mark_empty(stats);
++}
++
++static void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++
++ if (bfqg_stats_idling(stats)) {
++ unsigned long long now = sched_clock();
++
++ if (time_after64(now, stats->start_idle_time))
++ blkg_stat_add(&stats->idle_time,
++ now - stats->start_idle_time);
++ bfqg_stats_clear_idling(stats);
++ }
++}
++
++static void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++
++ stats->start_idle_time = sched_clock();
++ bfqg_stats_mark_idling(stats);
++}
++
++static void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++
++ blkg_stat_add(&stats->avg_queue_size_sum,
++ blkg_rwstat_total(&stats->queued));
++ blkg_stat_add(&stats->avg_queue_size_samples, 1);
++ bfqg_stats_update_group_wait_time(stats);
++}
++
++static struct blkcg_policy blkcg_policy_bfq;
++
++/*
++ * blk-cgroup policy-related handlers
++ * The following functions help in converting between blk-cgroup
++ * internal structures and BFQ-specific structures.
++ */
++
++static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
++{
++ return pd ? container_of(pd, struct bfq_group, pd) : NULL;
++}
++
++static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
++{
++ return pd_to_blkg(&bfqg->pd);
++}
++
++static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
++{
++ struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq);
++ BUG_ON(!pd);
++ return pd_to_bfqg(pd);
++}
++
++/*
++ * bfq_group handlers
++ * The following functions help in navigating the bfq_group hierarchy
++ * by allowing to find the parent of a bfq_group or the bfq_group
++ * associated to a bfq_queue.
++ */
++
++static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
++{
++ struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;
++
++ return pblkg ? blkg_to_bfqg(pblkg) : NULL;
++}
++
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
++{
++ struct bfq_entity *group_entity = bfqq->entity.parent;
++
++ return group_entity ? container_of(group_entity, struct bfq_group,
++ entity) :
++ bfqq->bfqd->root_group;
++}
++
++/*
++ * The following two functions handle get and put of a bfq_group by
++ * wrapping the related blk-cgroup hooks.
++ */
++
++static void bfqg_get(struct bfq_group *bfqg)
++{
++ return blkg_get(bfqg_to_blkg(bfqg));
++}
++
++static void bfqg_put(struct bfq_group *bfqg)
++{
++ return blkg_put(bfqg_to_blkg(bfqg));
++}
++
++static void bfqg_stats_update_io_add(struct bfq_group *bfqg,
++ struct bfq_queue *bfqq,
++ int rw)
++{
++ blkg_rwstat_add(&bfqg->stats.queued, rw, 1);
++ bfqg_stats_end_empty_time(&bfqg->stats);
++ if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
++ bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
++}
++
++static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw)
++{
++ blkg_rwstat_add(&bfqg->stats.queued, rw, -1);
++}
++
++static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw)
++{
++ blkg_rwstat_add(&bfqg->stats.merged, rw, 1);
++}
++
++static void bfqg_stats_update_dispatch(struct bfq_group *bfqg,
++ uint64_t bytes, int rw)
++{
++ blkg_stat_add(&bfqg->stats.sectors, bytes >> 9);
++ blkg_rwstat_add(&bfqg->stats.serviced, rw, 1);
++ blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes);
++}
++
++static void bfqg_stats_update_completion(struct bfq_group *bfqg,
++ uint64_t start_time, uint64_t io_start_time, int rw)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++ unsigned long long now = sched_clock();
++
++ if (time_after64(now, io_start_time))
++ blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
++ if (time_after64(io_start_time, start_time))
++ blkg_rwstat_add(&stats->wait_time, rw,
++ io_start_time - start_time);
++}
++
++/* @stats = 0 */
++static void bfqg_stats_reset(struct bfqg_stats *stats)
++{
++ if (!stats)
++ return;
++
++ /* queued stats shouldn't be cleared */
++ blkg_rwstat_reset(&stats->service_bytes);
++ blkg_rwstat_reset(&stats->serviced);
++ blkg_rwstat_reset(&stats->merged);
++ blkg_rwstat_reset(&stats->service_time);
++ blkg_rwstat_reset(&stats->wait_time);
++ blkg_stat_reset(&stats->time);
++ blkg_stat_reset(&stats->unaccounted_time);
++ blkg_stat_reset(&stats->avg_queue_size_sum);
++ blkg_stat_reset(&stats->avg_queue_size_samples);
++ blkg_stat_reset(&stats->dequeue);
++ blkg_stat_reset(&stats->group_wait_time);
++ blkg_stat_reset(&stats->idle_time);
++ blkg_stat_reset(&stats->empty_time);
++}
++
++/* @to += @from */
++static void bfqg_stats_merge(struct bfqg_stats *to, struct bfqg_stats *from)
++{
++ if (!to || !from)
++ return;
++
++ /* queued stats shouldn't be cleared */
++ blkg_rwstat_add_aux(&to->service_bytes, &from->service_bytes);
++ blkg_rwstat_add_aux(&to->serviced, &from->serviced);
++ blkg_rwstat_add_aux(&to->merged, &from->merged);
++ blkg_rwstat_add_aux(&to->service_time, &from->service_time);
++ blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
++ blkg_stat_add_aux(&from->time, &from->time);
++ blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time);
++ blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
++ blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
++ blkg_stat_add_aux(&to->dequeue, &from->dequeue);
++ blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
++ blkg_stat_add_aux(&to->idle_time, &from->idle_time);
++ blkg_stat_add_aux(&to->empty_time, &from->empty_time);
++}
++
++/*
++ * Transfer @bfqg's stats to its parent's dead_stats so that the ancestors'
++ * recursive stats can still account for the amount used by this bfqg after
++ * it's gone.
++ */
++static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
++{
++ struct bfq_group *parent;
++
++ if (!bfqg) /* root_group */
++ return;
++
++ parent = bfqg_parent(bfqg);
++
++ lockdep_assert_held(bfqg_to_blkg(bfqg)->q->queue_lock);
++
++ if (unlikely(!parent))
++ return;
++
++ bfqg_stats_merge(&parent->dead_stats, &bfqg->stats);
++ bfqg_stats_merge(&parent->dead_stats, &bfqg->dead_stats);
++ bfqg_stats_reset(&bfqg->stats);
++ bfqg_stats_reset(&bfqg->dead_stats);
++}
++
++static void bfq_init_entity(struct bfq_entity *entity,
++ struct bfq_group *bfqg)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++
++ entity->weight = entity->new_weight;
++ entity->orig_weight = entity->new_weight;
++ if (bfqq) {
++ bfqq->ioprio = bfqq->new_ioprio;
++ bfqq->ioprio_class = bfqq->new_ioprio_class;
++ bfqg_get(bfqg);
++ }
++ entity->parent = bfqg->my_entity;
++ entity->sched_data = &bfqg->sched_data;
++}
++
++static void bfqg_stats_exit(struct bfqg_stats *stats)
++{
++ blkg_rwstat_exit(&stats->service_bytes);
++ blkg_rwstat_exit(&stats->serviced);
++ blkg_rwstat_exit(&stats->merged);
++ blkg_rwstat_exit(&stats->service_time);
++ blkg_rwstat_exit(&stats->wait_time);
++ blkg_rwstat_exit(&stats->queued);
++ blkg_stat_exit(&stats->sectors);
++ blkg_stat_exit(&stats->time);
++ blkg_stat_exit(&stats->unaccounted_time);
++ blkg_stat_exit(&stats->avg_queue_size_sum);
++ blkg_stat_exit(&stats->avg_queue_size_samples);
++ blkg_stat_exit(&stats->dequeue);
++ blkg_stat_exit(&stats->group_wait_time);
++ blkg_stat_exit(&stats->idle_time);
++ blkg_stat_exit(&stats->empty_time);
++}
++
++static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
++{
++ if (blkg_rwstat_init(&stats->service_bytes, gfp) ||
++ blkg_rwstat_init(&stats->serviced, gfp) ||
++ blkg_rwstat_init(&stats->merged, gfp) ||
++ blkg_rwstat_init(&stats->service_time, gfp) ||
++ blkg_rwstat_init(&stats->wait_time, gfp) ||
++ blkg_rwstat_init(&stats->queued, gfp) ||
++ blkg_stat_init(&stats->sectors, gfp) ||
++ blkg_stat_init(&stats->time, gfp) ||
++ blkg_stat_init(&stats->unaccounted_time, gfp) ||
++ blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
++ blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
++ blkg_stat_init(&stats->dequeue, gfp) ||
++ blkg_stat_init(&stats->group_wait_time, gfp) ||
++ blkg_stat_init(&stats->idle_time, gfp) ||
++ blkg_stat_init(&stats->empty_time, gfp)) {
++ bfqg_stats_exit(stats);
++ return -ENOMEM;
++ }
++
++ return 0;
++}
++
++static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
++ {
++ return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
++ }
++
++static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
++{
++ return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
++}
++
++static void bfq_cpd_init(struct blkcg_policy_data *cpd)
++{
++ struct bfq_group_data *d = cpd_to_bfqgd(cpd);
++
++ d->weight = BFQ_DEFAULT_GRP_WEIGHT;
++}
++
++static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
++{
++ struct bfq_group *bfqg;
++
++ bfqg = kzalloc_node(sizeof(*bfqg), gfp, node);
++ if (!bfqg)
++ return NULL;
++
++ if (bfqg_stats_init(&bfqg->stats, gfp) ||
++ bfqg_stats_init(&bfqg->dead_stats, gfp)) {
++ kfree(bfqg);
++ return NULL;
++ }
++
++ return &bfqg->pd;
++}
++
++static void bfq_group_set_parent(struct bfq_group *bfqg,
++ struct bfq_group *parent)
++{
++ struct bfq_entity *entity;
++
++ BUG_ON(!parent);
++ BUG_ON(!bfqg);
++ BUG_ON(bfqg == parent);
++
++ entity = &bfqg->entity;
++ entity->parent = parent->my_entity;
++ entity->sched_data = &parent->sched_data;
++}
++
++static void bfq_pd_init(struct blkg_policy_data *pd)
++{
++ struct blkcg_gq *blkg = pd_to_blkg(pd);
++ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
++ struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
++ struct bfq_entity *entity = &bfqg->entity;
++ struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
++
++ entity->orig_weight = entity->weight = entity->new_weight = d->weight;
++ entity->my_sched_data = &bfqg->sched_data;
++ bfqg->my_entity = entity; /*
++ * the root_group's will be set to NULL
++ * in bfq_init_queue()
++ */
++ bfqg->bfqd = bfqd;
++ bfqg->active_entities = 0;
++}
++
++static void bfq_pd_free(struct blkg_policy_data *pd)
++{
++ struct bfq_group *bfqg = pd_to_bfqg(pd);
++
++ bfqg_stats_exit(&bfqg->stats);
++ bfqg_stats_exit(&bfqg->dead_stats);
++
++ return kfree(bfqg);
++}
++
++/* offset delta from bfqg->stats to bfqg->dead_stats */
++static const int dead_stats_off_delta = offsetof(struct bfq_group, dead_stats) -
++ offsetof(struct bfq_group, stats);
++
++/* to be used by recursive prfill, sums live and dead stats recursively */
++static u64 bfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
++{
++ u64 sum = 0;
++
++ sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
++ sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
++ off + dead_stats_off_delta);
++ return sum;
++}
++
++/* to be used by recursive prfill, sums live and dead rwstats recursively */
++static struct blkg_rwstat bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
++ int off)
++{
++ struct blkg_rwstat a, b;
++
++ a = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
++ b = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
++ off + dead_stats_off_delta);
++ blkg_rwstat_add_aux(&a, &b);
++ return a;
++}
++
++static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
++{
++ struct bfq_group *bfqg = pd_to_bfqg(pd);
++
++ bfqg_stats_reset(&bfqg->stats);
++ bfqg_stats_reset(&bfqg->dead_stats);
++}
++
++static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
++ struct blkcg *blkcg)
++{
++ struct request_queue *q = bfqd->queue;
++ struct bfq_group *bfqg = NULL, *parent;
++ struct bfq_entity *entity = NULL;
++
++ assert_spin_locked(bfqd->queue->queue_lock);
++
++ /* avoid lookup for the common case where there's no blkcg */
++ if (blkcg == &blkcg_root) {
++ bfqg = bfqd->root_group;
++ } else {
++ struct blkcg_gq *blkg;
++
++ blkg = blkg_lookup_create(blkcg, q);
++ if (!IS_ERR(blkg))
++ bfqg = blkg_to_bfqg(blkg);
++ else /* fallback to root_group */
++ bfqg = bfqd->root_group;
++ }
++
++ BUG_ON(!bfqg);
++
++ /*
++ * Update chain of bfq_groups as we might be handling a leaf group
++ * which, along with some of its relatives, has not been hooked yet
++ * to the private hierarchy of BFQ.
++ */
++ entity = &bfqg->entity;
++ for_each_entity(entity) {
++ bfqg = container_of(entity, struct bfq_group, entity);
++ BUG_ON(!bfqg);
++ if (bfqg != bfqd->root_group) {
++ parent = bfqg_parent(bfqg);
++ if (!parent)
++ parent = bfqd->root_group;
++ BUG_ON(!parent);
++ bfq_group_set_parent(bfqg, parent);
++ }
++ }
++
++ return bfqg;
++}
++
++/**
++ * bfq_bfqq_move - migrate @bfqq to @bfqg.
++ * @bfqd: queue descriptor.
++ * @bfqq: the queue to move.
++ * @entity: @bfqq's entity.
++ * @bfqg: the group to move to.
++ *
++ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
++ * it on the new one. Avoid putting the entity on the old group idle tree.
++ *
++ * Must be called under the queue lock; the cgroup owning @bfqg must
++ * not disappear (by now this just means that we are called under
++ * rcu_read_lock()).
++ */
++static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ struct bfq_entity *entity, struct bfq_group *bfqg)
++{
++ int busy, resume;
++
++ busy = bfq_bfqq_busy(bfqq);
++ resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
++
++ BUG_ON(resume && !entity->on_st);
++ BUG_ON(busy && !resume && entity->on_st &&
++ bfqq != bfqd->in_service_queue);
++
++ if (busy) {
++ BUG_ON(atomic_read(&bfqq->ref) < 2);
++
++ if (!resume)
++ bfq_del_bfqq_busy(bfqd, bfqq, 0);
++ else
++ bfq_deactivate_bfqq(bfqd, bfqq, 0);
++ } else if (entity->on_st)
++ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
++ bfqg_put(bfqq_group(bfqq));
++
++ /*
++ * Here we use a reference to bfqg. We don't need a refcounter
++ * as the cgroup reference will not be dropped, so that its
++ * destroy() callback will not be invoked.
++ */
++ entity->parent = bfqg->my_entity;
++ entity->sched_data = &bfqg->sched_data;
++ bfqg_get(bfqg);
++
++ if (busy) {
++ if (resume)
++ bfq_activate_bfqq(bfqd, bfqq);
++ }
++
++ if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
++ bfq_schedule_dispatch(bfqd);
++}
++
++/**
++ * __bfq_bic_change_cgroup - move @bic to @cgroup.
++ * @bfqd: the queue descriptor.
++ * @bic: the bic to move.
++ * @blkcg: the blk-cgroup to move to.
++ *
++ * Move bic to blkcg, assuming that bfqd->queue is locked; the caller
++ * has to make sure that the reference to cgroup is valid across the call.
++ *
++ * NOTE: an alternative approach might have been to store the current
++ * cgroup in bfqq and getting a reference to it, reducing the lookup
++ * time here, at the price of slightly more complex code.
++ */
++static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
++ struct bfq_io_cq *bic,
++ struct blkcg *blkcg)
++{
++ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
++ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
++ struct bfq_group *bfqg;
++ struct bfq_entity *entity;
++
++ lockdep_assert_held(bfqd->queue->queue_lock);
++
++ bfqg = bfq_find_alloc_group(bfqd, blkcg);
++ if (async_bfqq) {
++ entity = &async_bfqq->entity;
++
++ if (entity->sched_data != &bfqg->sched_data) {
++ bic_set_bfqq(bic, NULL, 0);
++ bfq_log_bfqq(bfqd, async_bfqq,
++ "bic_change_group: %p %d",
++ async_bfqq, atomic_read(&async_bfqq->ref));
++ bfq_put_queue(async_bfqq);
++ }
++ }
++
++ if (sync_bfqq) {
++ entity = &sync_bfqq->entity;
++ if (entity->sched_data != &bfqg->sched_data)
++ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
++ }
++
++ return bfqg;
++}
++
++static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
++{
++ struct bfq_data *bfqd = bic_to_bfqd(bic);
++ struct blkcg *blkcg;
++ struct bfq_group *bfqg = NULL;
++ uint64_t id;
++
++ rcu_read_lock();
++ blkcg = bio_blkcg(bio);
++ id = blkcg->css.serial_nr;
++ rcu_read_unlock();
++
++ /*
++ * Check whether blkcg has changed. The condition may trigger
++ * spuriously on a newly created cic but there's no harm.
++ */
++ if (unlikely(!bfqd) || likely(bic->blkcg_id == id))
++ return;
++
++ bfqg = __bfq_bic_change_cgroup(bfqd, bic, blkcg);
++ BUG_ON(!bfqg);
++ bic->blkcg_id = id;
++}
++
++/**
++ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
++ * @st: the service tree being flushed.
++ */
++static void bfq_flush_idle_tree(struct bfq_service_tree *st)
++{
++ struct bfq_entity *entity = st->first_idle;
++
++ for (; entity ; entity = st->first_idle)
++ __bfq_deactivate_entity(entity, 0);
++}
++
++/**
++ * bfq_reparent_leaf_entity - move leaf entity to the root_group.
++ * @bfqd: the device data structure with the root group.
++ * @entity: the entity to move.
++ */
++static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++
++ BUG_ON(!bfqq);
++ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
++ return;
++}
++
++/**
++ * bfq_reparent_active_entities - move to the root group all active
++ * entities.
++ * @bfqd: the device data structure with the root group.
++ * @bfqg: the group to move from.
++ * @st: the service tree with the entities.
++ *
++ * Needs queue_lock to be taken and reference to be valid over the call.
++ */
++static void bfq_reparent_active_entities(struct bfq_data *bfqd,
++ struct bfq_group *bfqg,
++ struct bfq_service_tree *st)
++{
++ struct rb_root *active = &st->active;
++ struct bfq_entity *entity = NULL;
++
++ if (!RB_EMPTY_ROOT(&st->active))
++ entity = bfq_entity_of(rb_first(active));
++
++ for (; entity ; entity = bfq_entity_of(rb_first(active)))
++ bfq_reparent_leaf_entity(bfqd, entity);
++
++ if (bfqg->sched_data.in_service_entity)
++ bfq_reparent_leaf_entity(bfqd,
++ bfqg->sched_data.in_service_entity);
++
++ return;
++}
++
++/**
++ * bfq_destroy_group - destroy @bfqg.
++ * @bfqg: the group being destroyed.
++ *
++ * Destroy @bfqg, making sure that it is not referenced from its parent.
++ * blkio already grabs the queue_lock for us, so no need to use RCU-based magic
++ */
++static void bfq_pd_offline(struct blkg_policy_data *pd)
++{
++ struct bfq_service_tree *st;
++ struct bfq_group *bfqg;
++ struct bfq_data *bfqd;
++ struct bfq_entity *entity;
++ int i;
++
++ BUG_ON(!pd);
++ bfqg = pd_to_bfqg(pd);
++ BUG_ON(!bfqg);
++ bfqd = bfqg->bfqd;
++ BUG_ON(bfqd && !bfqd->root_group);
++
++ entity = bfqg->my_entity;
++
++ if (!entity) /* root group */
++ return;
++
++ /*
++ * Empty all service_trees belonging to this group before
++ * deactivating the group itself.
++ */
++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
++ BUG_ON(!bfqg->sched_data.service_tree);
++ st = bfqg->sched_data.service_tree + i;
++ /*
++ * The idle tree may still contain bfq_queues belonging
++ * to exited task because they never migrated to a different
++ * cgroup from the one being destroyed now. No one else
++ * can access them so it's safe to act without any lock.
++ */
++ bfq_flush_idle_tree(st);
++
++ /*
++ * It may happen that some queues are still active
++ * (busy) upon group destruction (if the corresponding
++ * processes have been forced to terminate). We move
++ * all the leaf entities corresponding to these queues
++ * to the root_group.
++ * Also, it may happen that the group has an entity
++ * in service, which is disconnected from the active
++ * tree: it must be moved, too.
++ * There is no need to put the sync queues, as the
++ * scheduler has taken no reference.
++ */
++ bfq_reparent_active_entities(bfqd, bfqg, st);
++ BUG_ON(!RB_EMPTY_ROOT(&st->active));
++ BUG_ON(!RB_EMPTY_ROOT(&st->idle));
++ }
++ BUG_ON(bfqg->sched_data.next_in_service);
++ BUG_ON(bfqg->sched_data.in_service_entity);
++
++ __bfq_deactivate_entity(entity, 0);
++ bfq_put_async_queues(bfqd, bfqg);
++ BUG_ON(entity->tree);
++
++ bfqg_stats_xfer_dead(bfqg);
++}
++
++static void bfq_end_wr_async(struct bfq_data *bfqd)
++{
++ struct blkcg_gq *blkg;
++
++ list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
++ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
++
++ bfq_end_wr_async_queues(bfqd, bfqg);
++ }
++ bfq_end_wr_async_queues(bfqd, bfqd->root_group);
++}
++
++static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css,
++ struct cftype *cftype)
++{
++ struct blkcg *blkcg = css_to_blkcg(css);
++ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
++ int ret = -EINVAL;
++
++ spin_lock_irq(&blkcg->lock);
++ ret = bfqgd->weight;
++ spin_unlock_irq(&blkcg->lock);
++
++ return ret;
++}
++
++static int bfqio_cgroup_weight_read_dfl(struct seq_file *sf, void *v)
++{
++ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
++ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
++
++ spin_lock_irq(&blkcg->lock);
++ seq_printf(sf, "%u\n", bfqgd->weight);
++ spin_unlock_irq(&blkcg->lock);
++
++ return 0;
++}
++
++static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
++ struct cftype *cftype,
++ u64 val)
++{
++ struct blkcg *blkcg = css_to_blkcg(css);
++ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
++ struct blkcg_gq *blkg;
++ int ret = -EINVAL;
++
++ if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
++ return ret;
++
++ ret = 0;
++ spin_lock_irq(&blkcg->lock);
++ bfqgd->weight = (unsigned short)val;
++ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
++ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
++ if (!bfqg)
++ continue;
++ /*
++ * Setting the prio_changed flag of the entity
++ * to 1 with new_weight == weight would re-set
++ * the value of the weight to its ioprio mapping.
++ * Set the flag only if necessary.
++ */
++ if ((unsigned short)val != bfqg->entity.new_weight) {
++ bfqg->entity.new_weight = (unsigned short)val;
++ /*
++ * Make sure that the above new value has been
++ * stored in bfqg->entity.new_weight before
++ * setting the prio_changed flag. In fact,
++ * this flag may be read asynchronously (in
++ * critical sections protected by a different
++ * lock than that held here), and finding this
++ * flag set may cause the execution of the code
++ * for updating parameters whose value may
++ * depend also on bfqg->entity.new_weight (in
++ * __bfq_entity_update_weight_prio).
++ * This barrier makes sure that the new value
++ * of bfqg->entity.new_weight is correctly
++ * seen in that code.
++ */
++ smp_wmb();
++ bfqg->entity.prio_changed = 1;
++ }
++ }
++ spin_unlock_irq(&blkcg->lock);
++
++ return ret;
++}
++
++static ssize_t bfqio_cgroup_weight_write_dfl(struct kernfs_open_file *of,
++ char *buf, size_t nbytes,
++ loff_t off)
++{
++ /* First unsigned long found in the file is used */
++ return bfqio_cgroup_weight_write(of_css(of), NULL,
++ simple_strtoull(strim(buf), NULL, 0));
++}
++
++static int bfqg_print_stat(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
++ &blkcg_policy_bfq, seq_cft(sf)->private, false);
++ return 0;
++}
++
++static int bfqg_print_rwstat(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
++ &blkcg_policy_bfq, seq_cft(sf)->private, true);
++ return 0;
++}
++
++static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
++ struct blkg_policy_data *pd, int off)
++{
++ u64 sum = bfqg_stat_pd_recursive_sum(pd, off);
++
++ return __blkg_prfill_u64(sf, pd, sum);
++}
++
++static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
++ struct blkg_policy_data *pd, int off)
++{
++ struct blkg_rwstat sum = bfqg_rwstat_pd_recursive_sum(pd, off);
++
++ return __blkg_prfill_rwstat(sf, pd, &sum);
++}
++
++static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
++ bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
++ seq_cft(sf)->private, false);
++ return 0;
++}
++
++static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
++ bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
++ seq_cft(sf)->private, true);
++ return 0;
++}
++
++static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
++ struct blkg_policy_data *pd, int off)
++{
++ struct bfq_group *bfqg = pd_to_bfqg(pd);
++ u64 samples = blkg_stat_read(&bfqg->stats.avg_queue_size_samples);
++ u64 v = 0;
++
++ if (samples) {
++ v = blkg_stat_read(&bfqg->stats.avg_queue_size_sum);
++ v = div64_u64(v, samples);
++ }
++ __blkg_prfill_u64(sf, pd, v);
++ return 0;
++}
++
++/* print avg_queue_size */
++static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
++ bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
++ 0, false);
++ return 0;
++}
++
++static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
++{
++ int ret;
++
++ ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
++ if (ret)
++ return NULL;
++
++ return blkg_to_bfqg(bfqd->queue->root_blkg);
++}
++
++static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
++{
++ struct bfq_group_data *bgd;
++
++ bgd = kzalloc(sizeof(*bgd), GFP_KERNEL);
++ if (!bgd)
++ return NULL;
++ return &bgd->pd;
++}
++
++static void bfq_cpd_free(struct blkcg_policy_data *cpd)
++{
++ kfree(cpd_to_bfqgd(cpd));
++}
++
++static struct cftype bfqio_files_dfl[] = {
++ {
++ .name = "weight",
++ .flags = CFTYPE_NOT_ON_ROOT,
++ .seq_show = bfqio_cgroup_weight_read_dfl,
++ .write = bfqio_cgroup_weight_write_dfl,
++ },
++ {} /* terminate */
++};
++
++static struct cftype bfqio_files[] = {
++ {
++ .name = "bfq.weight",
++ .read_u64 = bfqio_cgroup_weight_read,
++ .write_u64 = bfqio_cgroup_weight_write,
++ },
++ /* statistics, covering only the tasks in the bfqg */
++ {
++ .name = "bfq.time",
++ .private = offsetof(struct bfq_group, stats.time),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.sectors",
++ .private = offsetof(struct bfq_group, stats.sectors),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.io_service_bytes",
++ .private = offsetof(struct bfq_group, stats.service_bytes),
++ .seq_show = bfqg_print_rwstat,
++ },
++ {
++ .name = "bfq.io_serviced",
++ .private = offsetof(struct bfq_group, stats.serviced),
++ .seq_show = bfqg_print_rwstat,
++ },
++ {
++ .name = "bfq.io_service_time",
++ .private = offsetof(struct bfq_group, stats.service_time),
++ .seq_show = bfqg_print_rwstat,
++ },
++ {
++ .name = "bfq.io_wait_time",
++ .private = offsetof(struct bfq_group, stats.wait_time),
++ .seq_show = bfqg_print_rwstat,
++ },
++ {
++ .name = "bfq.io_merged",
++ .private = offsetof(struct bfq_group, stats.merged),
++ .seq_show = bfqg_print_rwstat,
++ },
++ {
++ .name = "bfq.io_queued",
++ .private = offsetof(struct bfq_group, stats.queued),
++ .seq_show = bfqg_print_rwstat,
++ },
++
++ /* the same statistics, which cover the bfqg and its descendants */
++ {
++ .name = "bfq.time_recursive",
++ .private = offsetof(struct bfq_group, stats.time),
++ .seq_show = bfqg_print_stat_recursive,
++ },
++ {
++ .name = "bfq.sectors_recursive",
++ .private = offsetof(struct bfq_group, stats.sectors),
++ .seq_show = bfqg_print_stat_recursive,
++ },
++ {
++ .name = "bfq.io_service_bytes_recursive",
++ .private = offsetof(struct bfq_group, stats.service_bytes),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.io_serviced_recursive",
++ .private = offsetof(struct bfq_group, stats.serviced),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.io_service_time_recursive",
++ .private = offsetof(struct bfq_group, stats.service_time),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.io_wait_time_recursive",
++ .private = offsetof(struct bfq_group, stats.wait_time),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.io_merged_recursive",
++ .private = offsetof(struct bfq_group, stats.merged),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.io_queued_recursive",
++ .private = offsetof(struct bfq_group, stats.queued),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.avg_queue_size",
++ .seq_show = bfqg_print_avg_queue_size,
++ },
++ {
++ .name = "bfq.group_wait_time",
++ .private = offsetof(struct bfq_group, stats.group_wait_time),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.idle_time",
++ .private = offsetof(struct bfq_group, stats.idle_time),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.empty_time",
++ .private = offsetof(struct bfq_group, stats.empty_time),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.dequeue",
++ .private = offsetof(struct bfq_group, stats.dequeue),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.unaccounted_time",
++ .private = offsetof(struct bfq_group, stats.unaccounted_time),
++ .seq_show = bfqg_print_stat,
++ },
++ { } /* terminate */
++};
++
++static struct blkcg_policy blkcg_policy_bfq = {
++ .dfl_cftypes = bfqio_files_dfl,
++ .legacy_cftypes = bfqio_files,
++
++ .pd_alloc_fn = bfq_pd_alloc,
++ .pd_init_fn = bfq_pd_init,
++ .pd_offline_fn = bfq_pd_offline,
++ .pd_free_fn = bfq_pd_free,
++ .pd_reset_stats_fn = bfq_pd_reset_stats,
++
++ .cpd_alloc_fn = bfq_cpd_alloc,
++ .cpd_init_fn = bfq_cpd_init,
++ .cpd_bind_fn = bfq_cpd_init,
++ .cpd_free_fn = bfq_cpd_free,
++
++};
++
++#else
++
++static void bfq_init_entity(struct bfq_entity *entity,
++ struct bfq_group *bfqg)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ entity->weight = entity->new_weight;
++ entity->orig_weight = entity->new_weight;
++ if (bfqq) {
++ bfqq->ioprio = bfqq->new_ioprio;
++ bfqq->ioprio_class = bfqq->new_ioprio_class;
++ }
++ entity->sched_data = &bfqg->sched_data;
++}
++
++static struct bfq_group *
++bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
++{
++ struct bfq_data *bfqd = bic_to_bfqd(bic);
++ return bfqd->root_group;
++}
++
++static void bfq_bfqq_move(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ struct bfq_entity *entity,
++ struct bfq_group *bfqg)
++{
++}
++
++static void bfq_end_wr_async(struct bfq_data *bfqd)
++{
++ bfq_end_wr_async_queues(bfqd, bfqd->root_group);
++}
++
++static void bfq_disconnect_groups(struct bfq_data *bfqd)
++{
++ bfq_put_async_queues(bfqd, bfqd->root_group);
++}
++
++static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
++ struct blkcg *blkcg)
++{
++ return bfqd->root_group;
++}
++
++static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
++{
++ struct bfq_group *bfqg;
++ int i;
++
++ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
++ if (!bfqg)
++ return NULL;
++
++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
++ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
++
++ return bfqg;
++}
++#endif
+diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c
+new file mode 100644
+index 0000000..fb7bb8f
+--- /dev/null
++++ b/block/bfq-ioc.c
+@@ -0,0 +1,36 @@
++/*
++ * BFQ: I/O context handling.
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
++ *
++ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
++ * Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ */
++
++/**
++ * icq_to_bic - convert iocontext queue structure to bfq_io_cq.
++ * @icq: the iocontext queue.
++ */
++static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
++{
++ /* bic->icq is the first member, %NULL will convert to %NULL */
++ return container_of(icq, struct bfq_io_cq, icq);
++}
++
++/**
++ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd.
++ * @bfqd: the lookup key.
++ * @ioc: the io_context of the process doing I/O.
++ *
++ * Queue lock must be held.
++ */
++static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
++ struct io_context *ioc)
++{
++ if (ioc)
++ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue));
++ return NULL;
++}
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+new file mode 100644
+index 0000000..f9787a6
+--- /dev/null
++++ b/block/bfq-iosched.c
+@@ -0,0 +1,3754 @@
++/*
++ * Budget Fair Queueing (BFQ) disk scheduler.
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
++ *
++ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
++ * Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
++ * file.
++ *
++ * BFQ is a proportional-share storage-I/O scheduling algorithm based on
++ * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets,
++ * measured in number of sectors, to processes instead of time slices. The
++ * device is not granted to the in-service process for a given time slice,
++ * but until it has exhausted its assigned budget. This change from the time
++ * to the service domain allows BFQ to distribute the device throughput
++ * among processes as desired, without any distortion due to ZBR, workload
++ * fluctuations or other factors. BFQ uses an ad hoc internal scheduler,
++ * called B-WF2Q+, to schedule processes according to their budgets. More
++ * precisely, BFQ schedules queues associated to processes. Thanks to the
++ * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to
++ * I/O-bound processes issuing sequential requests (to boost the
++ * throughput), and yet guarantee a low latency to interactive and soft
++ * real-time applications.
++ *
++ * BFQ is described in [1], where also a reference to the initial, more
++ * theoretical paper on BFQ can be found. The interested reader can find
++ * in the latter paper full details on the main algorithm, as well as
++ * formulas of the guarantees and formal proofs of all the properties.
++ * With respect to the version of BFQ presented in these papers, this
++ * implementation adds a few more heuristics, such as the one that
++ * guarantees a low latency to soft real-time applications, and a
++ * hierarchical extension based on H-WF2Q+.
++ *
++ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with
++ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
++ * complexity derives from the one introduced with EEVDF in [3].
++ *
++ * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness
++ * with the BFQ Disk I/O Scheduler'',
++ * Proceedings of the 5th Annual International Systems and Storage
++ * Conference (SYSTOR '12), June 2012.
++ *
++ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
++ *
++ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing
++ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689,
++ * Oct 1997.
++ *
++ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz
++ *
++ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline
++ * First: A Flexible and Accurate Mechanism for Proportional Share
++ * Resource Allocation,'' technical report.
++ *
++ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf
++ */
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/blkdev.h>
++#include <linux/cgroup.h>
++#include <linux/elevator.h>
++#include <linux/jiffies.h>
++#include <linux/rbtree.h>
++#include <linux/ioprio.h>
++#include "bfq.h"
++#include "blk.h"
++
++/* Expiration time of sync (0) and async (1) requests, in jiffies. */
++static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
++
++/* Maximum backwards seek, in KiB. */
++static const int bfq_back_max = 16 * 1024;
++
++/* Penalty of a backwards seek, in number of sectors. */
++static const int bfq_back_penalty = 2;
++
++/* Idling period duration, in jiffies. */
++static int bfq_slice_idle = HZ / 125;
++
++/* Minimum number of assigned budgets for which stats are safe to compute. */
++static const int bfq_stats_min_budgets = 194;
++
++/* Default maximum budget values, in sectors and number of requests. */
++static const int bfq_default_max_budget = 16 * 1024;
++static const int bfq_max_budget_async_rq = 4;
++
++/*
++ * Async to sync throughput distribution is controlled as follows:
++ * when an async request is served, the entity is charged the number
++ * of sectors of the request, multiplied by the factor below
++ */
++static const int bfq_async_charge_factor = 10;
++
++/* Default timeout values, in jiffies, approximating CFQ defaults. */
++static const int bfq_timeout_sync = HZ / 8;
++static int bfq_timeout_async = HZ / 25;
++
++struct kmem_cache *bfq_pool;
++
++/* Below this threshold (in ms), we consider thinktime immediate. */
++#define BFQ_MIN_TT 2
++
++/* hw_tag detection: parallel requests threshold and min samples needed. */
++#define BFQ_HW_QUEUE_THRESHOLD 4
++#define BFQ_HW_QUEUE_SAMPLES 32
++
++#define BFQQ_SEEK_THR (sector_t)(8 * 1024)
++#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR)
++
++/* Min samples used for peak rate estimation (for autotuning). */
++#define BFQ_PEAK_RATE_SAMPLES 32
++
++/* Shift used for peak rate fixed precision calculations. */
++#define BFQ_RATE_SHIFT 16
++
++/*
++ * By default, BFQ computes the duration of the weight raising for
++ * interactive applications automatically, using the following formula:
++ * duration = (R / r) * T, where r is the peak rate of the device, and
++ * R and T are two reference parameters.
++ * In particular, R is the peak rate of the reference device (see below),
++ * and T is a reference time: given the systems that are likely to be
++ * installed on the reference device according to its speed class, T is
++ * about the maximum time needed, under BFQ and while reading two files in
++ * parallel, to load typical large applications on these systems.
++ * In practice, the slower/faster the device at hand is, the more/less it
++ * takes to load applications with respect to the reference device.
++ * Accordingly, the longer/shorter BFQ grants weight raising to interactive
++ * applications.
++ *
++ * BFQ uses four different reference pairs (R, T), depending on:
++ * . whether the device is rotational or non-rotational;
++ * . whether the device is slow, such as old or portable HDDs, as well as
++ * SD cards, or fast, such as newer HDDs and SSDs.
++ *
++ * The device's speed class is dynamically (re)detected in
++ * bfq_update_peak_rate() every time the estimated peak rate is updated.
++ *
++ * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0]
++ * are the reference values for a slow/fast rotational device, whereas
++ * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for
++ * a slow/fast non-rotational device. Finally, device_speed_thresh are the
++ * thresholds used to switch between speed classes.
++ * Both the reference peak rates and the thresholds are measured in
++ * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
++ */
++static int R_slow[2] = {1536, 10752};
++static int R_fast[2] = {17415, 34791};
++/*
++ * To improve readability, a conversion function is used to initialize the
++ * following arrays, which entails that they can be initialized only in a
++ * function.
++ */
++static int T_slow[2];
++static int T_fast[2];
++static int device_speed_thresh[2];
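++
++/*
++ * Worked example (illustrative only, not part of the original patch):
++ * for a fast rotational device, R = R_fast[0] and T = T_fast[0]. If
++ * the estimated peak rate r of the device at hand is half of R, then
++ * duration = (R / r) * T = 2 * T, i.e., interactive queues on this
++ * device are weight-raised for twice the reference time. This is what
++ * bfq_wr_duration() computes, as bfqd->RT_prod / bfqd->peak_rate with
++ * RT_prod = R * T.
++ */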
++
++#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \
++ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
++
++#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0])
++#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
++
++static void bfq_schedule_dispatch(struct bfq_data *bfqd);
++
++#include "bfq-ioc.c"
++#include "bfq-sched.c"
++#include "bfq-cgroup.c"
++
++#define bfq_class_idle(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
++#define bfq_class_rt(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_RT)
++
++#define bfq_sample_valid(samples) ((samples) > 80)
++
++/*
++ * We regard a request as SYNC if it is either a read or has the SYNC bit
++ * set (in which case it could also be a direct WRITE).
++ */
++static int bfq_bio_sync(struct bio *bio)
++{
++ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC))
++ return 1;
++
++ return 0;
++}
++
++/*
++ * Scheduler run of queue, if there are requests pending and no one in the
++ * driver that will restart queueing.
++ */
++static void bfq_schedule_dispatch(struct bfq_data *bfqd)
++{
++ if (bfqd->queued != 0) {
++ bfq_log(bfqd, "schedule dispatch");
++ kblockd_schedule_work(&bfqd->unplug_work);
++ }
++}
++
++/*
++ * Lifted from AS - choose which of rq1 and rq2 is best served now.
++ * We choose the request that is closest to the head right now. Distance
++ * behind the head is penalized and only allowed to a certain extent.
++ */
++static struct request *bfq_choose_req(struct bfq_data *bfqd,
++ struct request *rq1,
++ struct request *rq2,
++ sector_t last)
++{
++ sector_t s1, s2, d1 = 0, d2 = 0;
++ unsigned long back_max;
++#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */
++#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */
++ unsigned wrap = 0; /* bit mask: requests behind the disk head? */
++
++ if (!rq1 || rq1 == rq2)
++ return rq2;
++ if (!rq2)
++ return rq1;
++
++ if (rq_is_sync(rq1) && !rq_is_sync(rq2))
++ return rq1;
++ else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
++ return rq2;
++ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
++ return rq1;
++ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META))
++ return rq2;
++
++ s1 = blk_rq_pos(rq1);
++ s2 = blk_rq_pos(rq2);
++
++ /*
++ * By definition, 1KiB is 2 sectors.
++ */
++ back_max = bfqd->bfq_back_max * 2;
++
++ /*
++ * Strict one way elevator _except_ in the case where we allow
++ * short backward seeks which are biased as twice the cost of a
++ * similar forward seek.
++ */
++ if (s1 >= last)
++ d1 = s1 - last;
++ else if (s1 + back_max >= last)
++ d1 = (last - s1) * bfqd->bfq_back_penalty;
++ else
++ wrap |= BFQ_RQ1_WRAP;
++
++ if (s2 >= last)
++ d2 = s2 - last;
++ else if (s2 + back_max >= last)
++ d2 = (last - s2) * bfqd->bfq_back_penalty;
++ else
++ wrap |= BFQ_RQ2_WRAP;
++
++ /* Found required data */
++
++ /*
++ * By doing switch() on the bit mask "wrap" we avoid having to
++ * check two variables for all permutations: --> faster!
++ */
++ switch (wrap) {
++ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
++ if (d1 < d2)
++ return rq1;
++ else if (d2 < d1)
++ return rq2;
++ else {
++ if (s1 >= s2)
++ return rq1;
++ else
++ return rq2;
++ }
++
++ case BFQ_RQ2_WRAP:
++ return rq1;
++ case BFQ_RQ1_WRAP:
++ return rq2;
++ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */
++ default:
++ /*
++ * Since both rqs are wrapped,
++ * start with the one that's further behind head
++ * (--> only *one* back seek required),
++ * since back seek takes more time than forward.
++ */
++ if (s1 <= s2)
++ return rq1;
++ else
++ return rq2;
++ }
++}
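++
++/*
++ * Worked example (illustrative only): suppose last = 1000,
++ * bfq_back_penalty = 2 and back_max = 32768. For s1 = 1100 we get
++ * d1 = 100 (forward seek); for s2 = 900 we get d2 = (1000 - 900) * 2 =
++ * 200 (short backward seek, penalized). Neither request wraps, so the
++ * switch above takes case 0 and rq1 is chosen, although both requests
++ * are 100 sectors away from the head.
++ */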
++
++/*
++ * Tell whether there are active queues or groups with differentiated weights.
++ */
++static bool bfq_differentiated_weights(struct bfq_data *bfqd)
++{
++ /*
++ * For weights to differ, at least one of the trees must contain
++ * at least two nodes.
++ */
++ return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
++ (bfqd->queue_weights_tree.rb_node->rb_left ||
++ bfqd->queue_weights_tree.rb_node->rb_right)
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ ) ||
++ (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) &&
++ (bfqd->group_weights_tree.rb_node->rb_left ||
++ bfqd->group_weights_tree.rb_node->rb_right)
++#endif
++ );
++}
++
++/*
++ * The following function returns true if every queue must receive the
++ * same share of the throughput (this condition is used when deciding
++ * whether idling may be disabled, see the comments in the function
++ * bfq_bfqq_may_idle()).
++ *
++ * Such a scenario occurs when:
++ * 1) all active queues have the same weight,
++ * 2) all active groups at the same level in the groups tree have the same
++ * weight,
++ * 3) all active groups at the same level in the groups tree have the same
++ * number of children.
++ *
++ * Unfortunately, keeping the necessary state for evaluating exactly the
++ * above symmetry conditions would be quite complex and time-consuming.
++ * Therefore this function evaluates, instead, the following stronger
++ * sub-conditions, for which it is much easier to maintain the needed
++ * state:
++ * 1) all active queues have the same weight,
++ * 2) all active groups have the same weight,
++ * 3) all active groups have at most one active child each.
++ * In particular, the last two conditions are always true if hierarchical
++ * support and the cgroups interface are not enabled, thus no state needs
++ * to be maintained in this case.
++ */
++static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
++{
++ return
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ !bfqd->active_numerous_groups &&
++#endif
++ !bfq_differentiated_weights(bfqd);
++}
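++
++/*
++ * Example (illustrative only): if two active queues have weights 100
++ * and 200, the queue weights tree contains two nodes, so
++ * bfq_differentiated_weights() returns true and the scenario is not
++ * deemed symmetric; idling is then kept enabled where needed to
++ * preserve the 1:2 service ratio. With equal weights the tree
++ * degenerates to a single node and the sub-conditions above may hold.
++ */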
++
++/*
++ * If the weight-counter tree passed as input contains no counter for
++ * the weight of the input entity, then add that counter; otherwise just
++ * increment the existing counter.
++ *
++ * Note that weight-counter trees contain few nodes in mostly symmetric
++ * scenarios. For example, if all queues have the same weight, then the
++ * weight-counter tree for the queues may contain at most one node.
++ * This holds even if low_latency is on, because weight-raised queues
++ * are not inserted in the tree.
++ * In most scenarios, the rate at which nodes are created/destroyed
++ * should be low too.
++ */
++static void bfq_weights_tree_add(struct bfq_data *bfqd,
++ struct bfq_entity *entity,
++ struct rb_root *root)
++{
++ struct rb_node **new = &(root->rb_node), *parent = NULL;
++
++ /*
++ * Do not insert if the entity is already associated with a
++ * counter, which happens if:
++ * 1) the entity is associated with a queue,
++ * 2) a request arrival has caused the queue to become both
++ * non-weight-raised, and hence change its weight, and
++ * backlogged; in this respect, each of the two events
++ * causes an invocation of this function,
++ * 3) this is the invocation of this function caused by the
++ * second event. This second invocation is actually useless,
++ * and we handle this fact by exiting immediately. More
++ * efficient or clearer solutions might possibly be adopted.
++ */
++ if (entity->weight_counter)
++ return;
++
++ while (*new) {
++ struct bfq_weight_counter *__counter = container_of(*new,
++ struct bfq_weight_counter,
++ weights_node);
++ parent = *new;
++
++ if (entity->weight == __counter->weight) {
++ entity->weight_counter = __counter;
++ goto inc_counter;
++ }
++ if (entity->weight < __counter->weight)
++ new = &((*new)->rb_left);
++ else
++ new = &((*new)->rb_right);
++ }
++
++ entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
++ GFP_ATOMIC);
++ /* If the atomic allocation fails, just exit: the entity will
++ * simply not be counted among the differentiated weights. */
++ if (!entity->weight_counter)
++ return;
++ entity->weight_counter->weight = entity->weight;
++ rb_link_node(&entity->weight_counter->weights_node, parent, new);
++ rb_insert_color(&entity->weight_counter->weights_node, root);
++
++inc_counter:
++ entity->weight_counter->num_active++;
++}
++
++/*
++ * Decrement the weight counter associated with the entity, and, if the
++ * counter reaches 0, remove the counter from the tree.
++ * See the comments to the function bfq_weights_tree_add() for considerations
++ * about overhead.
++ */
++static void bfq_weights_tree_remove(struct bfq_data *bfqd,
++ struct bfq_entity *entity,
++ struct rb_root *root)
++{
++ if (!entity->weight_counter)
++ return;
++
++ BUG_ON(RB_EMPTY_ROOT(root));
++ BUG_ON(entity->weight_counter->weight != entity->weight);
++
++ BUG_ON(!entity->weight_counter->num_active);
++ entity->weight_counter->num_active--;
++ if (entity->weight_counter->num_active > 0)
++ goto reset_entity_pointer;
++
++ rb_erase(&entity->weight_counter->weights_node, root);
++ kfree(entity->weight_counter);
++
++reset_entity_pointer:
++ entity->weight_counter = NULL;
++}
++
++static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ struct request *last)
++{
++ struct rb_node *rbnext = rb_next(&last->rb_node);
++ struct rb_node *rbprev = rb_prev(&last->rb_node);
++ struct request *next = NULL, *prev = NULL;
++
++ BUG_ON(RB_EMPTY_NODE(&last->rb_node));
++
++ if (rbprev)
++ prev = rb_entry_rq(rbprev);
++
++ if (rbnext)
++ next = rb_entry_rq(rbnext);
++ else {
++ rbnext = rb_first(&bfqq->sort_list);
++ if (rbnext && rbnext != &last->rb_node)
++ next = rb_entry_rq(rbnext);
++ }
++
++ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last));
++}
++
++/* see the definition of bfq_async_charge_factor for details */
++static unsigned long bfq_serv_to_charge(struct request *rq,
++ struct bfq_queue *bfqq)
++{
++ return blk_rq_sectors(rq) *
++ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) *
++ bfq_async_charge_factor));
++}
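++
++/*
++ * Example (illustrative only): for a sync queue, or for any
++ * weight-raised queue (wr_coeff > 1), the expression above charges
++ * exactly blk_rq_sectors(rq). For an async, non-weight-raised queue
++ * the charge becomes blk_rq_sectors(rq) * (1 + bfq_async_charge_factor),
++ * i.e., 11 times the request size, which shifts throughput from async
++ * to sync I/O.
++ */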
++
++/**
++ * bfq_updated_next_req - update the queue after a new next_rq selection.
++ * @bfqd: the device data the queue belongs to.
++ * @bfqq: the queue to update.
++ *
++ * If the first request of a queue changes we make sure that the queue
++ * has enough budget to serve at least its first request (if the
++ * request has grown). We do this because if the queue does not have
++ * enough budget for its first request, it has to go through two dispatch
++ * rounds to actually get it dispatched.
++ */
++static void bfq_updated_next_req(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
++ struct request *next_rq = bfqq->next_rq;
++ unsigned long new_budget;
++
++ if (!next_rq)
++ return;
++
++ if (bfqq == bfqd->in_service_queue)
++ /*
++ * In order not to break guarantees, budgets cannot be
++ * changed after an entity has been selected.
++ */
++ return;
++
++ BUG_ON(entity->tree != &st->active);
++ BUG_ON(entity == entity->sched_data->in_service_entity);
++
++ new_budget = max_t(unsigned long, bfqq->max_budget,
++ bfq_serv_to_charge(next_rq, bfqq));
++ if (entity->budget != new_budget) {
++ entity->budget = new_budget;
++ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu",
++ new_budget);
++ bfq_activate_bfqq(bfqd, bfqq);
++ }
++}
++
++static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
++{
++ u64 dur;
++
++ if (bfqd->bfq_wr_max_time > 0)
++ return bfqd->bfq_wr_max_time;
++
++ dur = bfqd->RT_prod;
++ do_div(dur, bfqd->peak_rate);
++
++ return dur;
++}
++
++/* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */
++static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ struct bfq_queue *item;
++ struct hlist_node *n;
++
++ hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node)
++ hlist_del_init(&item->burst_list_node);
++ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
++ bfqd->burst_size = 1;
++}
++
++/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */
++static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ /* Increment burst size to take into account also bfqq */
++ bfqd->burst_size++;
++
++ if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) {
++ struct bfq_queue *pos, *bfqq_item;
++ struct hlist_node *n;
++
++ /*
++ * Enough queues have been activated shortly after each
++ * other to consider this burst as large.
++ */
++ bfqd->large_burst = true;
++
++ /*
++ * We can now mark all queues in the burst list as
++ * belonging to a large burst.
++ */
++ hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
++ burst_list_node)
++ bfq_mark_bfqq_in_large_burst(bfqq_item);
++ bfq_mark_bfqq_in_large_burst(bfqq);
++
++ /*
++ * From now on, and until the current burst finishes, any
++ * new queue being activated shortly after the last queue
++ * was inserted in the burst can be immediately marked as
++ * belonging to a large burst. So the burst list is not
++ * needed any more. Remove it.
++ */
++ hlist_for_each_entry_safe(pos, n, &bfqd->burst_list,
++ burst_list_node)
++ hlist_del_init(&pos->burst_list_node);
++ } else /* burst not yet large: add bfqq to the burst list */
++ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
++}
++
++/*
++ * If many queues happen to become active shortly after each other, then,
++ * to help the processes associated to these queues get their job done as
++ * soon as possible, it is usually better to not grant either weight-raising
++ * or device idling to these queues. In this comment we describe, firstly,
++ * the reasons why this fact holds, and, secondly, the next function, which
++ * implements the main steps needed to properly mark these queues so that
++ * they can then be treated in a different way.
++ *
++ * As for the terminology, we say that a queue becomes active, i.e.,
++ * switches from idle to backlogged, either when it is created (as a
++ * consequence of the arrival of an I/O request), or, if already existing,
++ * when a new request for the queue arrives while the queue is idle.
++ * Bursts of activations, i.e., activations of different queues occurring
++ * shortly after each other, are typically caused by services or applications
++ * that spawn or reactivate many parallel threads/processes. Examples are
++ * systemd during boot or git grep.
++ *
++ * These services or applications benefit mostly from a high throughput:
++ * the quicker the requests of the activated queues are cumulatively served,
++ * the sooner the target job of these queues gets completed. As a consequence,
++ * weight-raising any of these queues, which also implies idling the device
++ * for it, is almost always counterproductive: in most cases it just lowers
++ * throughput.
++ *
++ * On the other hand, a burst of activations may also be caused by the start
++ * of an application that does not consist of a lot of parallel I/O-bound
++ * threads. In fact, with a complex application, the burst may be just a
++ * consequence of the fact that several processes need to be executed to
++ * start up the application. To start an application as quickly as possible,
++ * the best thing to do is to privilege the I/O related to the application
++ * with respect to all other I/O. Therefore, the best strategy for starting,
++ * as quickly as possible, an application that causes a burst of activations
++ * is to weight-raise all the queues activated during the burst. This is the
++ * exact opposite of the best strategy for the other type of bursts.
++ *
++ * In the end, to take the best action for each of the two cases, the two
++ * types of bursts need to be distinguished. Fortunately, this seems
++ * relatively easy to do, by looking at the sizes of the bursts. In
++ * particular, we found a threshold such that bursts with a larger size
++ * than that threshold are apparently caused only by services or commands
++ * such as systemd or git grep. For brevity, hereafter we call just 'large'
++ * these bursts. BFQ *does not* weight-raise queues whose activations occur
++ * in a large burst. In addition, for each of these queues BFQ performs or
++ * does not perform idling depending on which choice boosts the throughput
++ * most. The exact choice depends on the device and request pattern at
++ * hand.
++ *
++ * Turning back to the next function, it implements all the steps needed
++ * to detect the occurrence of a large burst and to properly mark all the
++ * queues belonging to it (so that they can then be treated in a different
++ * way). This goal is achieved by maintaining a special "burst list" that
++ * holds, temporarily, the queues that belong to the burst in progress. The
++ * list is then used to mark these queues as belonging to a large burst if
++ * the burst does become large. The main steps are the following.
++ *
++ * . when the very first queue is activated, the queue is inserted into the
++ * list (as it could be the first queue in a possible burst)
++ *
++ * . if the current burst has not yet become large, and a queue Q that does
++ * not yet belong to the burst is activated shortly after the last time
++ * at which a new queue entered the burst list, then the function appends
++ * Q to the burst list
++ *
++ * . if, as a consequence of the previous step, the burst size reaches
++ * the large-burst threshold, then
++ *
++ * . all the queues in the burst list are marked as belonging to a
++ * large burst
++ *
++ * . the burst list is deleted; in fact, the burst list already served
++ * its purpose (temporarily keeping track of the queues in a burst,
++ * so as to be able to mark them as belonging to a large burst in the
++ * previous sub-step), and now is not needed any more
++ *
++ * . the device enters a large-burst mode
++ *
++ * . if a queue Q that does not belong to the burst is activated while
++ * the device is in large-burst mode and shortly after the last time
++ * at which a queue either entered the burst list or was marked as
++ * belonging to the current large burst, then Q is immediately marked
++ * as belonging to a large burst.
++ *
++ * . if a queue Q that does not belong to the burst is activated a while
++ * later, i.e., not shortly after the last time at which a queue
++ * either entered the burst list or was marked as belonging to the
++ * current large burst, then the current burst is deemed as finished and:
++ *
++ * . the large-burst mode is reset if set
++ *
++ * . the burst list is emptied
++ *
++ * . Q is inserted in the burst list, as Q may be the first queue
++ * in a possible new burst (then the burst list contains just Q
++ * after this step).
++ */
++static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ bool idle_for_long_time)
++{
++ /*
++ * If bfqq happened to be activated in a burst, but has been idle
++ * for at least as long as an interactive queue, then we assume
++ * that, in the overall I/O initiated in the burst, the I/O
++ * associated to bfqq is finished. So bfqq does not need to be
++ * treated as a queue belonging to a burst anymore. Accordingly,
++ * we reset bfqq's in_large_burst flag if set, and remove bfqq
++ * from the burst list if it's there. We do not, instead, decrement
++ * burst_size, because the fact that bfqq does not need to belong
++ * to the burst list any more does not invalidate the fact that
++ * bfqq may have been activated during the current burst.
++ */
++ if (idle_for_long_time) {
++ hlist_del_init(&bfqq->burst_list_node);
++ bfq_clear_bfqq_in_large_burst(bfqq);
++ }
++
++ /*
++ * If bfqq is already in the burst list or is part of a large
++ * burst, then there is nothing else to do.
++ */
++ if (!hlist_unhashed(&bfqq->burst_list_node) ||
++ bfq_bfqq_in_large_burst(bfqq))
++ return;
++
++ /*
++ * If bfqq's activation happens late enough, then the current
++ * burst is finished, and related data structures must be reset.
++ *
++ * In this respect, consider the special case where bfqq is the very
++ * first queue being activated. In this case, last_ins_in_burst is
++ * not yet significant when we get here. But it is easy to verify
++ * that, whether or not the following condition is true, bfqq will
++ * end up being inserted into the burst list. In particular the
++ * list will happen to contain only bfqq. And this is exactly what
++ * has to happen, as bfqq may be the first queue in a possible
++ * burst.
++ */
++ if (time_is_before_jiffies(bfqd->last_ins_in_burst +
++ bfqd->bfq_burst_interval)) {
++ bfqd->large_burst = false;
++ bfq_reset_burst_list(bfqd, bfqq);
++ return;
++ }
++
++ /*
++ * If we get here, then bfqq is being activated shortly after the
++ * last queue. So, if the current burst is also large, we can mark
++ * bfqq as belonging to this large burst immediately.
++ */
++ if (bfqd->large_burst) {
++ bfq_mark_bfqq_in_large_burst(bfqq);
++ return;
++ }
++
++ /*
++ * If we get here, then a large-burst state has not yet been
++ * reached, but bfqq is being activated shortly after the last
++ * queue. Then we add bfqq to the burst.
++ */
++ bfq_add_to_burst(bfqd, bfqq);
++}
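++
++/*
++ * Timeline example (illustrative only), assuming a large-burst
++ * threshold of 3: queues A, B and C are activated within
++ * bfq_burst_interval of one another. A starts a new burst list, B is
++ * appended, and C makes the burst size reach the threshold, so A, B
++ * and C are all marked in_large_burst and the list is emptied. A queue
++ * D activated shortly afterwards is marked immediately, while a queue
++ * E activated much later resets large_burst and starts a fresh list
++ * containing only E.
++ */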
++
++static void bfq_add_request(struct request *rq)
++{
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++ struct bfq_entity *entity = &bfqq->entity;
++ struct bfq_data *bfqd = bfqq->bfqd;
++ struct request *next_rq, *prev;
++ unsigned long old_wr_coeff = bfqq->wr_coeff;
++ bool interactive = false;
++
++ bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
++ bfqq->queued[rq_is_sync(rq)]++;
++ bfqd->queued++;
++
++ elv_rb_add(&bfqq->sort_list, rq);
++
++ /*
++ * Check if this request is a better next-serve candidate.
++ */
++ prev = bfqq->next_rq;
++ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
++ BUG_ON(!next_rq);
++ bfqq->next_rq = next_rq;
++
++ if (!bfq_bfqq_busy(bfqq)) {
++ bool soft_rt, in_burst,
++ idle_for_long_time = time_is_before_jiffies(
++ bfqq->budget_timeout +
++ bfqd->bfq_wr_min_idle_time);
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq,
++ rq->cmd_flags);
++#endif
++ if (bfq_bfqq_sync(bfqq)) {
++ bool already_in_burst =
++ !hlist_unhashed(&bfqq->burst_list_node) ||
++ bfq_bfqq_in_large_burst(bfqq);
++ bfq_handle_burst(bfqd, bfqq, idle_for_long_time);
++ /*
++ * If bfqq was not already in the current burst,
++ * then, at this point, bfqq either has been
++ * added to the current burst or has caused the
++ * current burst to terminate. In particular, in
++ * the second case, bfqq has become the first
++ * queue in a possible new burst.
++ * In both cases last_ins_in_burst needs to be
++ * moved forward.
++ */
++ if (!already_in_burst)
++ bfqd->last_ins_in_burst = jiffies;
++ }
++
++ in_burst = bfq_bfqq_in_large_burst(bfqq);
++ soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
++ !in_burst &&
++ time_is_before_jiffies(bfqq->soft_rt_next_start);
++ interactive = !in_burst && idle_for_long_time;
++ entity->budget = max_t(unsigned long, bfqq->max_budget,
++ bfq_serv_to_charge(next_rq, bfqq));
++
++ if (!bfq_bfqq_IO_bound(bfqq)) {
++ if (time_before(jiffies,
++ RQ_BIC(rq)->ttime.last_end_request +
++ bfqd->bfq_slice_idle)) {
++ bfqq->requests_within_timer++;
++ if (bfqq->requests_within_timer >=
++ bfqd->bfq_requests_within_timer)
++ bfq_mark_bfqq_IO_bound(bfqq);
++ } else
++ bfqq->requests_within_timer = 0;
++ }
++
++ if (!bfqd->low_latency)
++ goto add_bfqq_busy;
++
++ /*
++ * If the queue:
++ * - is not being boosted,
++ * - has been idle for enough time,
++ * - is not a sync queue or is linked to a bfq_io_cq (it is
++ * shared "for its nature" or it is not shared and its
++ * requests have not been redirected to a shared queue)
++ * start a weight-raising period.
++ */
++ if (old_wr_coeff == 1 && (interactive || soft_rt) &&
++ (!bfq_bfqq_sync(bfqq) || bfqq->bic)) {
++ bfqq->wr_coeff = bfqd->bfq_wr_coeff;
++ if (interactive)
++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++ else
++ bfqq->wr_cur_max_time =
++ bfqd->bfq_wr_rt_max_time;
++ bfq_log_bfqq(bfqd, bfqq,
++ "wrais starting at %lu, rais_max_time %u",
++ jiffies,
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ } else if (old_wr_coeff > 1) {
++ if (interactive)
++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++ else if (in_burst ||
++ (bfqq->wr_cur_max_time ==
++ bfqd->bfq_wr_rt_max_time &&
++ !soft_rt)) {
++ bfqq->wr_coeff = 1;
++ bfq_log_bfqq(bfqd, bfqq,
++ "wrais ending at %lu, rais_max_time %u",
++ jiffies,
++ jiffies_to_msecs(bfqq->
++ wr_cur_max_time));
++ } else if (time_before(
++ bfqq->last_wr_start_finish +
++ bfqq->wr_cur_max_time,
++ jiffies +
++ bfqd->bfq_wr_rt_max_time) &&
++ soft_rt) {
++ /*
++ * The remaining weight-raising time is lower
++ * than bfqd->bfq_wr_rt_max_time, which means
++ * that the application is enjoying weight
++ * raising either because deemed soft-rt in
++ * the near past, or because deemed interactive
++ * long ago.
++ * In both cases, resetting now the current
++ * remaining weight-raising time for the
++ * application to the weight-raising duration
++ * for soft rt applications would not cause any
++ * latency increase for the application (as the
++ * new duration would be higher than the
++ * remaining time).
++ *
++ * In addition, the application is now meeting
++ * the requirements for being deemed soft rt.
++ * In the end we can correctly and safely
++ * (re)charge the weight-raising duration for
++ * the application with the weight-raising
++ * duration for soft rt applications.
++ *
++ * In particular, doing this recharge now, i.e.,
++ * before the weight-raising period for the
++ * application finishes, reduces the probability
++ * of the following negative scenario:
++ * 1) the weight of a soft rt application is
++ * raised at startup (as for any newly
++ * created application),
++ * 2) since the application is not interactive,
++ * at a certain time weight-raising is
++ * stopped for the application,
++ * 3) at that time the application happens to
++ * still have pending requests, and hence
++ * is destined to not have a chance to be
++ * deemed soft rt before these requests are
++ * completed (see the comments to the
++ * function bfq_bfqq_softrt_next_start()
++ * for details on soft rt detection),
++ * 4) these pending requests experience a high
++ * latency because the application is not
++ * weight-raised while they are pending.
++ */
++ bfqq->last_wr_start_finish = jiffies;
++ bfqq->wr_cur_max_time =
++ bfqd->bfq_wr_rt_max_time;
++ }
++ }
++ if (old_wr_coeff != bfqq->wr_coeff)
++ entity->prio_changed = 1;
++add_bfqq_busy:
++ bfqq->last_idle_bklogged = jiffies;
++ bfqq->service_from_backlogged = 0;
++ bfq_clear_bfqq_softrt_update(bfqq);
++ bfq_add_bfqq_busy(bfqd, bfqq);
++ } else {
++ if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
++ time_is_before_jiffies(
++ bfqq->last_wr_start_finish +
++ bfqd->bfq_wr_min_inter_arr_async)) {
++ bfqq->wr_coeff = bfqd->bfq_wr_coeff;
++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++
++ bfqd->wr_busy_queues++;
++ entity->prio_changed = 1;
++ bfq_log_bfqq(bfqd, bfqq,
++ "non-idle wrais starting at %lu, rais_max_time %u",
++ jiffies,
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ }
++ if (prev != bfqq->next_rq)
++ bfq_updated_next_req(bfqd, bfqq);
++ }
++
++ if (bfqd->low_latency &&
++ (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
++ bfqq->last_wr_start_finish = jiffies;
++}
++
++static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
++ struct bio *bio)
++{
++ struct task_struct *tsk = current;
++ struct bfq_io_cq *bic;
++ struct bfq_queue *bfqq;
++
++ bic = bfq_bic_lookup(bfqd, tsk->io_context);
++ if (!bic)
++ return NULL;
++
++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
++ if (bfqq)
++ return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio));
++
++ return NULL;
++}
++
++static void bfq_activate_request(struct request_queue *q, struct request *rq)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++
++ bfqd->rq_in_driver++;
++ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
++ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu",
++ (long long unsigned)bfqd->last_position);
++}
++
++static void bfq_deactivate_request(struct request_queue *q, struct request *rq)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++
++ BUG_ON(bfqd->rq_in_driver == 0);
++ bfqd->rq_in_driver--;
++}
++
++static void bfq_remove_request(struct request *rq)
++{
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++ struct bfq_data *bfqd = bfqq->bfqd;
++ const int sync = rq_is_sync(rq);
++
++ if (bfqq->next_rq == rq) {
++ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
++ bfq_updated_next_req(bfqd, bfqq);
++ }
++
++ if (rq->queuelist.prev != &rq->queuelist)
++ list_del_init(&rq->queuelist);
++ BUG_ON(bfqq->queued[sync] == 0);
++ bfqq->queued[sync]--;
++ bfqd->queued--;
++ elv_rb_del(&bfqq->sort_list, rq);
++
++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
++ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue)
++ bfq_del_bfqq_busy(bfqd, bfqq, 1);
++ /*
++ * Remove queue from request-position tree as it is empty.
++ */
++ if (bfqq->pos_root) {
++ rb_erase(&bfqq->pos_node, bfqq->pos_root);
++ bfqq->pos_root = NULL;
++ }
++ }
++
++ if (rq->cmd_flags & REQ_META) {
++ BUG_ON(bfqq->meta_pending == 0);
++ bfqq->meta_pending--;
++ }
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
++#endif
++}
++
++static int bfq_merge(struct request_queue *q, struct request **req,
++ struct bio *bio)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct request *__rq;
++
++ __rq = bfq_find_rq_fmerge(bfqd, bio);
++ if (__rq && elv_rq_merge_ok(__rq, bio)) {
++ *req = __rq;
++ return ELEVATOR_FRONT_MERGE;
++ }
++
++ return ELEVATOR_NO_MERGE;
++}
++
++static void bfq_merged_request(struct request_queue *q, struct request *req,
++ int type)
++{
++ if (type == ELEVATOR_FRONT_MERGE &&
++ rb_prev(&req->rb_node) &&
++ blk_rq_pos(req) <
++ blk_rq_pos(container_of(rb_prev(&req->rb_node),
++ struct request, rb_node))) {
++ struct bfq_queue *bfqq = RQ_BFQQ(req);
++ struct bfq_data *bfqd = bfqq->bfqd;
++ struct request *prev, *next_rq;
++
++ /* Reposition request in its sort_list */
++ elv_rb_del(&bfqq->sort_list, req);
++ elv_rb_add(&bfqq->sort_list, req);
++ /* Choose next request to be served for bfqq */
++ prev = bfqq->next_rq;
++ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req,
++ bfqd->last_position);
++ BUG_ON(!next_rq);
++ bfqq->next_rq = next_rq;
++ }
++}
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static void bfq_bio_merged(struct request_queue *q, struct request *req,
++ struct bio *bio)
++{
++ bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio->bi_rw);
++}
++#endif
++
++static void bfq_merged_requests(struct request_queue *q, struct request *rq,
++ struct request *next)
++{
++ struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
++
++ /*
++ * If next and rq belong to the same bfq_queue and next is older
++ * than rq, then reposition rq in the fifo (by substituting next
++ * with rq). Otherwise, if next and rq belong to different
++ * bfq_queues, never reposition rq: in fact, we would have to
++ * reposition it with respect to next's position in its own fifo,
++ * which would most certainly be too expensive with respect to
++ * the benefits.
++ */
++ if (bfqq == next_bfqq &&
++ !list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
++ time_before(next->fifo_time, rq->fifo_time)) {
++ list_del_init(&rq->queuelist);
++ list_replace_init(&next->queuelist, &rq->queuelist);
++ rq->fifo_time = next->fifo_time;
++ }
++
++ if (bfqq->next_rq == next)
++ bfqq->next_rq = rq;
++
++ bfq_remove_request(next);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
++#endif
++}
++
++/* Must be called with bfqq != NULL */
++static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
++{
++ BUG_ON(!bfqq);
++ if (bfq_bfqq_busy(bfqq))
++ bfqq->bfqd->wr_busy_queues--;
++ bfqq->wr_coeff = 1;
++ bfqq->wr_cur_max_time = 0;
++ /* Trigger a weight change on the next activation of the queue */
++ bfqq->entity.prio_changed = 1;
++}
++
++static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
++ struct bfq_group *bfqg)
++{
++ int i, j;
++
++ for (i = 0; i < 2; i++)
++ for (j = 0; j < IOPRIO_BE_NR; j++)
++ if (bfqg->async_bfqq[i][j])
++ bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]);
++ if (bfqg->async_idle_bfqq)
++ bfq_bfqq_end_wr(bfqg->async_idle_bfqq);
++}
++
++static void bfq_end_wr(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq;
++
++ spin_lock_irq(bfqd->queue->queue_lock);
++
++ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
++ bfq_bfqq_end_wr(bfqq);
++ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
++ bfq_bfqq_end_wr(bfqq);
++ bfq_end_wr_async(bfqd);
++
++ spin_unlock_irq(bfqd->queue->queue_lock);
++}
++
++static int bfq_allow_merge(struct request_queue *q, struct request *rq,
++ struct bio *bio)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_io_cq *bic;
++
++ /*
++ * Disallow merge of a sync bio into an async request.
++ */
++ if (bfq_bio_sync(bio) && !rq_is_sync(rq))
++ return 0;
++
++ /*
++ * Lookup the bfqq that this bio will be queued with. Allow
++ * merge only if rq is queued there.
++ * Queue lock is held here.
++ */
++ bic = bfq_bic_lookup(bfqd, current->io_context);
++ if (!bic)
++ return 0;
++
++ return bic_to_bfqq(bic, bfq_bio_sync(bio)) == RQ_BFQQ(rq);
++}
++
++static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ if (bfqq) {
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
++#endif
++ bfq_mark_bfqq_must_alloc(bfqq);
++ bfq_mark_bfqq_budget_new(bfqq);
++ bfq_clear_bfqq_fifo_expire(bfqq);
++
++ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "set_in_service_queue, cur-budget = %d",
++ bfqq->entity.budget);
++ }
++
++ bfqd->in_service_queue = bfqq;
++}
++
++/*
++ * Get and set a new queue for service.
++ */
++static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
++
++ __bfq_set_in_service_queue(bfqd, bfqq);
++ return bfqq;
++}
++
++/*
++ * If enough samples have been computed, return the current max budget
++ * stored in bfqd, which is dynamically updated according to the
++ * estimated disk peak rate; otherwise return the default max budget
++ */
++static int bfq_max_budget(struct bfq_data *bfqd)
++{
++ if (bfqd->budgets_assigned < bfq_stats_min_budgets)
++ return bfq_default_max_budget;
++ else
++ return bfqd->bfq_max_budget;
++}
++
++/*
++ * Return min budget, which is a fraction of the current or default
++ * max budget (trying with 1/32)
++ */
++static int bfq_min_budget(struct bfq_data *bfqd)
++{
++ if (bfqd->budgets_assigned < bfq_stats_min_budgets)
++ return bfq_default_max_budget / 32;
++ else
++ return bfqd->bfq_max_budget / 32;
++}
++
++static void bfq_arm_slice_timer(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq = bfqd->in_service_queue;
++ struct bfq_io_cq *bic;
++ unsigned long sl;
++
++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
++
++ /* Processes have exited, don't wait. */
++ bic = bfqd->in_service_bic;
++ if (!bic || atomic_read(&bic->icq.ioc->active_ref) == 0)
++ return;
++
++ bfq_mark_bfqq_wait_request(bfqq);
++
++ /*
++ * We don't want to idle for seeks, but we do want to allow
++ * fair distribution of slice time for a process doing back-to-back
++ * seeks. So allow a little bit of time for it to submit a new rq.
++ *
++ * To prevent processes with (partly) seeky workloads from
++ * being too ill-treated, grant them a small fraction of the
++ * assigned budget before reducing the waiting time to
++ * BFQ_MIN_TT. This happened to help reduce latency.
++ */
++ sl = bfqd->bfq_slice_idle;
++ /*
++ * Unless the queue is being weight-raised or the scenario is
++ * asymmetric, grant only minimum idle time if the queue either
++ * has been seeky for long enough or has already proved to be
++ * constantly seeky.
++ */
++ if (bfq_sample_valid(bfqq->seek_samples) &&
++ ((BFQQ_SEEKY(bfqq) && bfqq->entity.service >
++ bfq_max_budget(bfqq->bfqd) / 8) ||
++ bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1 &&
++ bfq_symmetric_scenario(bfqd))
++ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
++ else if (bfqq->wr_coeff > 1)
++ sl = sl * 3;
++ bfqd->last_idling_start = ktime_get();
++ mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
++#endif
++ bfq_log(bfqd, "arm idle: %u/%u ms",
++ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
++}
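++
++/*
++ * Example (illustrative only): with HZ = 250, bfq_slice_idle = HZ / 125
++ * amounts to 2 jiffies, i.e., 8 ms of idling. A constantly seeky,
++ * non-weight-raised queue in a symmetric scenario is clamped to
++ * msecs_to_jiffies(BFQ_MIN_TT) (2 ms, rounded up to jiffy granularity),
++ * whereas a weight-raised queue idles for 3 * 8 = 24 ms, to better
++ * protect its latency.
++ */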
++
++/*
++ * Set the maximum time for the in-service queue to consume its
++ * budget. This prevents seeky processes from lowering the disk
++ * throughput (always guaranteed with a time slice scheme as in CFQ).
++ */
++static void bfq_set_budget_timeout(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq = bfqd->in_service_queue;
++ unsigned int timeout_coeff;
++ if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
++ timeout_coeff = 1;
++ else
++ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
++
++ bfqd->last_budget_start = ktime_get();
++
++ bfq_clear_bfqq_budget_new(bfqq);
++ bfqq->budget_timeout = jiffies +
++ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff;
++
++ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
++ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] *
++ timeout_coeff));
++}
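++
++/*
++ * Example (illustrative only): an interactively weight-raised queue
++ * whose weight was raised from orig_weight = 100 to weight = 300 gets
++ * timeout_coeff = 3, i.e., three times the base budget timeout; a soft
++ * real-time queue (wr_cur_max_time == bfq_wr_rt_max_time) keeps
++ * timeout_coeff = 1.
++ */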
++
++/*
++ * Move request from internal lists to the request queue dispatch list.
++ */
++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++
++ /*
++ * For consistency, the next instruction should have been executed
++ * after removing the request from the queue and dispatching it.
++ * We instead execute this instruction before bfq_remove_request()
++ * (and hence introduce a temporary inconsistency), for efficiency.
++ * In fact, in a forced_dispatch, this prevents two counters related
++ * to bfqq->dispatched from being uselessly decremented if bfqq
++ * is not in service, and then incremented again after
++ * incrementing bfqq->dispatched.
++ */
++ bfqq->dispatched++;
++ bfq_remove_request(rq);
++ elv_dispatch_sort(q, rq);
++
++ if (bfq_bfqq_sync(bfqq))
++ bfqd->sync_flight++;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_dispatch(bfqq_group(bfqq), blk_rq_bytes(rq),
++ rq->cmd_flags);
++#endif
++}
++
++/*
++ * Return expired entry, or NULL to just start from scratch in rbtree.
++ */
++static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
++{
++ struct request *rq = NULL;
++
++ if (bfq_bfqq_fifo_expire(bfqq))
++ return NULL;
++
++ bfq_mark_bfqq_fifo_expire(bfqq);
++
++ if (list_empty(&bfqq->fifo))
++ return NULL;
++
++ rq = rq_entry_fifo(bfqq->fifo.next);
++
++ if (time_before(jiffies, rq->fifo_time))
++ return NULL;
++
++ return rq;
++}
++
++static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++ return entity->budget - entity->service;
++}
++
++static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ BUG_ON(bfqq != bfqd->in_service_queue);
++
++ __bfq_bfqd_reset_in_service(bfqd);
++
++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
++ /*
++ * Overloading budget_timeout field to store the time
++ * at which the queue remains with no backlog; used by
++ * the weight-raising mechanism.
++ */
++ bfqq->budget_timeout = jiffies;
++ bfq_del_bfqq_busy(bfqd, bfqq, 1);
++ } else
++ bfq_activate_bfqq(bfqd, bfqq);
++}
++
++/**
++ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior.
++ * @bfqd: device data.
++ * @bfqq: queue to update.
++ * @reason: reason for expiration.
++ *
++ * Handle the feedback on @bfqq budget at queue expiration.
++ * See the body for detailed comments.
++ */
++static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ enum bfqq_expiration reason)
++{
++ struct request *next_rq;
++ int budget, min_budget;
++
++ budget = bfqq->max_budget;
++ min_budget = bfq_min_budget(bfqd);
++
++ BUG_ON(bfqq != bfqd->in_service_queue);
++
++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d",
++ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d",
++ budget, bfq_min_budget(bfqd));
++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
++ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
++
++ if (bfq_bfqq_sync(bfqq)) {
++ switch (reason) {
++ /*
++ * Caveat: in all the following cases we trade latency
++ * for throughput.
++ */
++ case BFQ_BFQQ_TOO_IDLE:
++ /*
++ * This is the only case where we may reduce
++ * the budget: if there is no request of the
++ * process still waiting for completion, then
++ * we assume (tentatively) that the timer has
++ * expired because the batch of requests of
++ * the process could have been served with a
++ * smaller budget. Hence, betting that the
++ * process will behave in the same way when it
++ * becomes backlogged again, we reduce its
++ * next budget. As long as we guess right,
++ * this budget cut reduces the latency
++ * experienced by the process.
++ *
++ * However, if there are still outstanding
++ * requests, then the process may not yet have
++ * issued its next request just because it is
++ * still waiting for the completion of some of
++ * the still outstanding ones. So in this
++ * subcase we do not reduce its budget, on the
++ * contrary we increase it to possibly boost
++ * the throughput, as discussed in the
++ * comments to the BUDGET_TIMEOUT case.
++ */
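++ /*
++ * Numerical example for the no-outstanding-requests
++ * branch below (illustrative values only): with
++ * min_budget = 100, a budget of 1000 (> 5 * 100) is
++ * cut to 1000 - 4 * 100 = 600, while a budget of 450
++ * is clamped directly to min_budget = 100.
++ */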
++ if (bfqq->dispatched > 0) /* still outstanding reqs */
++ budget = min(budget * 2, bfqd->bfq_max_budget);
++ else {
++ if (budget > 5 * min_budget)
++ budget -= 4 * min_budget;
++ else
++ budget = min_budget;
++ }
++ break;
++ case BFQ_BFQQ_BUDGET_TIMEOUT:
++ /*
++ * We double the budget here because: 1) it
++ * gives the chance to boost the throughput if
++ * this is not a seeky process (which may have
++ * bumped into this timeout because of, e.g.,
++ * ZBR), 2) together with charge_full_budget
++ * it helps give seeky processes higher
++ * timestamps, and hence be served less
++ * frequently.
++ */
++ budget = min(budget * 2, bfqd->bfq_max_budget);
++ break;
++ case BFQ_BFQQ_BUDGET_EXHAUSTED:
++ /*
++ * The process still has backlog, and did not
++ * let either the budget timeout or the disk
++ * idling timeout expire. Hence it is not
++ * seeky, has a short thinktime and may be
++ * happy with a higher budget too. So
++ * definitely increase the budget of this good
++ * candidate to boost the disk throughput.
++ */
++ budget = min(budget * 4, bfqd->bfq_max_budget);
++ break;
++ case BFQ_BFQQ_NO_MORE_REQUESTS:
++ /*
++ * Leave the budget unchanged.
++ */
++ default:
++ return;
++ }
++ } else
++ /*
++ * Async queues always get the maximum possible budget
++ * (their ability to dispatch is limited by
++ * @bfqd->bfq_max_budget_async_rq).
++ */
++ budget = bfqd->bfq_max_budget;
++
++ bfqq->max_budget = budget;
++
++ if (bfqd->budgets_assigned >= bfq_stats_min_budgets &&
++ !bfqd->bfq_user_max_budget)
++ bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget);
++
++ /*
++ * Make sure that we have enough budget for the next request.
++ * Since the finish time of the bfqq must be kept in sync with
++ * the budget, be sure to call __bfq_bfqq_expire() after the
++ * update.
++ */
++ next_rq = bfqq->next_rq;
++ if (next_rq)
++ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
++ bfq_serv_to_charge(next_rq, bfqq));
++ else
++ bfqq->entity.budget = bfqq->max_budget;
++
++ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d",
++ next_rq ? blk_rq_sectors(next_rq) : 0,
++ bfqq->entity.budget);
++}
++
++static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
++{
++ unsigned long max_budget;
++
++ /*
++ * The max_budget calculated when autotuning is equal to the
++ * amount of sectors transferred in timeout_sync at the
++ * estimated peak rate.
++ */
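++ /*
++ * Unit check (illustrative): peak_rate is stored in
++ * sectors/usec, left-shifted by BFQ_RATE_SHIFT for
++ * fixed-point precision, while timeout is in ms. The
++ * factor of 1000 converts ms to us, and the final right
++ * shift removes the fixed-point scaling, leaving a
++ * budget expressed in sectors.
++ */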
++ max_budget = (unsigned long)(peak_rate * 1000 *
++ timeout >> BFQ_RATE_SHIFT);
++
++ return max_budget;
++}
++
++/*
++ * In addition to updating the peak rate, checks whether the process
++ * is "slow", and returns true if so. This slow flag is used, in addition
++ * to the budget timeout, to reduce the amount of service provided to
++ * seeky processes, and hence reduce their chances to lower the
++ * throughput. See the code for more details.
++ */
++static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ bool compensate, enum bfqq_expiration reason)
++{
++ u64 bw, usecs, expected, timeout;
++ ktime_t delta;
++ int update = 0;
++
++ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
++ return false;
++
++ if (compensate)
++ delta = bfqd->last_idling_start;
++ else
++ delta = ktime_get();
++ delta = ktime_sub(delta, bfqd->last_budget_start);
++ usecs = ktime_to_us(delta);
++
++ /* Don't trust short/unrealistic values. */
++ if (usecs < 100 || usecs >= LONG_MAX)
++ return false;
++
++ /*
++ * Calculate the bandwidth for the last slice. We use a 64 bit
++ * value to store the peak rate, in sectors per usec in fixed
++ * point math. We do so to have enough precision in the estimate
++ * and to avoid overflows.
++ */
++ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
++ do_div(bw, (unsigned long)usecs);
++
++ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
++
++ /*
++ * Use only long (> 20ms) intervals to filter out spikes for
++ * the peak rate estimation.
++ */
++ if (usecs > 20000) {
++ if (bw > bfqd->peak_rate ||
++ (!BFQQ_SEEKY(bfqq) &&
++ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) {
++ bfq_log(bfqd, "measured bw =%llu", bw);
++ /*
++ * To smooth oscillations use a low-pass filter with
++ * alpha=7/8, i.e.,
++ * new_rate = (7/8) * old_rate + (1/8) * bw
++ */
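++ /*
++ * E.g. (illustrative values): with old peak_rate = 800
++ * and measured bw = 1600, the new estimate is
++ * 800 * 7 / 8 + 1600 / 8 = 700 + 200 = 900.
++ */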
++ do_div(bw, 8);
++ if (bw == 0)
++ return false;
++ bfqd->peak_rate *= 7;
++ do_div(bfqd->peak_rate, 8);
++ bfqd->peak_rate += bw;
++ update = 1;
++ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate);
++ }
++
++ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
++
++ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES)
++ bfqd->peak_rate_samples++;
++
++ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES &&
++ update) {
++ int dev_type = blk_queue_nonrot(bfqd->queue);
++ if (bfqd->bfq_user_max_budget == 0) {
++ bfqd->bfq_max_budget =
++ bfq_calc_max_budget(bfqd->peak_rate,
++ timeout);
++ bfq_log(bfqd, "new max_budget=%d",
++ bfqd->bfq_max_budget);
++ }
++ if (bfqd->device_speed == BFQ_BFQD_FAST &&
++ bfqd->peak_rate < device_speed_thresh[dev_type]) {
++ bfqd->device_speed = BFQ_BFQD_SLOW;
++ bfqd->RT_prod = R_slow[dev_type] *
++ T_slow[dev_type];
++ } else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
++ bfqd->peak_rate > device_speed_thresh[dev_type]) {
++ bfqd->device_speed = BFQ_BFQD_FAST;
++ bfqd->RT_prod = R_fast[dev_type] *
++ T_fast[dev_type];
++ }
++ }
++ }
++
++ /*
++ * If the process has been served for a too short time
++ * interval to let its possible sequential accesses prevail over
++ * the initial seek time needed to move the disk head on the
++ * first sector it requested, then give the process a chance
++ * and for the moment return false.
++ */
++ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8)
++ return false;
++
++ /*
++ * A process is considered ``slow'' (i.e., seeky, so that we
++ * cannot treat it fairly in the service domain, as it would
++ * slow down the other processes too much) if, when a slice
++ * ends for whatever reason, it has received service at a
++ * rate that would not be high enough to complete the budget
++ * before the budget timeout expiration.
++ */
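++ /*
++ * Unit check: bw is in sectors/usec, fixed point
++ * (<< BFQ_RATE_SHIFT), and timeout is in ms, so the
++ * expression below yields the number of sectors that
++ * would be served over a whole budget timeout at the
++ * rate measured for the last slice.
++ */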
++ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT;
++
++ /*
++ * Caveat: processes doing IO in the slower disk zones will
++ * tend to be slow(er) even if not seeky. And the estimated
++ * peak rate will actually be an average over the disk
++ * surface. Hence, to not be too harsh with unlucky processes,
++ * we keep a budget/3 margin of safety before declaring a
++ * process slow.
++ */
++ return expected > (4 * bfqq->entity.budget) / 3;
++}
++
++/*
++ * To be deemed as soft real-time, an application must meet two
++ * requirements. First, the application must not require an average
++ * bandwidth higher than the approximate bandwidth required to play back or
++ * record a compressed high-definition video.
++ * The next function is invoked on the completion of the last request of a
++ * batch, to compute the next-start time instant, soft_rt_next_start, such
++ * that, if the next request of the application does not arrive before
++ * soft_rt_next_start, then the above requirement on the bandwidth is met.
++ *
++ * The second requirement is that the request pattern of the application is
++ * isochronous, i.e., that, after issuing a request or a batch of requests,
++ * the application stops issuing new requests until all its pending requests
++ * have been completed. After that, the application may issue a new batch,
++ * and so on.
++ * For this reason the next function is invoked to compute
++ * soft_rt_next_start only for applications that meet this requirement,
++ * whereas soft_rt_next_start is set to infinity for applications that do
++ * not.
++ *
++ * Unfortunately, even a greedy application may happen to behave in an
++ * isochronous way if the CPU load is high. In fact, the application may
++ * stop issuing requests while the CPUs are busy serving other processes,
++ * then restart, then stop again for a while, and so on. In addition, if
++ * the disk achieves a low enough throughput with the request pattern
++ * issued by the application (e.g., because the request pattern is random
++ * and/or the device is slow), then the application may meet the above
++ * bandwidth requirement too. To prevent such a greedy application from
++ * being deemed soft real-time, a further rule is used in the computation of
++ * soft_rt_next_start: soft_rt_next_start must be higher than the current
++ * time plus the maximum time for which the arrival of a request is waited
++ * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle.
++ * This filters out greedy applications, as the latter instead issue their
++ * next request as soon as possible after the last one has been completed
++ * (in contrast, when a batch of requests is completed, a soft real-time
++ * application spends some time processing data).
++ *
++ * Unfortunately, the last filter may easily generate false positives if
++ * only bfqd->bfq_slice_idle is used as a reference time interval and one
++ * or both the following cases occur:
++ * 1) HZ is so low that the duration of a jiffy is comparable to or higher
++ * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with
++ * HZ=100.
++ * 2) jiffies, instead of increasing at a constant rate, may stop increasing
++ * for a while, then suddenly 'jump' by several units to recover the lost
++ * increments. This seems to happen, e.g., inside virtual machines.
++ * To address this issue, we do not use as a reference time interval just
++ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In
++ * particular we add the minimum number of jiffies for which the filter
++ * seems to be quite precise also in embedded systems and KVM/QEMU virtual
++ * machines.
++ */
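++ /*
++ * Reading of the formula below:
++ * HZ * service_from_backlogged / bfq_wr_max_softrt_rate
++ * is the minimum duration, in jiffies, over which the
++ * service received so far stays within the soft
++ * real-time bandwidth threshold; the second argument of
++ * max() enforces the minimum reference interval
++ * (slice_idle plus 4 jiffies) discussed above.
++ */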
++static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ return max(bfqq->last_idle_bklogged +
++ HZ * bfqq->service_from_backlogged /
++ bfqd->bfq_wr_max_softrt_rate,
++ jiffies + bfqq->bfqd->bfq_slice_idle + 4);
++}
++
++/*
++ * Return the largest-possible time instant such that, for as long as possible,
++ * the current time will be lower than this time instant according to the macro
++ * time_is_before_jiffies().
++ */
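++/*
++ * (Comparisons based on the time_before()/time_after()
++ * family are well defined only for distances smaller than
++ * ULONG_MAX/2, hence the offset chosen below.)
++ */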
++static unsigned long bfq_infinity_from_now(unsigned long now)
++{
++ return now + ULONG_MAX / 2;
++}
++
++/**
++ * bfq_bfqq_expire - expire a queue.
++ * @bfqd: device owning the queue.
++ * @bfqq: the queue to expire.
++ * @compensate: if true, compensate for the time spent idling.
++ * @reason: the reason causing the expiration.
++ *
++ * If the process associated to the queue is slow (i.e., seeky), or in
++ * case of budget timeout, or, finally, if it is async, we
++ * artificially charge it an entire budget (independently of the
++ * actual service it received). As a consequence, the queue will get
++ * higher timestamps than the correct ones upon reactivation, and
++ * hence it will be rescheduled as if it had received more service
++ * than what it actually received. In the end, this class of processes
++ * will receive less service in proportion to how slowly they consume
++ * their budgets (and hence how seriously they tend to lower the
++ * throughput).
++ *
++ * In contrast, when a queue expires because it has been idling for
++ * too long or because it exhausted its budget, we do not touch the
++ * amount of service it has received. Hence when the queue will be
++ * reactivated and its timestamps updated, the latter will be in sync
++ * with the actual service received by the queue until expiration.
++ *
++ * Charging a full budget to the first type of queues and the exact
++ * service to the others has the effect of using the WF2Q+ policy to
++ * schedule the former on a timeslice basis, without violating the
++ * service domain guarantees of the latter.
++ */
++static void bfq_bfqq_expire(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ bool compensate,
++ enum bfqq_expiration reason)
++{
++ bool slow;
++ BUG_ON(bfqq != bfqd->in_service_queue);
++
++ /*
++ * Update disk peak rate for autotuning and check whether the
++ * process is slow (see bfq_update_peak_rate).
++ */
++ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason);
++
++ /*
++ * As above explained, 'punish' slow (i.e., seeky), timed-out
++ * and async queues, to favor sequential sync workloads.
++ *
++ * Processes doing I/O in the slower disk zones will tend to be
++ * slow(er) even if not seeky. Hence, since the estimated peak
++ * rate is actually an average over the disk surface, these
++ * processes may timeout just for bad luck. To avoid punishing
++ * them we do not charge a full budget to a process that
++ * succeeded in consuming at least 2/3 of its budget.
++ */
++ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3))
++ bfq_bfqq_charge_full_budget(bfqq);
++
++ bfqq->service_from_backlogged += bfqq->entity.service;
++
++ if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
++ !bfq_bfqq_constantly_seeky(bfqq)) {
++ bfq_mark_bfqq_constantly_seeky(bfqq);
++ if (!blk_queue_nonrot(bfqd->queue))
++ bfqd->const_seeky_busy_in_flight_queues++;
++ }
++
++ if (reason == BFQ_BFQQ_TOO_IDLE &&
++ bfqq->entity.service <= 2 * bfqq->entity.budget / 10)
++ bfq_clear_bfqq_IO_bound(bfqq);
++
++ if (bfqd->low_latency && bfqq->wr_coeff == 1)
++ bfqq->last_wr_start_finish = jiffies;
++
++ if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
++ RB_EMPTY_ROOT(&bfqq->sort_list)) {
++ /*
++ * If we get here, and there are no outstanding requests,
++ * then the request pattern is isochronous (see the comments
++ * to the function bfq_bfqq_softrt_next_start()). Hence we
++ * can compute soft_rt_next_start. If, instead, the queue
++ * still has outstanding requests, then we have to wait
++ * for the completion of all the outstanding requests to
++ * discover whether the request pattern is actually
++ * isochronous.
++ */
++ if (bfqq->dispatched == 0)
++ bfqq->soft_rt_next_start =
++ bfq_bfqq_softrt_next_start(bfqd, bfqq);
++ else {
++ /*
++ * The application is still waiting for the
++ * completion of one or more requests:
++ * prevent it from possibly being incorrectly
++ * deemed as soft real-time by setting its
++ * soft_rt_next_start to infinity. In fact,
++ * without this assignment, the application
++ * would be incorrectly deemed as soft
++ * real-time if:
++ * 1) it issued a new request before the
++ * completion of all its in-flight
++ * requests, and
++ * 2) at that time, its soft_rt_next_start
++ * happened to be in the past.
++ */
++ bfqq->soft_rt_next_start =
++ bfq_infinity_from_now(jiffies);
++ /*
++ * Schedule an update of soft_rt_next_start to when
++ * the task may be discovered to be isochronous.
++ */
++ bfq_mark_bfqq_softrt_update(bfqq);
++ }
++ }
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason,
++ slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
++
++ /*
++ * Increase, decrease or leave budget unchanged according to
++ * reason.
++ */
++ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
++ __bfq_bfqq_expire(bfqd, bfqq);
++}
++
++/*
++ * Budget timeout is not implemented through a dedicated timer, but
++ * just checked on request arrivals and completions, as well as on
++ * idle timer expirations.
++ */
++static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
++{
++ if (bfq_bfqq_budget_new(bfqq) ||
++ time_before(jiffies, bfqq->budget_timeout))
++ return false;
++ return true;
++}
++
++/*
++ * If we expire a queue that is waiting for the arrival of a new
++ * request, we may prevent the fictitious timestamp back-shifting that
++ * allows the guarantees of the queue to be preserved (see [1] for
++ * this tricky aspect). Hence we return true only if this condition
++ * does not hold, or if the queue is slow enough to deserve only to be
++ * kicked off for preserving a high throughput.
++ */
++static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
++{
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "may_budget_timeout: wait_request %d left %d timeout %d",
++ bfq_bfqq_wait_request(bfqq),
++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3,
++ bfq_bfqq_budget_timeout(bfqq));
++
++ return (!bfq_bfqq_wait_request(bfqq) ||
++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)
++ &&
++ bfq_bfqq_budget_timeout(bfqq);
++}
++
++/*
++ * For a queue that becomes empty, device idling is allowed only if
++ * this function returns true for that queue. As a consequence, since
++ * device idling plays a critical role for both throughput boosting
++ * and service guarantees, the return value of this function plays a
++ * critical role as well.
++ *
++ * In a nutshell, this function returns true only if idling is
++ * beneficial for throughput or, even if detrimental for throughput,
++ * idling is however necessary to preserve service guarantees (low
++ * latency, desired throughput distribution, ...). In particular, on
++ * NCQ-capable devices, this function tries to return false, so as to
++ * help keep the drives' internal queues full, whenever this helps the
++ * device boost the throughput without causing any service-guarantee
++ * issue.
++ *
++ * In more detail, the return value of this function is obtained by,
++ * first, computing a number of boolean variables that take into
++ * account throughput and service-guarantee issues, and, then,
++ * combining these variables in a logical expression. Most of the
++ * issues taken into account are not trivial. We discuss these issues
++ * while introducing the variables.
++ */
++static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
++{
++ struct bfq_data *bfqd = bfqq->bfqd;
++ bool idling_boosts_thr, idling_boosts_thr_without_issues,
++ all_queues_seeky, on_hdd_and_not_all_queues_seeky,
++ idling_needed_for_service_guarantees,
++ asymmetric_scenario;
++
++ /*
++ * The next variable takes into account the cases where idling
++ * boosts the throughput.
++ *
++ * The value of the variable is computed considering, first, that
++ * idling is virtually always beneficial for the throughput if:
++ * (a) the device is not NCQ-capable, or
++ * (b) regardless of the presence of NCQ, the device is rotational
++ * and the request pattern for bfqq is I/O-bound and sequential.
++ *
++ * Secondly, and in contrast to the above item (b), idling an
++ * NCQ-capable flash-based device would not boost the
++ * throughput even with sequential I/O; rather it would lower
++ * the throughput in proportion to how fast the device
++ * is. Accordingly, the next variable is true if any of the
++ * above conditions (a) and (b) is true, and, in particular,
++ * happens to be false if bfqd is an NCQ-capable flash-based
++ * device.
++ */
++ idling_boosts_thr = !bfqd->hw_tag ||
++ (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
++ bfq_bfqq_idle_window(bfqq));
++
++ /*
++ * The value of the next variable,
++ * idling_boosts_thr_without_issues, is equal to that of
++ * idling_boosts_thr, unless a special case holds. In this
++ * special case, described below, idling may cause problems to
++ * weight-raised queues.
++ *
++ * When the request pool is saturated (e.g., in the presence
++ * of write hogs), if the processes associated with
++ * non-weight-raised queues ask for requests at a lower rate,
++ * then processes associated with weight-raised queues have a
++ * higher probability to get a request from the pool
++ * immediately (or at least soon) when they need one. Thus
++ * they have a higher probability to actually get a fraction
++ * of the device throughput proportional to their high
++ * weight. This is especially true with NCQ-capable drives,
++ * which enqueue several requests in advance, and further
++ * reorder internally-queued requests.
++ *
++ * For this reason, we force to false the value of
++ * idling_boosts_thr_without_issues if there are weight-raised
++ * busy queues. In this case, and if bfqq is not weight-raised,
++ * this guarantees that the device is not idled for bfqq (if,
++ * instead, bfqq is weight-raised, then idling will be
++ * guaranteed by another variable, see below). Combined with
++ * the timestamping rules of BFQ (see [1] for details), this
++ * behavior causes bfqq, and hence any sync non-weight-raised
++ * queue, to get a lower number of requests served, and thus
++ * to ask for a lower number of requests from the request
++ * pool, before the busy weight-raised queues get served
++ * again. This often mitigates starvation problems in the
++ * presence of heavy write workloads and NCQ, thereby
++ * guaranteeing a higher application and system responsiveness
++ * in these hostile scenarios.
++ */
++ idling_boosts_thr_without_issues = idling_boosts_thr &&
++ bfqd->wr_busy_queues == 0;
++
++ /*
++ * There are then two cases where idling must be performed not
++ * for throughput concerns, but to preserve service
++ * guarantees. In the description of these cases, we say, for
++ * short, that a queue is sequential/random if the process
++ * associated to the queue issues sequential/random requests
++ * (in the second case the queue may be tagged as seeky or
++ * even constantly_seeky).
++ *
++ * To introduce the first case, we note that, since
++ * bfq_bfqq_idle_window(bfqq) is false if the device is
++ * NCQ-capable and bfqq is random (see
++ * bfq_update_idle_window()), then, from the above two
++ * assignments it follows that
++ * idling_boosts_thr_without_issues is false if the device is
++ * NCQ-capable and bfqq is random. Therefore, for this case,
++ * device idling would never be allowed if we used just
++ * idling_boosts_thr_without_issues to decide whether to allow
++ * it. And, beneficially, this would imply that throughput
++ * would always be boosted also with random I/O on NCQ-capable
++ * HDDs.
++ *
++ * But we must be careful on this point, to avoid an unfair
++ * treatment for bfqq. In fact, because of the same above
++ * assignments, idling_boosts_thr_without_issues is, on the
++ * other hand, true if 1) the device is an HDD and bfqq is
++ * sequential, and 2) there are no busy weight-raised
++ * queues. As a consequence, if we used just
++ * idling_boosts_thr_without_issues to decide whether to idle
++ * the device, then with an HDD we might easily bump into a
++ * scenario where queues that are sequential and I/O-bound
++ * would enjoy idling, whereas random queues would not. The
++ * latter might then get a low share of the device throughput,
++ * simply because the former would get many requests served
++ * after being set as in service, while the latter would not.
++ *
++ * To address this issue, we start by setting to true a
++ * sentinel variable, on_hdd_and_not_all_queues_seeky, if the
++ * device is rotational and not all queues with pending or
++ * in-flight requests are constantly seeky (i.e., there are
++ * active sequential queues, and bfqq might then be mistreated
++ * if it does not enjoy idling because it is random).
++ */
++ all_queues_seeky = bfq_bfqq_constantly_seeky(bfqq) &&
++ bfqd->busy_in_flight_queues ==
++ bfqd->const_seeky_busy_in_flight_queues;
++
++ on_hdd_and_not_all_queues_seeky =
++ !blk_queue_nonrot(bfqd->queue) && !all_queues_seeky;
++
++ /*
++ * To introduce the second case where idling needs to be
++ * performed to preserve service guarantees, we can note that
++ * allowing the drive to enqueue more than one request at a
++ * time, and hence delegating de facto final scheduling
++ * decisions to the drive's internal scheduler, causes loss of
++ * control on the actual request service order. In particular,
++ * the critical situation is when requests from different
++ * processes happens to be present, at the same time, in the
++ * internal queue(s) of the drive. In such a situation, the
++ * drive, by deciding the service order of the
++ * internally-queued requests, does determine also the actual
++ * throughput distribution among these processes. But the
++ * drive typically has no notion or concern about per-process
++ * throughput distribution, and makes its decisions only on a
++ * per-request basis. Therefore, the service distribution
++ * enforced by the drive's internal scheduler is likely to
++ * coincide with the desired device-throughput distribution
++ * only in a completely symmetric scenario where:
++ * (i) each of these processes must get the same throughput as
++ * the others;
++ * (ii) all these processes have the same I/O pattern
++ * (either sequential or random).
++ * In fact, in such a scenario, the drive will tend to treat
++ * the requests of each of these processes in about the same
++ * way as the requests of the others, and thus to provide
++ * each of these processes with about the same throughput
++ * (which is exactly the desired throughput distribution). In
++ * contrast, in any asymmetric scenario, device idling is
++ * certainly needed to guarantee that bfqq receives its
++ * assigned fraction of the device throughput (see [1] for
++ * details).
++ *
++ * We address this issue by controlling, actually, only the
++ * symmetry sub-condition (i), i.e., provided that
++ * sub-condition (i) holds, idling is not performed,
++ * regardless of whether sub-condition (ii) holds. In other
++ * words, only if sub-condition (i) holds, then idling is
++ * allowed, and the device tends to be prevented from queueing
++ * many requests, possibly of several processes. The reason
++ * for not controlling also sub-condition (ii) is that, first,
++ * in the case of an HDD, the asymmetry in terms of types of
++ * I/O patterns is already taken into account in the above
++ * sentinel variable
++ * on_hdd_and_not_all_queues_seeky. Secondly, in the case of a
++ * flash-based device, we prefer however to privilege
++ * throughput (and idling lowers throughput for this type of
++ * devices), for the following reasons:
++ * 1) differently from HDDs, the service time of random
++ * requests is not orders of magnitude higher than the service
++ * time of sequential requests; thus, even if processes doing
++ * sequential I/O get a preferential treatment with respect to
++ * others doing random I/O, the consequences are not as
++ * dramatic as with HDDs;
++ * 2) if a process doing random I/O does need strong
++ * throughput guarantees, it is hopefully already being
++ * weight-raised, or the user is likely to have assigned it a
++ * higher weight than the other processes (and thus
++ * sub-condition (i) is likely to be false, which triggers
++ * idling).
++ *
++ * According to the above considerations, the next variable is
++ * true (only) if sub-condition (i) holds. To compute the
++ * value of this variable, we not only use the return value of
++ * the function bfq_symmetric_scenario(), but also check
++ * whether bfqq is being weight-raised, because
++ * bfq_symmetric_scenario() does not take into account also
++ * weight-raised queues (see comments to
++ * bfq_weights_tree_add()).
++ *
++ * As a side note, it is worth considering that the above
++ * device-idling countermeasures may however fail in the
++ * following unlucky scenario: if idling is (correctly)
++ * disabled in a time period during which all symmetry
++ * sub-conditions hold, and hence the device is allowed to
++ * enqueue many requests, but at some later point in time some
++ * sub-condition ceases to hold, then it may become impossible
++ * to let requests be served in the desired order until all
++ * the requests already queued in the device have been served.
++ */
++ asymmetric_scenario = bfqq->wr_coeff > 1 ||
++ !bfq_symmetric_scenario(bfqd);
++
++ /*
++ * Finally, there is a case where maximizing throughput is the
++ * best choice even if it may cause unfairness toward
++ * bfqq. Such a case is when bfqq became active in a burst of
++ * queue activations. Queues that became active during a large
++ * burst benefit only from throughput, as discussed in the
++ * comments to bfq_handle_burst. Thus, if bfqq became active
++ * in a burst and not idling the device maximizes throughput,
++ * then the device must not be idled, because not idling the
++ * device provides bfqq and all other queues in the burst with
++ * maximum benefit. Combining this and the two cases above, we
++ * can now establish when idling is actually needed to
++ * preserve service guarantees.
++ */
++ idling_needed_for_service_guarantees =
++ (on_hdd_and_not_all_queues_seeky || asymmetric_scenario) &&
++ !bfq_bfqq_in_large_burst(bfqq);
++
++ /*
++ * We have now all the components we need to compute the return
++ * value of the function, which is true only if both the following
++ * conditions hold:
++ * 1) bfqq is sync, because idling makes sense only for sync queues;
++ * 2) idling either boosts the throughput (without issues), or
++ * is necessary to preserve service guarantees.
++ */
++ return bfq_bfqq_sync(bfqq) &&
++ (idling_boosts_thr_without_issues ||
++ idling_needed_for_service_guarantees);
++}
++
++/*
++ * If the in-service queue is empty but the function bfq_bfqq_may_idle
++ * returns true, then:
++ * 1) the queue must remain in service and cannot be expired, and
++ * 2) the device must be idled to wait for the possible arrival of a new
++ * request for the queue.
++ * See the comments to the function bfq_bfqq_may_idle for the reasons
++ * why performing device idling is the best choice to boost the throughput
++ * and preserve service guarantees when bfq_bfqq_may_idle itself
++ * returns true.
++ */
++static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
++{
++ struct bfq_data *bfqd = bfqq->bfqd;
++
++ return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
++ bfq_bfqq_may_idle(bfqq);
++}
++
++/*
++ * Select a queue for service. If we have a current queue in service,
++ * check whether to continue servicing it, or retrieve and set a new one.
++ */
++static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq;
++ struct request *next_rq;
++ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
++
++ bfqq = bfqd->in_service_queue;
++ if (!bfqq)
++ goto new_queue;
++
++ bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
++
++ if (bfq_may_expire_for_budg_timeout(bfqq) &&
++ !timer_pending(&bfqd->idle_slice_timer) &&
++ !bfq_bfqq_must_idle(bfqq))
++ goto expire;
++
++ next_rq = bfqq->next_rq;
++ /*
++ * If bfqq has requests queued and it has enough budget left to
++ * serve them, keep the queue, otherwise expire it.
++ */
++ if (next_rq) {
++ if (bfq_serv_to_charge(next_rq, bfqq) >
++ bfq_bfqq_budget_left(bfqq)) {
++ reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
++ goto expire;
++ } else {
++ /*
++ * The idle timer may be pending because we may
++ * not disable disk idling even when a new request
++ * arrives.
++ */
++ if (timer_pending(&bfqd->idle_slice_timer)) {
++ /*
++ * If we get here: 1) at least one new request
++ * has arrived but we have not disabled the
++ * timer because the request was too small,
++ * 2) then the block layer has unplugged
++ * the device, causing the dispatch to be
++ * invoked.
++ *
++ * Since the device is unplugged, now the
++ * requests are probably large enough to
++ * provide a reasonable throughput.
++ * So we disable idling.
++ */
++ bfq_clear_bfqq_wait_request(bfqq);
++ del_timer(&bfqd->idle_slice_timer);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_idle_time(bfqq_group(bfqq));
++#endif
++ }
++ goto keep_queue;
++ }
++ }
++
++ /*
++ * No requests pending. However, if the in-service queue is idling
++ * for a new request, or has requests waiting for a completion and
++ * may idle after their completion, then keep it anyway.
++ */
++ if (timer_pending(&bfqd->idle_slice_timer) ||
++ (bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) {
++ bfqq = NULL;
++ goto keep_queue;
++ }
++
++ reason = BFQ_BFQQ_NO_MORE_REQUESTS;
++expire:
++ bfq_bfqq_expire(bfqd, bfqq, false, reason);
++new_queue:
++ bfqq = bfq_set_in_service_queue(bfqd);
++ bfq_log(bfqd, "select_queue: new queue %d returned",
++ bfqq ? bfqq->pid : 0);
++keep_queue:
++ return bfqq;
++}
++
++static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++
++ if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */
++ bfq_log_bfqq(bfqd, bfqq,
++ "raising period dur %u/%u msec, old coeff %u, w %d(%d)",
++ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
++ jiffies_to_msecs(bfqq->wr_cur_max_time),
++ bfqq->wr_coeff,
++ bfqq->entity.weight, bfqq->entity.orig_weight);
++
++ BUG_ON(bfqq != bfqd->in_service_queue && entity->weight !=
++ entity->orig_weight * bfqq->wr_coeff);
++ if (entity->prio_changed)
++ bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
++
++ /*
++ * If the queue was activated in a burst, or
++ * too much time has elapsed from the beginning
++ * of this weight-raising period, then end weight
++ * raising.
++ */
++ if (bfq_bfqq_in_large_burst(bfqq) ||
++ time_is_before_jiffies(bfqq->last_wr_start_finish +
++ bfqq->wr_cur_max_time)) {
++ bfqq->last_wr_start_finish = jiffies;
++ bfq_log_bfqq(bfqd, bfqq,
++ "wrais ending at %lu, rais_max_time %u",
++ bfqq->last_wr_start_finish,
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ bfq_bfqq_end_wr(bfqq);
++ }
++ }
++ /* Update weight both if it must be raised and if it must be lowered */
++ if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
++ __bfq_entity_update_weight_prio(
++ bfq_entity_service_tree(entity),
++ entity);
++}
++
++/*
++ * Dispatch one request from bfqq, moving it to the request queue
++ * dispatch list.
++ */
++static int bfq_dispatch_request(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ int dispatched = 0;
++ struct request *rq;
++ unsigned long service_to_charge;
++
++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
++
++ /* Follow expired path, else get first next available. */
++ rq = bfq_check_fifo(bfqq);
++ if (!rq)
++ rq = bfqq->next_rq;
++ service_to_charge = bfq_serv_to_charge(rq, bfqq);
++
++ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) {
++ /*
++ * This may happen if the next rq is chosen in fifo order
++ * instead of sector order. The budget is properly
++ * dimensioned to be always sufficient to serve the next
++ * request only if it is chosen in sector order. The reason
++ * is that it would be quite inefficient and of little use
++ * to always make sure that the budget is large enough to
++ * serve even the possible next rq in fifo order.
++ * In fact, requests are seldom served in fifo order.
++ *
++ * Expire the queue for budget exhaustion, and make sure
++ * that the next act_budget is enough to serve the next
++ * request, even if it comes from the fifo expired path.
++ */
++ bfqq->next_rq = rq;
++ /*
++ * Since this dispatch failed, make sure that
++ * a new one will be performed.
++ */
++ if (!bfqd->rq_in_driver)
++ bfq_schedule_dispatch(bfqd);
++ goto expire;
++ }
++
++ /* Finally, insert request into driver dispatch list. */
++ bfq_bfqq_served(bfqq, service_to_charge);
++ bfq_dispatch_insert(bfqd->queue, rq);
++
++ bfq_update_wr_data(bfqd, bfqq);
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "dispatched %u sec req (%llu), budg left %d",
++ blk_rq_sectors(rq),
++ (long long unsigned)blk_rq_pos(rq),
++ bfq_bfqq_budget_left(bfqq));
++
++ dispatched++;
++
++ if (!bfqd->in_service_bic) {
++ atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount);
++ bfqd->in_service_bic = RQ_BIC(rq);
++ }
++
++ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) &&
++ dispatched >= bfqd->bfq_max_budget_async_rq) ||
++ bfq_class_idle(bfqq)))
++ goto expire;
++
++ return dispatched;
++
++expire:
++ bfq_bfqq_expire(bfqd, bfqq, false, BFQ_BFQQ_BUDGET_EXHAUSTED);
++ return dispatched;
++}
++
++static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq)
++{
++ int dispatched = 0;
++
++ while (bfqq->next_rq) {
++ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq);
++ dispatched++;
++ }
++
++ BUG_ON(!list_empty(&bfqq->fifo));
++ return dispatched;
++}
++
++/*
++ * Drain our current requests.
++ * Used for barriers and when switching io schedulers on-the-fly.
++ */
++static int bfq_forced_dispatch(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq, *n;
++ struct bfq_service_tree *st;
++ int dispatched = 0;
++
++ bfqq = bfqd->in_service_queue;
++ if (bfqq)
++ __bfq_bfqq_expire(bfqd, bfqq);
++
++ /*
++ * Loop through classes, and be careful to leave the scheduler
++ * in a consistent state, as feedback mechanisms and vtime
++ * updates cannot be disabled during the process.
++ */
++ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) {
++ st = bfq_entity_service_tree(&bfqq->entity);
++
++ dispatched += __bfq_forced_dispatch_bfqq(bfqq);
++ bfqq->max_budget = bfq_max_budget(bfqd);
++
++ bfq_forget_idle(st);
++ }
++
++ BUG_ON(bfqd->busy_queues != 0);
++
++ return dispatched;
++}
++
++static int bfq_dispatch_requests(struct request_queue *q, int force)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_queue *bfqq;
++ int max_dispatch;
++
++ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
++ if (bfqd->busy_queues == 0)
++ return 0;
++
++ if (unlikely(force))
++ return bfq_forced_dispatch(bfqd);
++
++ bfqq = bfq_select_queue(bfqd);
++ if (!bfqq)
++ return 0;
++
++ if (bfq_class_idle(bfqq))
++ max_dispatch = 1;
++
++ if (!bfq_bfqq_sync(bfqq))
++ max_dispatch = bfqd->bfq_max_budget_async_rq;
++
++ if (!bfq_bfqq_sync(bfqq) && bfqq->dispatched >= max_dispatch) {
++ if (bfqd->busy_queues > 1)
++ return 0;
++ if (bfqq->dispatched >= 4 * max_dispatch)
++ return 0;
++ }
++
++ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq))
++ return 0;
++
++ bfq_clear_bfqq_wait_request(bfqq);
++ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
++
++ if (!bfq_dispatch_request(bfqd, bfqq))
++ return 0;
++
++ bfq_log_bfqq(bfqd, bfqq, "dispatched %s request",
++ bfq_bfqq_sync(bfqq) ? "sync" : "async");
++
++ return 1;
++}
++
++/*
++ * Task holds one reference to the queue, dropped when task exits. Each rq
++ * in-flight on this queue also holds a reference, dropped when rq is freed.
++ *
++ * Queue lock must be held here.
++ */
++static void bfq_put_queue(struct bfq_queue *bfqq)
++{
++ struct bfq_data *bfqd = bfqq->bfqd;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ struct bfq_group *bfqg = bfqq_group(bfqq);
++#endif
++
++ BUG_ON(atomic_read(&bfqq->ref) <= 0);
++
++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq,
++ atomic_read(&bfqq->ref));
++ if (!atomic_dec_and_test(&bfqq->ref))
++ return;
++
++ BUG_ON(rb_first(&bfqq->sort_list));
++ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
++ BUG_ON(bfqq->entity.tree);
++ BUG_ON(bfq_bfqq_busy(bfqq));
++ BUG_ON(bfqd->in_service_queue == bfqq);
++
++ if (bfq_bfqq_sync(bfqq))
++ /*
++ * The fact that this queue is being destroyed does not
++ * invalidate the fact that this queue may have been
++ * activated during the current burst. As a consequence,
++ * although the queue does not exist anymore, and hence
++ * needs to be removed from the burst list if it is there,
++ * the burst size must not be decremented.
++ */
++ hlist_del_init(&bfqq->burst_list_node);
++
++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq);
++
++ kmem_cache_free(bfq_pool, bfqq);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_put(bfqg);
++#endif
++}
++
++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ if (bfqq == bfqd->in_service_queue) {
++ __bfq_bfqq_expire(bfqd, bfqq);
++ bfq_schedule_dispatch(bfqd);
++ }
++
++ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
++ atomic_read(&bfqq->ref));
++
++ bfq_put_queue(bfqq);
++}
++
++static void bfq_init_icq(struct io_cq *icq)
++{
++ struct bfq_io_cq *bic = icq_to_bic(icq);
++
++ bic->ttime.last_end_request = jiffies;
++}
++
++static void bfq_exit_icq(struct io_cq *icq)
++{
++ struct bfq_io_cq *bic = icq_to_bic(icq);
++ struct bfq_data *bfqd = bic_to_bfqd(bic);
++
++ if (bic->bfqq[BLK_RW_ASYNC]) {
++ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]);
++ bic->bfqq[BLK_RW_ASYNC] = NULL;
++ }
++
++ if (bic->bfqq[BLK_RW_SYNC]) {
++ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
++ bic->bfqq[BLK_RW_SYNC] = NULL;
++ }
++}
++
++/*
++ * Update the entity prio values; note that the new values will not
++ * be used until the next (re)activation.
++ */
++static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
++{
++ struct task_struct *tsk = current;
++ int ioprio_class;
++
++ ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
++ switch (ioprio_class) {
++ default:
++ dev_err(bfqq->bfqd->queue->backing_dev_info.dev,
++ "bfq: bad prio class %d\n", ioprio_class);
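++ /* fall through */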
++ case IOPRIO_CLASS_NONE:
++ /*
++ * No prio set, inherit CPU scheduling settings.
++ */
++ bfqq->new_ioprio = task_nice_ioprio(tsk);
++ bfqq->new_ioprio_class = task_nice_ioclass(tsk);
++ break;
++ case IOPRIO_CLASS_RT:
++ bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
++ bfqq->new_ioprio_class = IOPRIO_CLASS_RT;
++ break;
++ case IOPRIO_CLASS_BE:
++ bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
++ bfqq->new_ioprio_class = IOPRIO_CLASS_BE;
++ break;
++ case IOPRIO_CLASS_IDLE:
++ bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE;
++ bfqq->new_ioprio = 7;
++ bfq_clear_bfqq_idle_window(bfqq);
++ break;
++ }
++
++ if (bfqq->new_ioprio < 0 || bfqq->new_ioprio >= IOPRIO_BE_NR) {
++ printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n",
++ bfqq->new_ioprio);
++ BUG();
++ }
++
++ bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
++ bfqq->entity.prio_changed = 1;
++}
++
++static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
++{
++ struct bfq_data *bfqd;
++ struct bfq_queue *bfqq, *new_bfqq;
++ unsigned long uninitialized_var(flags);
++ int ioprio = bic->icq.ioc->ioprio;
++
++ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
++ &flags);
++ /*
++ * This condition may trigger on a newly created bic, be sure to
++ * drop the lock before returning.
++ */
++ if (unlikely(!bfqd) || likely(bic->ioprio == ioprio))
++ goto out;
++
++ bic->ioprio = ioprio;
++
++ bfqq = bic->bfqq[BLK_RW_ASYNC];
++ if (bfqq) {
++ new_bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic,
++ GFP_ATOMIC);
++ if (new_bfqq) {
++ bic->bfqq[BLK_RW_ASYNC] = new_bfqq;
++ bfq_log_bfqq(bfqd, bfqq,
++ "check_ioprio_change: bfqq %p %d",
++ bfqq, atomic_read(&bfqq->ref));
++ bfq_put_queue(bfqq);
++ }
++ }
++
++ bfqq = bic->bfqq[BLK_RW_SYNC];
++ if (bfqq)
++ bfq_set_next_ioprio_data(bfqq, bic);
++
++out:
++ bfq_put_bfqd_unlock(bfqd, &flags);
++}
++
++static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ struct bfq_io_cq *bic, pid_t pid, int is_sync)
++{
++ RB_CLEAR_NODE(&bfqq->entity.rb_node);
++ INIT_LIST_HEAD(&bfqq->fifo);
++ INIT_HLIST_NODE(&bfqq->burst_list_node);
++
++ atomic_set(&bfqq->ref, 0);
++ bfqq->bfqd = bfqd;
++
++ if (bic)
++ bfq_set_next_ioprio_data(bfqq, bic);
++
++ if (is_sync) {
++ if (!bfq_class_idle(bfqq))
++ bfq_mark_bfqq_idle_window(bfqq);
++ bfq_mark_bfqq_sync(bfqq);
++ } else
++ bfq_clear_bfqq_sync(bfqq);
++ bfq_mark_bfqq_IO_bound(bfqq);
++
++ /* Tentative initial value to trade off between thr and lat */
++ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
++ bfqq->pid = pid;
++
++ bfqq->wr_coeff = 1;
++ bfqq->last_wr_start_finish = 0;
++ /*
++ * Set to the value for which bfqq will not be deemed as
++ * soft rt when it becomes backlogged.
++ */
++ bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies);
++}
++
++static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd,
++ struct bio *bio, int is_sync,
++ struct bfq_io_cq *bic,
++ gfp_t gfp_mask)
++{
++ struct bfq_group *bfqg;
++ struct bfq_queue *bfqq, *new_bfqq = NULL;
++ struct blkcg *blkcg;
++
++retry:
++ rcu_read_lock();
++
++ blkcg = bio_blkcg(bio);
++ bfqg = bfq_find_alloc_group(bfqd, blkcg);
++ /* bic always exists here */
++ bfqq = bic_to_bfqq(bic, is_sync);
++
++ /*
++ * Always try a new alloc if we fall back to the OOM bfqq
++ * originally, since it should just be a temporary situation.
++ */
++ if (!bfqq || bfqq == &bfqd->oom_bfqq) {
++ bfqq = NULL;
++ if (new_bfqq) {
++ bfqq = new_bfqq;
++ new_bfqq = NULL;
++ } else if (gfpflags_allow_blocking(gfp_mask)) {
++ rcu_read_unlock();
++ spin_unlock_irq(bfqd->queue->queue_lock);
++ new_bfqq = kmem_cache_alloc_node(bfq_pool,
++ gfp_mask | __GFP_ZERO,
++ bfqd->queue->node);
++ spin_lock_irq(bfqd->queue->queue_lock);
++ if (new_bfqq)
++ goto retry;
++ } else {
++ bfqq = kmem_cache_alloc_node(bfq_pool,
++ gfp_mask | __GFP_ZERO,
++ bfqd->queue->node);
++ }
++
++ if (bfqq) {
++ bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
++ is_sync);
++ bfq_init_entity(&bfqq->entity, bfqg);
++ bfq_log_bfqq(bfqd, bfqq, "allocated");
++ } else {
++ bfqq = &bfqd->oom_bfqq;
++ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
++ }
++ }
++
++ if (new_bfqq)
++ kmem_cache_free(bfq_pool, new_bfqq);
++
++ rcu_read_unlock();
++
++ return bfqq;
++}
++
++static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
++ struct bfq_group *bfqg,
++ int ioprio_class, int ioprio)
++{
++ switch (ioprio_class) {
++ case IOPRIO_CLASS_RT:
++ return &bfqg->async_bfqq[0][ioprio];
++ case IOPRIO_CLASS_NONE:
++ ioprio = IOPRIO_NORM;
++ /* fall through */
++ case IOPRIO_CLASS_BE:
++ return &bfqg->async_bfqq[1][ioprio];
++ case IOPRIO_CLASS_IDLE:
++ return &bfqg->async_idle_bfqq;
++ default:
++ BUG();
++ }
++}
++
++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
++ struct bio *bio, int is_sync,
++ struct bfq_io_cq *bic, gfp_t gfp_mask)
++{
++ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
++ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
++ struct bfq_queue **async_bfqq = NULL;
++ struct bfq_queue *bfqq = NULL;
++
++ if (!is_sync) {
++ struct blkcg *blkcg;
++ struct bfq_group *bfqg;
++
++ rcu_read_lock();
++ blkcg = bio_blkcg(bio);
++ rcu_read_unlock();
++ bfqg = bfq_find_alloc_group(bfqd, blkcg);
++ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
++ ioprio);
++ bfqq = *async_bfqq;
++ }
++
++ if (!bfqq)
++ bfqq = bfq_find_alloc_queue(bfqd, bio, is_sync, bic, gfp_mask);
++
++ /*
++ * Pin the queue now that it's allocated, scheduler exit will
++ * prune it.
++ */
++ if (!is_sync && !(*async_bfqq)) {
++ atomic_inc(&bfqq->ref);
++ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
++ bfqq, atomic_read(&bfqq->ref));
++ *async_bfqq = bfqq;
++ }
++
++ atomic_inc(&bfqq->ref);
++ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq,
++ atomic_read(&bfqq->ref));
++ return bfqq;
++}
++
++static void bfq_update_io_thinktime(struct bfq_data *bfqd,
++ struct bfq_io_cq *bic)
++{
++ unsigned long elapsed = jiffies - bic->ttime.last_end_request;
++ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle);
++
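++ /*
++ * The three updates below are exponentially weighted
++ * moving averages in fixed point, scaled by 256:
++ * ttime_samples converges towards 256 (starting from 0
++ * the successive values are 32, 60, 84, ...), and the
++ * +128 in ttime_mean provides rounding.
++ */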
++ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8;
++ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8;
++ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) /
++ bic->ttime.ttime_samples;
++}
++
++static void bfq_update_io_seektime(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ struct request *rq)
++{
++ sector_t sdist;
++ u64 total;
++
++ if (bfqq->last_request_pos < blk_rq_pos(rq))
++ sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
++ else
++ sdist = bfqq->last_request_pos - blk_rq_pos(rq);
++
++ /*
++ * Don't allow the seek distance to get too large from the
++ * odd fragment, pagein, etc.
++ */
++ if (bfqq->seek_samples == 0) /* first request, not really a seek */
++ sdist = 0;
++ else if (bfqq->seek_samples <= 60) /* second & third seek */
++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024);
++ else
++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64);
++
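++ /*
++ * Same 256-scaled exponential average as for think
++ * times; the seek_samples/2 term below rounds the
++ * final division that produces seek_mean.
++ */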
++ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8;
++ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8;
++ total = bfqq->seek_total + (bfqq->seek_samples/2);
++ do_div(total, bfqq->seek_samples);
++ bfqq->seek_mean = (sector_t)total;
++
++ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist,
++ (u64)bfqq->seek_mean);
++}
++
++/*
++ * Disable idle window if the process thinks too long or seeks so much that
++ * it doesn't matter.
++ */
++static void bfq_update_idle_window(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ struct bfq_io_cq *bic)
++{
++ int enable_idle;
++
++ /* Don't idle for async or idle io prio class. */
++ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
++ return;
++
++ enable_idle = bfq_bfqq_idle_window(bfqq);
++
++ if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
++ bfqd->bfq_slice_idle == 0 ||
++ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
++ bfqq->wr_coeff == 1))
++ enable_idle = 0;
++ else if (bfq_sample_valid(bic->ttime.ttime_samples)) {
++ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle &&
++ bfqq->wr_coeff == 1)
++ enable_idle = 0;
++ else
++ enable_idle = 1;
++ }
++ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
++ enable_idle);
++
++ if (enable_idle)
++ bfq_mark_bfqq_idle_window(bfqq);
++ else
++ bfq_clear_bfqq_idle_window(bfqq);
++}
++
++/*
++ * Called when a new fs request (rq) is added to bfqq. Check if there's
++ * something we should do about it.
++ */
++static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ struct request *rq)
++{
++ struct bfq_io_cq *bic = RQ_BIC(rq);
++
++ if (rq->cmd_flags & REQ_META)
++ bfqq->meta_pending++;
++
++ bfq_update_io_thinktime(bfqd, bic);
++ bfq_update_io_seektime(bfqd, bfqq, rq);
++ if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) {
++ bfq_clear_bfqq_constantly_seeky(bfqq);
++ if (!blk_queue_nonrot(bfqd->queue)) {
++ BUG_ON(!bfqd->const_seeky_busy_in_flight_queues);
++ bfqd->const_seeky_busy_in_flight_queues--;
++ }
++ }
++ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
++ !BFQQ_SEEKY(bfqq))
++ bfq_update_idle_window(bfqd, bfqq, bic);
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
++ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq),
++ (long long unsigned)bfqq->seek_mean);
++
++ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
++
++ if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) {
++ bool small_req = bfqq->queued[rq_is_sync(rq)] == 1 &&
++ blk_rq_sectors(rq) < 32;
++ bool budget_timeout = bfq_bfqq_budget_timeout(bfqq);
++
++ /*
++ * There is just this request queued: if the request
++ * is small and the queue is not to be expired, then
++ * just exit.
++ *
++ * In this way, if the disk is being idled to wait for
++ * a new request from the in-service queue, we avoid
++ * unplugging the device and committing the disk to serve
++ * just a small request. On the contrary, we wait for
++ * the block layer to decide when to unplug the device:
++ * hopefully, new requests will be merged to this one
++ * quickly, then the device will be unplugged and
++ * larger requests will be dispatched.
++ */
++ if (small_req && !budget_timeout)
++ return;
++
++ /*
++ * A large enough request arrived, or the queue is to
++ * be expired: in both cases disk idling is to be
++ * stopped, so clear wait_request flag and reset
++ * timer.
++ */
++ bfq_clear_bfqq_wait_request(bfqq);
++ del_timer(&bfqd->idle_slice_timer);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_idle_time(bfqq_group(bfqq));
++#endif
++
++ /*
++ * The queue is not empty, because a new request just
++ * arrived. Hence we can safely expire the queue, in
++ * case of budget timeout, without risking that the
++ * timestamps of the queue are not updated correctly.
++ * See [1] for more details.
++ */
++ if (budget_timeout)
++ bfq_bfqq_expire(bfqd, bfqq, false,
++ BFQ_BFQQ_BUDGET_TIMEOUT);
++
++ /*
++ * Let the request rip immediately, or let a new queue be
++ * selected if bfqq has just been expired.
++ */
++ __blk_run_queue(bfqd->queue);
++ }
++}
++
++static void bfq_insert_request(struct request_queue *q, struct request *rq)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++
++ assert_spin_locked(bfqd->queue->queue_lock);
++
++ bfq_add_request(rq);
++
++ rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
++ list_add_tail(&rq->queuelist, &bfqq->fifo);
++
++ bfq_rq_enqueued(bfqd, bfqq, rq);
++}
++
++static void bfq_update_hw_tag(struct bfq_data *bfqd)
++{
++ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver,
++ bfqd->rq_in_driver);
++
++ if (bfqd->hw_tag == 1)
++ return;
++
++ /*
++ * This sample is valid if the number of outstanding requests
++ * is large enough to allow a queueing behavior. Note that the
++ * sum is not exact, as it does not take into account deactivated
++ * requests.
++ */
++ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD)
++ return;
++
++ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
++ return;
++
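++ /*
++ * Enough samples collected: flag the device as
++ * supporting queueing only if, at some point in the
++ * sampling window, it actually held more than
++ * BFQ_HW_QUEUE_THRESHOLD requests in flight at once.
++ */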
++ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD;
++ bfqd->max_rq_in_driver = 0;
++ bfqd->hw_tag_samples = 0;
++}
++
++static void bfq_completed_request(struct request_queue *q, struct request *rq)
++{
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++ struct bfq_data *bfqd = bfqq->bfqd;
++ bool sync = bfq_bfqq_sync(bfqq);
++
++ bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)",
++ blk_rq_sectors(rq), sync);
++
++ bfq_update_hw_tag(bfqd);
++
++ BUG_ON(!bfqd->rq_in_driver);
++ BUG_ON(!bfqq->dispatched);
++ bfqd->rq_in_driver--;
++ bfqq->dispatched--;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_completion(bfqq_group(bfqq),
++ rq_start_time_ns(rq),
++ rq_io_start_time_ns(rq), rq->cmd_flags);
++#endif
++
++ if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
++ bfq_weights_tree_remove(bfqd, &bfqq->entity,
++ &bfqd->queue_weights_tree);
++ if (!blk_queue_nonrot(bfqd->queue)) {
++ BUG_ON(!bfqd->busy_in_flight_queues);
++ bfqd->busy_in_flight_queues--;
++ if (bfq_bfqq_constantly_seeky(bfqq)) {
++ BUG_ON(!bfqd->
++ const_seeky_busy_in_flight_queues);
++ bfqd->const_seeky_busy_in_flight_queues--;
++ }
++ }
++ }
++
++ if (sync) {
++ bfqd->sync_flight--;
++ RQ_BIC(rq)->ttime.last_end_request = jiffies;
++ }
++
++ /*
++ * If we are waiting to discover whether the request pattern of the
++ * task associated with the queue is actually isochronous, and
++ * both requisites for this condition to hold are satisfied, then
++ * compute soft_rt_next_start (see the comments to the function
++ * bfq_bfqq_softrt_next_start()).
++ */
++ if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
++ RB_EMPTY_ROOT(&bfqq->sort_list))
++ bfqq->soft_rt_next_start =
++ bfq_bfqq_softrt_next_start(bfqd, bfqq);
++
++ /*
++ * If this is the in-service queue, check if it needs to be expired,
++ * or if we want to idle in case it has no pending requests.
++ */
++ if (bfqd->in_service_queue == bfqq) {
++ if (bfq_bfqq_budget_new(bfqq))
++ bfq_set_budget_timeout(bfqd);
++
++ if (bfq_bfqq_must_idle(bfqq)) {
++ bfq_arm_slice_timer(bfqd);
++ goto out;
++ } else if (bfq_may_expire_for_budg_timeout(bfqq))
++ bfq_bfqq_expire(bfqd, bfqq, false,
++ BFQ_BFQQ_BUDGET_TIMEOUT);
++ else if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
++ (bfqq->dispatched == 0 ||
++ !bfq_bfqq_may_idle(bfqq)))
++ bfq_bfqq_expire(bfqd, bfqq, false,
++ BFQ_BFQQ_NO_MORE_REQUESTS);
++ }
++
++ if (!bfqd->rq_in_driver)
++ bfq_schedule_dispatch(bfqd);
++
++out:
++ return;
++}
++
++static int __bfq_may_queue(struct bfq_queue *bfqq)
++{
++ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) {
++ bfq_clear_bfqq_must_alloc(bfqq);
++ return ELV_MQUEUE_MUST;
++ }
++
++ return ELV_MQUEUE_MAY;
++}
++
++static int bfq_may_queue(struct request_queue *q, int rw)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct task_struct *tsk = current;
++ struct bfq_io_cq *bic;
++ struct bfq_queue *bfqq;
++
++ /*
++ * Don't force setup of a queue from here, as a call to may_queue
++ * does not necessarily imply that a request actually will be
++ * queued. So just look up a possibly existing queue, or return
++ * 'may queue' if that fails.
++ */
++ bic = bfq_bic_lookup(bfqd, tsk->io_context);
++ if (!bic)
++ return ELV_MQUEUE_MAY;
++
++ bfqq = bic_to_bfqq(bic, rw_is_sync(rw));
++ if (bfqq)
++ return __bfq_may_queue(bfqq);
++
++ return ELV_MQUEUE_MAY;
++}
++
++/*
++ * Queue lock held here.
++ */
++static void bfq_put_request(struct request *rq)
++{
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++
++ if (bfqq) {
++ const int rw = rq_data_dir(rq);
++
++ BUG_ON(!bfqq->allocated[rw]);
++ bfqq->allocated[rw]--;
++
++ rq->elv.priv[0] = NULL;
++ rq->elv.priv[1] = NULL;
++
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d",
++ bfqq, atomic_read(&bfqq->ref));
++ bfq_put_queue(bfqq);
++ }
++}
++
++/*
++ * Allocate bfq data structures associated with this request.
++ */
++static int bfq_set_request(struct request_queue *q, struct request *rq,
++ struct bio *bio, gfp_t gfp_mask)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
++ const int rw = rq_data_dir(rq);
++ const int is_sync = rq_is_sync(rq);
++ struct bfq_queue *bfqq;
++ unsigned long flags;
++
++ might_sleep_if(gfpflags_allow_blocking(gfp_mask));
++
++ bfq_check_ioprio_change(bic, bio);
++
++ spin_lock_irqsave(q->queue_lock, flags);
++
++ if (!bic)
++ goto queue_fail;
++
++ bfq_bic_update_cgroup(bic, bio);
++
++ bfqq = bic_to_bfqq(bic, is_sync);
++ if (!bfqq || bfqq == &bfqd->oom_bfqq) {
++ bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, gfp_mask);
++ bic_set_bfqq(bic, bfqq, is_sync);
++ if (is_sync) {
++ if (bfqd->large_burst)
++ bfq_mark_bfqq_in_large_burst(bfqq);
++ else
++ bfq_clear_bfqq_in_large_burst(bfqq);
++ }
++ }
++
++ bfqq->allocated[rw]++;
++ atomic_inc(&bfqq->ref);
++ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq,
++ atomic_read(&bfqq->ref));
++
++ rq->elv.priv[0] = bic;
++ rq->elv.priv[1] = bfqq;
++
++ spin_unlock_irqrestore(q->queue_lock, flags);
++
++ return 0;
++
++queue_fail:
++ bfq_schedule_dispatch(bfqd);
++ spin_unlock_irqrestore(q->queue_lock, flags);
++
++ return 1;
++}
++
++static void bfq_kick_queue(struct work_struct *work)
++{
++ struct bfq_data *bfqd =
++ container_of(work, struct bfq_data, unplug_work);
++ struct request_queue *q = bfqd->queue;
++
++ spin_lock_irq(q->queue_lock);
++ __blk_run_queue(q);
++ spin_unlock_irq(q->queue_lock);
++}
++
++/*
++ * Handler of the expiration of the timer running if the in-service queue
++ * is idling inside its time slice.
++ */
++static void bfq_idle_slice_timer(unsigned long data)
++{
++ struct bfq_data *bfqd = (struct bfq_data *)data;
++ struct bfq_queue *bfqq;
++ unsigned long flags;
++ enum bfqq_expiration reason;
++
++ spin_lock_irqsave(bfqd->queue->queue_lock, flags);
++
++ bfqq = bfqd->in_service_queue;
++ /*
++	 * Theoretical race here: the in-service queue can be NULL or
++	 * different from the queue that was idling if, while the timer
++	 * handler spins on the queue_lock, a new request arrives for the
++	 * current queue and a full dispatch cycle changes the in-service
++	 * queue. This is unlikely to happen, but in the worst case we
++	 * just expire a queue too early.
++ */
++ if (bfqq) {
++ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired");
++ if (bfq_bfqq_budget_timeout(bfqq))
++ /*
++			 * Here, too, the queue can safely be expired
++			 * for budget timeout without wasting any
++			 * guarantees.
++ */
++ reason = BFQ_BFQQ_BUDGET_TIMEOUT;
++ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0)
++ /*
++ * The queue may not be empty upon timer expiration,
++ * because we may not disable the timer when the
++ * first request of the in-service queue arrives
++ * during disk idling.
++ */
++ reason = BFQ_BFQQ_TOO_IDLE;
++ else
++ goto schedule_dispatch;
++
++ bfq_bfqq_expire(bfqd, bfqq, true, reason);
++ }
++
++schedule_dispatch:
++ bfq_schedule_dispatch(bfqd);
++
++ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags);
++}
++
++static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
++{
++ del_timer_sync(&bfqd->idle_slice_timer);
++ cancel_work_sync(&bfqd->unplug_work);
++}
++
++static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
++ struct bfq_queue **bfqq_ptr)
++{
++ struct bfq_group *root_group = bfqd->root_group;
++ struct bfq_queue *bfqq = *bfqq_ptr;
++
++ bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
++ if (bfqq) {
++ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group);
++ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d",
++ bfqq, atomic_read(&bfqq->ref));
++ bfq_put_queue(bfqq);
++ *bfqq_ptr = NULL;
++ }
++}
++
++/*
++ * Release all of the bfqg's references to its async queues. If we are
++ * deallocating the group, these queues may still contain requests, so
++ * we reparent them to the root cgroup (i.e., the only one that will
++ * exist for sure until all the requests on a device are gone).
++ */
++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
++{
++ int i, j;
++
++ for (i = 0; i < 2; i++)
++ for (j = 0; j < IOPRIO_BE_NR; j++)
++ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]);
++
++ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
++}
++
++static void bfq_exit_queue(struct elevator_queue *e)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ struct request_queue *q = bfqd->queue;
++ struct bfq_queue *bfqq, *n;
++
++ bfq_shutdown_timer_wq(bfqd);
++
++ spin_lock_irq(q->queue_lock);
++
++ BUG_ON(bfqd->in_service_queue);
++ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
++ bfq_deactivate_bfqq(bfqd, bfqq, 0);
++
++ spin_unlock_irq(q->queue_lock);
++
++ bfq_shutdown_timer_wq(bfqd);
++
++ synchronize_rcu();
++
++ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ blkcg_deactivate_policy(q, &blkcg_policy_bfq);
++#else
++ kfree(bfqd->root_group);
++#endif
++
++ kfree(bfqd);
++}
++
++static void bfq_init_root_group(struct bfq_group *root_group,
++ struct bfq_data *bfqd)
++{
++ int i;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ root_group->entity.parent = NULL;
++ root_group->my_entity = NULL;
++ root_group->bfqd = bfqd;
++#endif
++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
++ root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
++}
++
++static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
++{
++ struct bfq_data *bfqd;
++ struct elevator_queue *eq;
++
++ eq = elevator_alloc(q, e);
++ if (!eq)
++ return -ENOMEM;
++
++ bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
++ if (!bfqd) {
++ kobject_put(&eq->kobj);
++ return -ENOMEM;
++ }
++ eq->elevator_data = bfqd;
++
++ /*
++ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
++ * Grab a permanent reference to it, so that the normal code flow
++ * will not attempt to free it.
++ */
++ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
++ atomic_inc(&bfqd->oom_bfqq.ref);
++ bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
++ bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
++ bfqd->oom_bfqq.entity.new_weight =
++ bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
++ /*
++ * Trigger weight initialization, according to ioprio, at the
++ * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
++ * class won't be changed any more.
++ */
++ bfqd->oom_bfqq.entity.prio_changed = 1;
++
++ bfqd->queue = q;
++
++ spin_lock_irq(q->queue_lock);
++ q->elevator = eq;
++ spin_unlock_irq(q->queue_lock);
++
++ bfqd->root_group = bfq_create_group_hierarchy(bfqd, q->node);
++ if (!bfqd->root_group)
++ goto out_free;
++ bfq_init_root_group(bfqd->root_group, bfqd);
++ bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqd->active_numerous_groups = 0;
++#endif
++
++ init_timer(&bfqd->idle_slice_timer);
++ bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
++ bfqd->idle_slice_timer.data = (unsigned long)bfqd;
++
++ bfqd->queue_weights_tree = RB_ROOT;
++ bfqd->group_weights_tree = RB_ROOT;
++
++ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue);
++
++ INIT_LIST_HEAD(&bfqd->active_list);
++ INIT_LIST_HEAD(&bfqd->idle_list);
++ INIT_HLIST_HEAD(&bfqd->burst_list);
++
++ bfqd->hw_tag = -1;
++
++ bfqd->bfq_max_budget = bfq_default_max_budget;
++
++ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0];
++ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1];
++ bfqd->bfq_back_max = bfq_back_max;
++ bfqd->bfq_back_penalty = bfq_back_penalty;
++ bfqd->bfq_slice_idle = bfq_slice_idle;
++ bfqd->bfq_class_idle_last_service = 0;
++ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq;
++ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async;
++ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync;
++
++ bfqd->bfq_requests_within_timer = 120;
++
++ bfqd->bfq_large_burst_thresh = 11;
++ bfqd->bfq_burst_interval = msecs_to_jiffies(500);
++
++ bfqd->low_latency = true;
++
++ bfqd->bfq_wr_coeff = 20;
++ bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300);
++ bfqd->bfq_wr_max_time = 0;
++ bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
++ bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500);
++ bfqd->bfq_wr_max_softrt_rate = 7000; /*
++ * Approximate rate required
++					      * to play back or record a
++ * high-definition compressed
++ * video.
++ */
++ bfqd->wr_busy_queues = 0;
++ bfqd->busy_in_flight_queues = 0;
++ bfqd->const_seeky_busy_in_flight_queues = 0;
++
++ /*
++ * Begin by assuming, optimistically, that the device peak rate is
++ * equal to the highest reference rate.
++ */
++ bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
++ T_fast[blk_queue_nonrot(bfqd->queue)];
++ bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)];
++ bfqd->device_speed = BFQ_BFQD_FAST;
++
++ return 0;
++
++out_free:
++ kfree(bfqd);
++ kobject_put(&eq->kobj);
++ return -ENOMEM;
++}
++
++static void bfq_slab_kill(void)
++{
++ if (bfq_pool)
++ kmem_cache_destroy(bfq_pool);
++}
++
++static int __init bfq_slab_setup(void)
++{
++ bfq_pool = KMEM_CACHE(bfq_queue, 0);
++ if (!bfq_pool)
++ return -ENOMEM;
++ return 0;
++}
++
++static ssize_t bfq_var_show(unsigned int var, char *page)
++{
++ return sprintf(page, "%d\n", var);
++}
++
++static ssize_t bfq_var_store(unsigned long *var, const char *page,
++ size_t count)
++{
++ unsigned long new_val;
++ int ret = kstrtoul(page, 10, &new_val);
++
++ if (ret == 0)
++ *var = new_val;
++
++ return count;
++}
++
++static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ?
++ jiffies_to_msecs(bfqd->bfq_wr_max_time) :
++ jiffies_to_msecs(bfq_wr_duration(bfqd)));
++}
++
++static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
++{
++ struct bfq_queue *bfqq;
++ struct bfq_data *bfqd = e->elevator_data;
++ ssize_t num_char = 0;
++
++ num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n",
++ bfqd->queued);
++
++ spin_lock_irq(bfqd->queue->queue_lock);
++
++ num_char += sprintf(page + num_char, "Active:\n");
++ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) {
++ num_char += sprintf(page + num_char,
++ "pid%d: weight %hu, nr_queued %d %d, dur %d/%u\n",
++ bfqq->pid,
++ bfqq->entity.weight,
++ bfqq->queued[0],
++ bfqq->queued[1],
++ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ }
++
++ num_char += sprintf(page + num_char, "Idle:\n");
++ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) {
++ num_char += sprintf(page + num_char,
++ "pid%d: weight %hu, dur %d/%u\n",
++ bfqq->pid,
++ bfqq->entity.weight,
++ jiffies_to_msecs(jiffies -
++ bfqq->last_wr_start_finish),
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ }
++
++ spin_unlock_irq(bfqd->queue->queue_lock);
++
++ return num_char;
++}
++
++#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
++static ssize_t __FUNC(struct elevator_queue *e, char *page) \
++{ \
++ struct bfq_data *bfqd = e->elevator_data; \
++ unsigned int __data = __VAR; \
++ if (__CONV) \
++ __data = jiffies_to_msecs(__data); \
++ return bfq_var_show(__data, (page)); \
++}
++SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1);
++SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1);
++SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
++SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
++SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
++SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
++SHOW_FUNCTION(bfq_max_budget_async_rq_show,
++ bfqd->bfq_max_budget_async_rq, 0);
++SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1);
++SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1);
++SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
++SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0);
++SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1);
++SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, 1);
++SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async,
++ 1);
++SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0);
++#undef SHOW_FUNCTION
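++
++/*
++ * Illustrative expansion (a sketch of what the macro generates, not part
++ * of the original BFQ code): for instance,
++ * SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1) above
++ * produces the following sysfs read handler, converting the stored
++ * jiffies value to milliseconds because __CONV is 1:
++ *
++ * static ssize_t bfq_slice_idle_show(struct elevator_queue *e, char *page)
++ * {
++ *	struct bfq_data *bfqd = e->elevator_data;
++ *	unsigned int __data = bfqd->bfq_slice_idle;
++ *	if (1)
++ *		__data = jiffies_to_msecs(__data);
++ *	return bfq_var_show(__data, (page));
++ * }
++ */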
++
++#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
++static ssize_t \
++__FUNC(struct elevator_queue *e, const char *page, size_t count) \
++{ \
++ struct bfq_data *bfqd = e->elevator_data; \
++ unsigned long uninitialized_var(__data); \
++ int ret = bfq_var_store(&__data, (page), count); \
++ if (__data < (MIN)) \
++ __data = (MIN); \
++ else if (__data > (MAX)) \
++ __data = (MAX); \
++ if (__CONV) \
++ *(__PTR) = msecs_to_jiffies(__data); \
++ else \
++ *(__PTR) = __data; \
++ return ret; \
++}
++STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
++ INT_MAX, 1);
++STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
++ INT_MAX, 1);
++STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
++STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
++ INT_MAX, 0);
++STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
++STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq,
++ 1, INT_MAX, 0);
++STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0,
++ INT_MAX, 1);
++STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0);
++STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1);
++STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX,
++ 1);
++STORE_FUNCTION(bfq_wr_min_idle_time_store, &bfqd->bfq_wr_min_idle_time, 0,
++ INT_MAX, 1);
++STORE_FUNCTION(bfq_wr_min_inter_arr_async_store,
++ &bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1);
++STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0,
++ INT_MAX, 0);
++#undef STORE_FUNCTION
++
++/* do nothing for the moment */
++static ssize_t bfq_weights_store(struct elevator_queue *e,
++ const char *page, size_t count)
++{
++ return count;
++}
++
++static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
++{
++ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
++
++ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES)
++ return bfq_calc_max_budget(bfqd->peak_rate, timeout);
++ else
++ return bfq_default_max_budget;
++}
++
++static ssize_t bfq_max_budget_store(struct elevator_queue *e,
++ const char *page, size_t count)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ unsigned long uninitialized_var(__data);
++ int ret = bfq_var_store(&__data, (page), count);
++
++ if (__data == 0)
++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
++ else {
++ if (__data > INT_MAX)
++ __data = INT_MAX;
++ bfqd->bfq_max_budget = __data;
++ }
++
++ bfqd->bfq_user_max_budget = __data;
++
++ return ret;
++}
++
++static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
++ const char *page, size_t count)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ unsigned long uninitialized_var(__data);
++ int ret = bfq_var_store(&__data, (page), count);
++
++ if (__data < 1)
++ __data = 1;
++ else if (__data > INT_MAX)
++ __data = INT_MAX;
++
++ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data);
++ if (bfqd->bfq_user_max_budget == 0)
++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
++
++ return ret;
++}
++
++static ssize_t bfq_low_latency_store(struct elevator_queue *e,
++ const char *page, size_t count)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ unsigned long uninitialized_var(__data);
++ int ret = bfq_var_store(&__data, (page), count);
++
++ if (__data > 1)
++ __data = 1;
++ if (__data == 0 && bfqd->low_latency != 0)
++ bfq_end_wr(bfqd);
++ bfqd->low_latency = __data;
++
++ return ret;
++}
++
++#define BFQ_ATTR(name) \
++ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store)
++
++static struct elv_fs_entry bfq_attrs[] = {
++ BFQ_ATTR(fifo_expire_sync),
++ BFQ_ATTR(fifo_expire_async),
++ BFQ_ATTR(back_seek_max),
++ BFQ_ATTR(back_seek_penalty),
++ BFQ_ATTR(slice_idle),
++ BFQ_ATTR(max_budget),
++ BFQ_ATTR(max_budget_async_rq),
++ BFQ_ATTR(timeout_sync),
++ BFQ_ATTR(timeout_async),
++ BFQ_ATTR(low_latency),
++ BFQ_ATTR(wr_coeff),
++ BFQ_ATTR(wr_max_time),
++ BFQ_ATTR(wr_rt_max_time),
++ BFQ_ATTR(wr_min_idle_time),
++ BFQ_ATTR(wr_min_inter_arr_async),
++ BFQ_ATTR(wr_max_softrt_rate),
++ BFQ_ATTR(weights),
++ __ATTR_NULL
++};
++
++static struct elevator_type iosched_bfq = {
++ .ops = {
++ .elevator_merge_fn = bfq_merge,
++ .elevator_merged_fn = bfq_merged_request,
++ .elevator_merge_req_fn = bfq_merged_requests,
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ .elevator_bio_merged_fn = bfq_bio_merged,
++#endif
++ .elevator_allow_merge_fn = bfq_allow_merge,
++ .elevator_dispatch_fn = bfq_dispatch_requests,
++ .elevator_add_req_fn = bfq_insert_request,
++ .elevator_activate_req_fn = bfq_activate_request,
++ .elevator_deactivate_req_fn = bfq_deactivate_request,
++ .elevator_completed_req_fn = bfq_completed_request,
++ .elevator_former_req_fn = elv_rb_former_request,
++ .elevator_latter_req_fn = elv_rb_latter_request,
++ .elevator_init_icq_fn = bfq_init_icq,
++ .elevator_exit_icq_fn = bfq_exit_icq,
++ .elevator_set_req_fn = bfq_set_request,
++ .elevator_put_req_fn = bfq_put_request,
++ .elevator_may_queue_fn = bfq_may_queue,
++ .elevator_init_fn = bfq_init_queue,
++ .elevator_exit_fn = bfq_exit_queue,
++ },
++ .icq_size = sizeof(struct bfq_io_cq),
++ .icq_align = __alignof__(struct bfq_io_cq),
++ .elevator_attrs = bfq_attrs,
++ .elevator_name = "bfq",
++ .elevator_owner = THIS_MODULE,
++};
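++
++/*
++ * Usage sketch (device name assumed for illustration): once the module
++ * is registered, the scheduler can typically be selected per device
++ * through the standard elevator interface, e.g.
++ *
++ *   echo bfq > /sys/block/sda/queue/scheduler
++ *
++ * after which the tunables listed in bfq_attrs appear under
++ * /sys/block/sda/queue/iosched/.
++ */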
++
++static int __init bfq_init(void)
++{
++ int ret;
++
++ /*
++ * Can be 0 on HZ < 1000 setups.
++ */
++ if (bfq_slice_idle == 0)
++ bfq_slice_idle = 1;
++
++ if (bfq_timeout_async == 0)
++ bfq_timeout_async = 1;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ ret = blkcg_policy_register(&blkcg_policy_bfq);
++ if (ret)
++ return ret;
++#endif
++
++ ret = -ENOMEM;
++ if (bfq_slab_setup())
++ goto err_pol_unreg;
++
++ /*
++ * Times to load large popular applications for the typical systems
++ * installed on the reference devices (see the comments before the
++ * definitions of the two arrays).
++ */
++ T_slow[0] = msecs_to_jiffies(2600);
++ T_slow[1] = msecs_to_jiffies(1000);
++ T_fast[0] = msecs_to_jiffies(5500);
++ T_fast[1] = msecs_to_jiffies(2000);
++
++ /*
++ * Thresholds that determine the switch between speed classes (see
++ * the comments before the definition of the array).
++ */
++ device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2;
++ device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2;
++
++ ret = elv_register(&iosched_bfq);
++ if (ret)
++ goto err_pol_unreg;
++
++ pr_info("BFQ I/O-scheduler: v7r11");
++
++ return 0;
++
++err_pol_unreg:
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ blkcg_policy_unregister(&blkcg_policy_bfq);
++#endif
++ return ret;
++}
++
++static void __exit bfq_exit(void)
++{
++ elv_unregister(&iosched_bfq);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ blkcg_policy_unregister(&blkcg_policy_bfq);
++#endif
++ bfq_slab_kill();
++}
++
++module_init(bfq_init);
++module_exit(bfq_exit);
++
++MODULE_AUTHOR("Arianna Avanzini, Fabio Checconi, Paolo Valente");
++MODULE_LICENSE("GPL");
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+new file mode 100644
+index 0000000..a64fec1
+--- /dev/null
++++ b/block/bfq-sched.c
+@@ -0,0 +1,1200 @@
++/*
++ * BFQ: Hierarchical B-WF2Q+ scheduler.
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
++ *
++ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
++ * Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ */
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++#define for_each_entity(entity) \
++ for (; entity ; entity = entity->parent)
++
++#define for_each_entity_safe(entity, parent) \
++ for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
++
++
++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
++ int extract,
++ struct bfq_data *bfqd);
++
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
++
++static void bfq_update_budget(struct bfq_entity *next_in_service)
++{
++ struct bfq_entity *bfqg_entity;
++ struct bfq_group *bfqg;
++ struct bfq_sched_data *group_sd;
++
++ BUG_ON(!next_in_service);
++
++ group_sd = next_in_service->sched_data;
++
++ bfqg = container_of(group_sd, struct bfq_group, sched_data);
++ /*
++ * bfq_group's my_entity field is not NULL only if the group
++ * is not the root group. We must not touch the root entity
++ * as it must never become an in-service entity.
++ */
++ bfqg_entity = bfqg->my_entity;
++ if (bfqg_entity)
++ bfqg_entity->budget = next_in_service->budget;
++}
++
++static int bfq_update_next_in_service(struct bfq_sched_data *sd)
++{
++ struct bfq_entity *next_in_service;
++
++ if (sd->in_service_entity)
++ /* will update/requeue at the end of service */
++ return 0;
++
++ /*
++ * NOTE: this can be improved in many ways, such as returning
++	 * 1 (and thus propagating the update upwards) only when the
++	 * budget changes, or caching the bfqq that will be scheduled
++	 * next from this subtree. For now we worry more about
++	 * correctness than about performance.
++ */
++ next_in_service = bfq_lookup_next_entity(sd, 0, NULL);
++ sd->next_in_service = next_in_service;
++
++ if (next_in_service)
++ bfq_update_budget(next_in_service);
++
++ return 1;
++}
++
++static void bfq_check_next_in_service(struct bfq_sched_data *sd,
++ struct bfq_entity *entity)
++{
++ BUG_ON(sd->next_in_service != entity);
++}
++#else
++#define for_each_entity(entity) \
++ for (; entity ; entity = NULL)
++
++#define for_each_entity_safe(entity, parent) \
++ for (parent = NULL; entity ; entity = parent)
++
++static int bfq_update_next_in_service(struct bfq_sched_data *sd)
++{
++ return 0;
++}
++
++static void bfq_check_next_in_service(struct bfq_sched_data *sd,
++ struct bfq_entity *entity)
++{
++}
++
++static void bfq_update_budget(struct bfq_entity *next_in_service)
++{
++}
++#endif
++
++/*
++ * Shift for timestamp calculations. This actually limits the maximum
++ * service allowed in one timestamp delta (small shift values increase it),
++ * the maximum total weight that can be used for the queues in the system
++ * (big shift values increase it), and the period of virtual time
++ * wraparounds.
++ */
++#define WFQ_SERVICE_SHIFT 22
++
++/**
++ * bfq_gt - compare two timestamps.
++ * @a: first ts.
++ * @b: second ts.
++ *
++ * Return @a > @b, dealing with wrapping correctly.
++ */
++static int bfq_gt(u64 a, u64 b)
++{
++ return (s64)(a - b) > 0;
++}
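++
++/*
++ * Illustrative example (values assumed): with a = 2 and
++ * b = ULLONG_MAX, i.e., a has just wrapped past b, the unsigned
++ * difference a - b is 3, so (s64)(a - b) > 0 and bfq_gt(a, b)
++ * correctly reports that a is later, whereas a plain unsigned
++ * comparison would not.
++ */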
++
++static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = NULL;
++
++ BUG_ON(!entity);
++
++ if (!entity->my_sched_data)
++ bfqq = container_of(entity, struct bfq_queue, entity);
++
++ return bfqq;
++}
++
++
++/**
++ * bfq_delta - map service into the virtual time domain.
++ * @service: amount of service.
++ * @weight: scale factor (weight of an entity or weight sum).
++ */
++static u64 bfq_delta(unsigned long service, unsigned long weight)
++{
++ u64 d = (u64)service << WFQ_SERVICE_SHIFT;
++
++ do_div(d, weight);
++ return d;
++}
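++
++/*
++ * Worked example (values assumed): with WFQ_SERVICE_SHIFT == 22,
++ * charging a service of 1000 sectors to an entity of weight 100
++ * yields (1000 << 22) / 100 = 41943040 units of virtual time, i.e.,
++ * service / weight scaled by 2^22 to preserve integer precision.
++ */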
++
++/**
++ * bfq_calc_finish - assign the finish time to an entity.
++ * @entity: the entity to act upon.
++ * @service: the service to be charged to the entity.
++ */
++static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++
++ BUG_ON(entity->weight == 0);
++
++ entity->finish = entity->start +
++ bfq_delta(service, entity->weight);
++
++ if (bfqq) {
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "calc_finish: serv %lu, w %d",
++ service, entity->weight);
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "calc_finish: start %llu, finish %llu, delta %llu",
++ entity->start, entity->finish,
++ bfq_delta(service, entity->weight));
++ }
++}
++
++/**
++ * bfq_entity_of - get an entity from a node.
++ * @node: the node field of the entity.
++ *
++ * Convert a node pointer to the corresponding entity. This is used only
++ * to simplify the logic of some functions and not as the generic
++ * conversion mechanism because, e.g., in the tree walking functions,
++ * the check for a %NULL value would be redundant.
++ */
++static struct bfq_entity *bfq_entity_of(struct rb_node *node)
++{
++ struct bfq_entity *entity = NULL;
++
++ if (node)
++ entity = rb_entry(node, struct bfq_entity, rb_node);
++
++ return entity;
++}
++
++/**
++ * bfq_extract - remove an entity from a tree.
++ * @root: the tree root.
++ * @entity: the entity to remove.
++ */
++static void bfq_extract(struct rb_root *root, struct bfq_entity *entity)
++{
++ BUG_ON(entity->tree != root);
++
++ entity->tree = NULL;
++ rb_erase(&entity->rb_node, root);
++}
++
++/**
++ * bfq_idle_extract - extract an entity from the idle tree.
++ * @st: the service tree of the owning @entity.
++ * @entity: the entity being removed.
++ */
++static void bfq_idle_extract(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ struct rb_node *next;
++
++ BUG_ON(entity->tree != &st->idle);
++
++ if (entity == st->first_idle) {
++ next = rb_next(&entity->rb_node);
++ st->first_idle = bfq_entity_of(next);
++ }
++
++ if (entity == st->last_idle) {
++ next = rb_prev(&entity->rb_node);
++ st->last_idle = bfq_entity_of(next);
++ }
++
++ bfq_extract(&st->idle, entity);
++
++ if (bfqq)
++ list_del(&bfqq->bfqq_list);
++}
++
++/**
++ * bfq_insert - generic tree insertion.
++ * @root: tree root.
++ * @entity: entity to insert.
++ *
++ * This is used for the idle and the active tree, since they are both
++ * ordered by finish time.
++ */
++static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
++{
++ struct bfq_entity *entry;
++ struct rb_node **node = &root->rb_node;
++ struct rb_node *parent = NULL;
++
++ BUG_ON(entity->tree);
++
++ while (*node) {
++ parent = *node;
++ entry = rb_entry(parent, struct bfq_entity, rb_node);
++
++ if (bfq_gt(entry->finish, entity->finish))
++ node = &parent->rb_left;
++ else
++ node = &parent->rb_right;
++ }
++
++ rb_link_node(&entity->rb_node, parent, node);
++ rb_insert_color(&entity->rb_node, root);
++
++ entity->tree = root;
++}
++
++/**
++ * bfq_update_min - update the min_start field of an entity.
++ * @entity: the entity to update.
++ * @node: one of its children.
++ *
++ * This function is called when @entity may store an invalid value for
++ * min_start due to updates to the active tree. The function assumes
++ * that the subtree rooted at @node (which may be its left or its right
++ * child) has a valid min_start value.
++ */
++static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node)
++{
++ struct bfq_entity *child;
++
++ if (node) {
++ child = rb_entry(node, struct bfq_entity, rb_node);
++ if (bfq_gt(entity->min_start, child->min_start))
++ entity->min_start = child->min_start;
++ }
++}
++
++/**
++ * bfq_update_active_node - recalculate min_start.
++ * @node: the node to update.
++ *
++ * @node may have changed position or one of its children may have moved;
++ * this function updates its min_start value. The left and right subtrees
++ * are assumed to hold a correct min_start value.
++ */
++static void bfq_update_active_node(struct rb_node *node)
++{
++ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);
++
++ entity->min_start = entity->start;
++ bfq_update_min(entity, node->rb_right);
++ bfq_update_min(entity, node->rb_left);
++}
++
++/**
++ * bfq_update_active_tree - update min_start for the whole active tree.
++ * @node: the starting node.
++ *
++ * @node must be the deepest modified node after an update. This function
++ * updates its min_start using the values held by its children, assuming
++ * that they did not change, and then updates all the nodes that may have
++ * changed in the path to the root. The only nodes that may have changed
++ * are the ones in the path or their siblings.
++ */
++static void bfq_update_active_tree(struct rb_node *node)
++{
++ struct rb_node *parent;
++
++up:
++ bfq_update_active_node(node);
++
++ parent = rb_parent(node);
++ if (!parent)
++ return;
++
++ if (node == parent->rb_left && parent->rb_right)
++ bfq_update_active_node(parent->rb_right);
++ else if (parent->rb_left)
++ bfq_update_active_node(parent->rb_left);
++
++ node = parent;
++ goto up;
++}
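++
++/*
++ * Illustrative example (start times assumed): if a node with start
++ * time 3 is inserted under a parent whose subtree so far has
++ * min_start 5, the new minimum 3 is propagated along the path to the
++ * root, while a node with start time 7 would leave min_start at 5;
++ * this is what keeps eligibility lookups logarithmic.
++ */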
++
++static void bfq_weights_tree_add(struct bfq_data *bfqd,
++ struct bfq_entity *entity,
++ struct rb_root *root);
++
++static void bfq_weights_tree_remove(struct bfq_data *bfqd,
++ struct bfq_entity *entity,
++ struct rb_root *root);
++
++
++/**
++ * bfq_active_insert - insert an entity in the active tree of its
++ * group/device.
++ * @st: the service tree of the entity.
++ * @entity: the entity being inserted.
++ *
++ * The active tree is ordered by finish time, but an extra key is kept
++ * in each node, containing the minimum of the start times of
++ * its children (and of the node itself), so it is possible to search for
++ * the eligible node with the lowest finish time in logarithmic time.
++ */
++static void bfq_active_insert(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ struct rb_node *node = &entity->rb_node;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ struct bfq_sched_data *sd = NULL;
++ struct bfq_group *bfqg = NULL;
++ struct bfq_data *bfqd = NULL;
++#endif
++
++ bfq_insert(&st->active, entity);
++
++ if (node->rb_left)
++ node = node->rb_left;
++ else if (node->rb_right)
++ node = node->rb_right;
++
++ bfq_update_active_tree(node);
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ sd = entity->sched_data;
++ bfqg = container_of(sd, struct bfq_group, sched_data);
++ BUG_ON(!bfqg);
++ bfqd = (struct bfq_data *)bfqg->bfqd;
++#endif
++ if (bfqq)
++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else { /* bfq_group */
++ BUG_ON(!bfqd);
++ bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree);
++ }
++ if (bfqg != bfqd->root_group) {
++ BUG_ON(!bfqg);
++ BUG_ON(!bfqd);
++ bfqg->active_entities++;
++ if (bfqg->active_entities == 2)
++ bfqd->active_numerous_groups++;
++ }
++#endif
++}
++
++/**
++ * bfq_ioprio_to_weight - calc a weight from an ioprio.
++ * @ioprio: the ioprio value to convert.
++ */
++static unsigned short bfq_ioprio_to_weight(int ioprio)
++{
++ BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
++ return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio;
++}
++
++/**
++ * bfq_weight_to_ioprio - calc an ioprio from a weight.
++ * @weight: the weight value to convert.
++ *
++ * To preserve as much as possible the old only-ioprio user interface,
++ * 0 is used as an escape ioprio value for weights (numerically) equal to or
++ * larger than IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF.
++ */
++static unsigned short bfq_weight_to_ioprio(int weight)
++{
++ BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT);
++ return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight < 0 ?
++ 0 : IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight;
++}
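++
++/*
++ * Worked example (assuming IOPRIO_BE_NR == 8, as in the mainline
++ * headers, and BFQ_WEIGHT_CONVERSION_COEFF == 10): ioprio 0, the
++ * highest, maps to weight 80; the default ioprio 4 maps to 76; the
++ * lowest ioprio 7 maps to 73. bfq_weight_to_ioprio() inverts the
++ * mapping, returning ioprio 0 for any weight of 80 or more.
++ */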
++
++static void bfq_get_entity(struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++
++ if (bfqq) {
++ atomic_inc(&bfqq->ref);
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
++ bfqq, atomic_read(&bfqq->ref));
++ }
++}
++
++/**
++ * bfq_find_deepest - find the deepest node that an extraction can modify.
++ * @node: the node being removed.
++ *
++ * Do the first step of an extraction in an rb tree, looking for the
++ * node that will replace @node, and returning the deepest node that
++ * the following modifications to the tree can touch. If @node is the
++ * last node in the tree return %NULL.
++ */
++static struct rb_node *bfq_find_deepest(struct rb_node *node)
++{
++ struct rb_node *deepest;
++
++ if (!node->rb_right && !node->rb_left)
++ deepest = rb_parent(node);
++ else if (!node->rb_right)
++ deepest = node->rb_left;
++ else if (!node->rb_left)
++ deepest = node->rb_right;
++ else {
++ deepest = rb_next(node);
++ if (deepest->rb_right)
++ deepest = deepest->rb_right;
++ else if (rb_parent(deepest) != node)
++ deepest = rb_parent(deepest);
++ }
++
++ return deepest;
++}
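++
++/*
++ * Illustrative note: the two-children case above mirrors rb-tree
++ * removal, where the in-order successor (rb_next()) replaces the
++ * removed node; the deepest node rebalancing can then touch is the
++ * successor's right child if it has one, the successor's parent if
++ * the successor is not an immediate child of @node, or the successor
++ * itself otherwise.
++ */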
++
++/**
++ * bfq_active_extract - remove an entity from the active tree.
++ * @st: the service_tree containing the tree.
++ * @entity: the entity being removed.
++ */
++static void bfq_active_extract(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ struct rb_node *node;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ struct bfq_sched_data *sd = NULL;
++ struct bfq_group *bfqg = NULL;
++ struct bfq_data *bfqd = NULL;
++#endif
++
++ node = bfq_find_deepest(&entity->rb_node);
++ bfq_extract(&st->active, entity);
++
++ if (node)
++ bfq_update_active_tree(node);
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ sd = entity->sched_data;
++ bfqg = container_of(sd, struct bfq_group, sched_data);
++ BUG_ON(!bfqg);
++ bfqd = (struct bfq_data *)bfqg->bfqd;
++#endif
++ if (bfqq)
++ list_del(&bfqq->bfqq_list);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else { /* bfq_group */
++ BUG_ON(!bfqd);
++ bfq_weights_tree_remove(bfqd, entity,
++ &bfqd->group_weights_tree);
++ }
++ if (bfqg != bfqd->root_group) {
++ BUG_ON(!bfqg);
++ BUG_ON(!bfqd);
++ BUG_ON(!bfqg->active_entities);
++ bfqg->active_entities--;
++ if (bfqg->active_entities == 1) {
++ BUG_ON(!bfqd->active_numerous_groups);
++ bfqd->active_numerous_groups--;
++ }
++ }
++#endif
++}
++
++/**
++ * bfq_idle_insert - insert an entity into the idle tree.
++ * @st: the service tree containing the tree.
++ * @entity: the entity to insert.
++ */
++static void bfq_idle_insert(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ struct bfq_entity *first_idle = st->first_idle;
++ struct bfq_entity *last_idle = st->last_idle;
++
++ if (!first_idle || bfq_gt(first_idle->finish, entity->finish))
++ st->first_idle = entity;
++ if (!last_idle || bfq_gt(entity->finish, last_idle->finish))
++ st->last_idle = entity;
++
++ bfq_insert(&st->idle, entity);
++
++ if (bfqq)
++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
++}
++
++/**
++ * bfq_forget_entity - remove an entity from the wfq trees.
++ * @st: the service tree.
++ * @entity: the entity being removed.
++ *
++ * Update the device status and forget everything about @entity, dropping
++ * the device's reference to it, if it is a queue. Entities belonging to
++ * groups are not refcounted.
++ */
++static void bfq_forget_entity(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ struct bfq_sched_data *sd;
++
++ BUG_ON(!entity->on_st);
++
++ entity->on_st = 0;
++ st->wsum -= entity->weight;
++ if (bfqq) {
++ sd = entity->sched_data;
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
++ bfqq, atomic_read(&bfqq->ref));
++ bfq_put_queue(bfqq);
++ }
++}
++
++/**
++ * bfq_put_idle_entity - release the idle tree ref of an entity.
++ * @st: service tree for the entity.
++ * @entity: the entity being released.
++ */
++static void bfq_put_idle_entity(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ bfq_idle_extract(st, entity);
++ bfq_forget_entity(st, entity);
++}
++
++/**
++ * bfq_forget_idle - update the idle tree if necessary.
++ * @st: the service tree to act upon.
++ *
++ * To preserve the global O(log N) complexity we only remove one entry here;
++ * as the idle tree will not grow indefinitely this can be done safely.
++ */
++static void bfq_forget_idle(struct bfq_service_tree *st)
++{
++ struct bfq_entity *first_idle = st->first_idle;
++ struct bfq_entity *last_idle = st->last_idle;
++
++ if (RB_EMPTY_ROOT(&st->active) && last_idle &&
++ !bfq_gt(last_idle->finish, st->vtime)) {
++ /*
++ * Forget the whole idle tree, increasing the vtime past
++ * the last finish time of idle entities.
++ */
++ st->vtime = last_idle->finish;
++ }
++
++ if (first_idle && !bfq_gt(first_idle->finish, st->vtime))
++ bfq_put_idle_entity(st, first_idle);
++}
++
++static struct bfq_service_tree *
++__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
++ struct bfq_entity *entity)
++{
++ struct bfq_service_tree *new_st = old_st;
++
++ if (entity->prio_changed) {
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ unsigned short prev_weight, new_weight;
++ struct bfq_data *bfqd = NULL;
++ struct rb_root *root;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ struct bfq_sched_data *sd;
++ struct bfq_group *bfqg;
++#endif
++
++ if (bfqq)
++ bfqd = bfqq->bfqd;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ sd = entity->my_sched_data;
++ bfqg = container_of(sd, struct bfq_group, sched_data);
++ BUG_ON(!bfqg);
++ bfqd = (struct bfq_data *)bfqg->bfqd;
++ BUG_ON(!bfqd);
++ }
++#endif
++
++ BUG_ON(old_st->wsum < entity->weight);
++ old_st->wsum -= entity->weight;
++
++ if (entity->new_weight != entity->orig_weight) {
++ if (entity->new_weight < BFQ_MIN_WEIGHT ||
++ entity->new_weight > BFQ_MAX_WEIGHT) {
++ printk(KERN_CRIT "update_weight_prio: "
++ "new_weight %d\n",
++ entity->new_weight);
++ BUG();
++ }
++ entity->orig_weight = entity->new_weight;
++ if (bfqq)
++ bfqq->ioprio =
++ bfq_weight_to_ioprio(entity->orig_weight);
++ }
++
++ if (bfqq)
++ bfqq->ioprio_class = bfqq->new_ioprio_class;
++ entity->prio_changed = 0;
++
++ /*
++		 * NOTE: here we may be changing the weight too early;
++		 * this will cause unfairness. The correct approach
++ * would have required additional complexity to defer
++ * weight changes to the proper time instants (i.e.,
++ * when entity->finish <= old_st->vtime).
++ */
++ new_st = bfq_entity_service_tree(entity);
++
++ prev_weight = entity->weight;
++ new_weight = entity->orig_weight *
++ (bfqq ? bfqq->wr_coeff : 1);
++ /*
++ * If the weight of the entity changes, remove the entity
++ * from its old weight counter (if there is a counter
++ * associated with the entity), and add it to the counter
++ * associated with its new weight.
++ */
++ if (prev_weight != new_weight) {
++ root = bfqq ? &bfqd->queue_weights_tree :
++ &bfqd->group_weights_tree;
++ bfq_weights_tree_remove(bfqd, entity, root);
++ }
++ entity->weight = new_weight;
++ /*
++ * Add the entity to its weights tree only if it is
++ * not associated with a weight-raised queue.
++ */
++ if (prev_weight != new_weight &&
++ (bfqq ? bfqq->wr_coeff == 1 : 1))
++ /* If we get here, root has been initialized. */
++ bfq_weights_tree_add(bfqd, entity, root);
++
++ new_st->wsum += entity->weight;
++
++ if (new_st != old_st)
++ entity->start = new_st->vtime;
++ }
++
++ return new_st;
++}
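++
++/*
++ * Worked example (weight values assumed): with the default
++ * bfq_wr_coeff of 20 set in bfq_init_queue(), a queue whose
++ * orig_weight is 76 is scheduled as if its weight were 1520 while
++ * weight-raised, and, per the check above, is deliberately kept out
++ * of the queue_weights_tree for as long as wr_coeff > 1.
++ */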
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
++#endif
++
++/**
++ * bfq_bfqq_served - update the scheduler status after selection for
++ * service.
++ * @bfqq: the queue being served.
++ * @served: amount of service received during the last service round.
++ *
++ * NOTE: this can be optimized, as the timestamps of upper level entities
++ * are synchronized every time a new bfqq is selected for service. For now,
++ * we keep it this way to better check consistency.
++ */
++static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++ struct bfq_service_tree *st;
++
++ for_each_entity(entity) {
++ st = bfq_entity_service_tree(entity);
++
++ entity->service += served;
++ BUG_ON(entity->service > entity->budget);
++ BUG_ON(st->wsum == 0);
++
++ st->vtime += bfq_delta(served, st->wsum);
++ bfq_forget_idle(st);
++ }
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
++#endif
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
++}
++
++/**
++ * bfq_bfqq_charge_full_budget - set the service to the entity budget.
++ * @bfqq: the queue that needs a service update.
++ *
++ * When it's not possible to be fair in the service domain, because
++ * a queue is not consuming its budget fast enough (where "fast"
++ * depends on the timeout parameter), we charge it a full
++ * budget. In this way we should obtain a sort of time-domain
++ * fairness among all the seeky/slow queues.
++ */
++static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
++
++ bfq_bfqq_served(bfqq, entity->budget - entity->service);
++}
++
++/**
++ * __bfq_activate_entity - activate an entity.
++ * @entity: the entity being activated.
++ *
++ * Called whenever an entity is activated, i.e., it is not active and one
++ * of its children receives a new request, or has to be reactivated due to
++ * budget exhaustion. It uses the current budget of the entity (and the
++ * service it has received, if @entity is in service) to calculate its
++ * timestamps.
++ */
++static void __bfq_activate_entity(struct bfq_entity *entity)
++{
++ struct bfq_sched_data *sd = entity->sched_data;
++ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
++
++ if (entity == sd->in_service_entity) {
++ BUG_ON(entity->tree);
++ /*
++		 * If we are requeueing the current entity, we have
++		 * to take care not to charge it for service it has
++		 * not received.
++ */
++ bfq_calc_finish(entity, entity->service);
++ entity->start = entity->finish;
++ sd->in_service_entity = NULL;
++ } else if (entity->tree == &st->active) {
++ /*
++ * Requeueing an entity due to a change of some
++ * next_in_service entity below it. We reuse the
++ * old start time.
++ */
++ bfq_active_extract(st, entity);
++ } else if (entity->tree == &st->idle) {
++ /*
++ * Must be on the idle tree, bfq_idle_extract() will
++ * check for that.
++ */
++ bfq_idle_extract(st, entity);
++ entity->start = bfq_gt(st->vtime, entity->finish) ?
++ st->vtime : entity->finish;
++ } else {
++ /*
++		 * The finish time of the entity may be invalid; it is
++		 * certainly in the past, otherwise the queue would
++		 * have been on the idle tree.
++ */
++ entity->start = st->vtime;
++ st->wsum += entity->weight;
++ bfq_get_entity(entity);
++
++ BUG_ON(entity->on_st);
++ entity->on_st = 1;
++ }
++
++ st = __bfq_entity_update_weight_prio(st, entity);
++ bfq_calc_finish(entity, entity->budget);
++ bfq_active_insert(st, entity);
++}
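++
++/*
++ * Illustrative example (budget and weight assumed): a newly
++ * backlogged entity starts at the current vtime, and with budget
++ * 8192 and weight 76 its finish time becomes
++ * start + (8192 << 22) / 76; larger weights thus yield earlier
++ * finish times and, in turn, more frequent selection.
++ */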
++
++/**
++ * bfq_activate_entity - activate an entity and its ancestors if necessary.
++ * @entity: the entity to activate.
++ *
++ * Activate @entity and all the entities on the path from it to the root.
++ */
++static void bfq_activate_entity(struct bfq_entity *entity)
++{
++ struct bfq_sched_data *sd;
++
++ for_each_entity(entity) {
++ __bfq_activate_entity(entity);
++
++ sd = entity->sched_data;
++ if (!bfq_update_next_in_service(sd))
++ /*
++ * No need to propagate the activation to the
++ * upper entities, as they will be updated when
++ * the in-service entity is rescheduled.
++ */
++ break;
++ }
++}
++
++/**
++ * __bfq_deactivate_entity - deactivate an entity from its service tree.
++ * @entity: the entity to deactivate.
++ * @requeue: if false, the entity will not be put into the idle tree.
++ *
++ * Deactivate an entity, independently of its previous state. If the
++ * entity was not on a service tree, just return; otherwise, if it is on
++ * any scheduler tree, extract it from that tree, and if necessary
++ * and the caller specified @requeue, put it on the idle tree.
++ *
++ * Return %1 if the caller should update the entity hierarchy, i.e.,
++ * if the entity was in service or if it was the next_in_service for
++ * its sched_data; return %0 otherwise.
++ */
++static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
++{
++ struct bfq_sched_data *sd = entity->sched_data;
++ struct bfq_service_tree *st;
++ int was_in_service;
++ int ret = 0;
++
++ if (sd == NULL || !entity->on_st) /* never activated, or inactive */
++ return 0;
++
++ st = bfq_entity_service_tree(entity);
++ was_in_service = entity == sd->in_service_entity;
++
++ BUG_ON(was_in_service && entity->tree);
++
++ if (was_in_service) {
++ bfq_calc_finish(entity, entity->service);
++ sd->in_service_entity = NULL;
++ } else if (entity->tree == &st->active)
++ bfq_active_extract(st, entity);
++ else if (entity->tree == &st->idle)
++ bfq_idle_extract(st, entity);
++ else if (entity->tree)
++ BUG();
++
++ if (was_in_service || sd->next_in_service == entity)
++ ret = bfq_update_next_in_service(sd);
++
++ if (!requeue || !bfq_gt(entity->finish, st->vtime))
++ bfq_forget_entity(st, entity);
++ else
++ bfq_idle_insert(st, entity);
++
++ BUG_ON(sd->in_service_entity == entity);
++ BUG_ON(sd->next_in_service == entity);
++
++ return ret;
++}
++
++/**
++ * bfq_deactivate_entity - deactivate an entity.
++ * @entity: the entity to deactivate.
++ * @requeue: true if the entity can be put on the idle tree
++ */
++static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
++{
++ struct bfq_sched_data *sd;
++ struct bfq_entity *parent;
++
++ for_each_entity_safe(entity, parent) {
++ sd = entity->sched_data;
++
++ if (!__bfq_deactivate_entity(entity, requeue))
++ /*
++ * The parent entity is still backlogged, and
++ * we don't need to update it as it is still
++ * in service.
++ */
++ break;
++
++ if (sd->next_in_service)
++ /*
++ * The parent entity is still backlogged and
++ * the budgets on the path towards the root
++ * need to be updated.
++ */
++ goto update;
++
++ /*
++		 * If we get here, the parent is no longer backlogged and
++ * we want to propagate the dequeue upwards.
++ */
++ requeue = 1;
++ }
++
++ return;
++
++update:
++ entity = parent;
++ for_each_entity(entity) {
++ __bfq_activate_entity(entity);
++
++ sd = entity->sched_data;
++ if (!bfq_update_next_in_service(sd))
++ break;
++ }
++}
++
++/**
++ * bfq_update_vtime - update vtime if necessary.
++ * @st: the service tree to act upon.
++ *
++ * If necessary update the service tree vtime to have at least one
++ * eligible entity, skipping to its start time. Assumes that the
++ * active tree of the device is not empty.
++ *
++ * NOTE: this hierarchical implementation updates vtimes quite often, so
++ * we may end up with reactivated processes getting timestamps after a
++ * vtime skip done because we needed a ->first_active entity on some
++ * intermediate node.
++ */
++static void bfq_update_vtime(struct bfq_service_tree *st)
++{
++ struct bfq_entity *entry;
++ struct rb_node *node = st->active.rb_node;
++
++ entry = rb_entry(node, struct bfq_entity, rb_node);
++ if (bfq_gt(entry->min_start, st->vtime)) {
++ st->vtime = entry->min_start;
++ bfq_forget_idle(st);
++ }
++}
++
++/**
++ * bfq_first_active_entity - find the eligible entity with
++ * the smallest finish time
++ * @st: the service tree to select from.
++ *
++ * This function searches the first schedulable entity, starting from the
++ * root of the tree and going to the left whenever that side contains
++ * a subtree with at least one eligible (start <= vtime) entity. The path on
++ * the right is followed only if a) the left subtree contains no eligible
++ * entities and b) no eligible entity has been found yet.
++ */
++static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
++{
++ struct bfq_entity *entry, *first = NULL;
++ struct rb_node *node = st->active.rb_node;
++
++ while (node) {
++ entry = rb_entry(node, struct bfq_entity, rb_node);
++left:
++ if (!bfq_gt(entry->start, st->vtime))
++ first = entry;
++
++ BUG_ON(bfq_gt(entry->min_start, st->vtime));
++
++ if (node->rb_left) {
++ entry = rb_entry(node->rb_left,
++ struct bfq_entity, rb_node);
++ if (!bfq_gt(entry->min_start, st->vtime)) {
++ node = node->rb_left;
++ goto left;
++ }
++ }
++ if (first)
++ break;
++ node = node->rb_right;
++ }
++
++ BUG_ON(!first && !RB_EMPTY_ROOT(&st->active));
++ return first;
++}
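++
++/*
++ * Illustrative trace (timestamps assumed): with vtime 10, a root
++ * whose start is 12 is not itself eligible, but if its left subtree
++ * has min_start 8 the walk descends left, where an eligible entity
++ * with a smaller finish time must exist; the right branch is tried
++ * only when no candidate has been found yet and the left subtree
++ * contains no eligible entity.
++ */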
++
++/**
++ * __bfq_lookup_next_entity - return the first eligible entity in @st.
++ * @st: the service tree.
++ *
++ * Update the virtual time in @st and return the first eligible entity
++ * it contains.
++ */
++static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
++ bool force)
++{
++ struct bfq_entity *entity, *new_next_in_service = NULL;
++
++ if (RB_EMPTY_ROOT(&st->active))
++ return NULL;
++
++ bfq_update_vtime(st);
++ entity = bfq_first_active_entity(st);
++ BUG_ON(bfq_gt(entity->start, st->vtime));
++
++ /*
++	 * If the chosen entity does not match the sched_data's
++	 * next_in_service and we are forcibly serving the IDLE priority
++	 * class tree, bubble the budget update up.
++ */
++ if (unlikely(force && entity != entity->sched_data->next_in_service)) {
++ new_next_in_service = entity;
++ for_each_entity(new_next_in_service)
++ bfq_update_budget(new_next_in_service);
++ }
++
++ return entity;
++}
++
++/**
++ * bfq_lookup_next_entity - return the first eligible entity in @sd.
++ * @sd: the sched_data.
++ * @extract: if true the returned entity will be also extracted from @sd.
++ *
++ * NOTE: since we cache the next_in_service entity at each level of the
++ * hierarchy, the complexity of the lookup can be decreased with
++ * absolutely no effort by just returning the cached next_in_service
++ * value; we prefer to do full lookups to test the consistency of the
++ * data structures.
++ */
++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
++ int extract,
++ struct bfq_data *bfqd)
++{
++ struct bfq_service_tree *st = sd->service_tree;
++ struct bfq_entity *entity;
++ int i = 0;
++
++ BUG_ON(sd->in_service_entity);
++
++ if (bfqd &&
++ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
++ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
++ true);
++ if (entity) {
++ i = BFQ_IOPRIO_CLASSES - 1;
++ bfqd->bfq_class_idle_last_service = jiffies;
++ sd->next_in_service = entity;
++ }
++ }
++ for (; i < BFQ_IOPRIO_CLASSES; i++) {
++ entity = __bfq_lookup_next_entity(st + i, false);
++ if (entity) {
++ if (extract) {
++ bfq_check_next_in_service(sd, entity);
++ bfq_active_extract(st + i, entity);
++ sd->in_service_entity = entity;
++ sd->next_in_service = NULL;
++ }
++ break;
++ }
++ }
++
++ return entity;
++}
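++
++/*
++ * Illustrative note: with BFQ_CL_IDLE_TIMEOUT == HZ/5 (see bfq.h),
++ * the check at the top forcibly serves the IDLE class at most once
++ * every 200 ms, which is what prevents idle-class queues from being
++ * starved indefinitely by the RT and BE classes.
++ */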
++
++/*
++ * Get next queue for service.
++ */
++static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
++{
++ struct bfq_entity *entity = NULL;
++ struct bfq_sched_data *sd;
++ struct bfq_queue *bfqq;
++
++ BUG_ON(bfqd->in_service_queue);
++
++ if (bfqd->busy_queues == 0)
++ return NULL;
++
++ sd = &bfqd->root_group->sched_data;
++ for (; sd ; sd = entity->my_sched_data) {
++ entity = bfq_lookup_next_entity(sd, 1, bfqd);
++ BUG_ON(!entity);
++ entity->service = 0;
++ }
++
++ bfqq = bfq_entity_to_bfqq(entity);
++ BUG_ON(!bfqq);
++
++ return bfqq;
++}
++
++static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
++{
++ if (bfqd->in_service_bic) {
++ put_io_context(bfqd->in_service_bic->icq.ioc);
++ bfqd->in_service_bic = NULL;
++ }
++
++ bfqd->in_service_queue = NULL;
++ del_timer(&bfqd->idle_slice_timer);
++}
++
++static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ int requeue)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++
++ if (bfqq == bfqd->in_service_queue)
++ __bfq_bfqd_reset_in_service(bfqd);
++
++ bfq_deactivate_entity(entity, requeue);
++}
++
++static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++
++ bfq_activate_entity(entity);
++}
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
++#endif
++
++/*
++ * Called when the bfqq no longer has requests pending, remove it from
++ * the service tree.
++ */
++static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ int requeue)
++{
++ BUG_ON(!bfq_bfqq_busy(bfqq));
++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
++
++ bfq_log_bfqq(bfqd, bfqq, "del from busy");
++
++ bfq_clear_bfqq_busy(bfqq);
++
++ BUG_ON(bfqd->busy_queues == 0);
++ bfqd->busy_queues--;
++
++ if (!bfqq->dispatched) {
++ bfq_weights_tree_remove(bfqd, &bfqq->entity,
++ &bfqd->queue_weights_tree);
++ if (!blk_queue_nonrot(bfqd->queue)) {
++ BUG_ON(!bfqd->busy_in_flight_queues);
++ bfqd->busy_in_flight_queues--;
++ if (bfq_bfqq_constantly_seeky(bfqq)) {
++ BUG_ON(!bfqd->
++ const_seeky_busy_in_flight_queues);
++ bfqd->const_seeky_busy_in_flight_queues--;
++ }
++ }
++ }
++ if (bfqq->wr_coeff > 1)
++ bfqd->wr_busy_queues--;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_dequeue(bfqq_group(bfqq));
++#endif
++
++ bfq_deactivate_bfqq(bfqd, bfqq, requeue);
++}
++
++/*
++ * Called when an inactive queue receives a new request.
++ */
++static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ BUG_ON(bfq_bfqq_busy(bfqq));
++ BUG_ON(bfqq == bfqd->in_service_queue);
++
++ bfq_log_bfqq(bfqd, bfqq, "add to busy");
++
++ bfq_activate_bfqq(bfqd, bfqq);
++
++ bfq_mark_bfqq_busy(bfqq);
++ bfqd->busy_queues++;
++
++ if (!bfqq->dispatched) {
++ if (bfqq->wr_coeff == 1)
++ bfq_weights_tree_add(bfqd, &bfqq->entity,
++ &bfqd->queue_weights_tree);
++ if (!blk_queue_nonrot(bfqd->queue)) {
++ bfqd->busy_in_flight_queues++;
++ if (bfq_bfqq_constantly_seeky(bfqq))
++ bfqd->const_seeky_busy_in_flight_queues++;
++ }
++ }
++ if (bfqq->wr_coeff > 1)
++ bfqd->wr_busy_queues++;
++}
+diff --git a/block/bfq.h b/block/bfq.h
+new file mode 100644
+index 0000000..485d0c9
+--- /dev/null
++++ b/block/bfq.h
+@@ -0,0 +1,801 @@
++/*
++ * BFQ-v7r11 for 4.5.0: data structures and common functions prototypes.
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
++ *
++ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
++ * Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ */
++
++#ifndef _BFQ_H
++#define _BFQ_H
++
++#include <linux/blktrace_api.h>
++#include <linux/hrtimer.h>
++#include <linux/ioprio.h>
++#include <linux/rbtree.h>
++#include <linux/blk-cgroup.h>
++
++#define BFQ_IOPRIO_CLASSES 3
++#define BFQ_CL_IDLE_TIMEOUT (HZ/5)
++
++#define BFQ_MIN_WEIGHT 1
++#define BFQ_MAX_WEIGHT 1000
++#define BFQ_WEIGHT_CONVERSION_COEFF 10
++
++#define BFQ_DEFAULT_QUEUE_IOPRIO 4
++
++#define BFQ_DEFAULT_GRP_WEIGHT 10
++#define BFQ_DEFAULT_GRP_IOPRIO 0
++#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
++
++struct bfq_entity;
++
++/**
++ * struct bfq_service_tree - per ioprio_class service tree.
++ * @active: tree for active entities (i.e., those backlogged).
++ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i).
++ * @first_idle: idle entity with minimum F_i.
++ * @last_idle: idle entity with maximum F_i.
++ * @vtime: scheduler virtual time.
++ * @wsum: scheduler weight sum; active and idle entities contribute to it.
++ *
++ * Each service tree represents a B-WF2Q+ scheduler on its own. Each
++ * ioprio_class has its own independent scheduler, and so its own
++ * bfq_service_tree. All the fields are protected by the queue lock
++ * of the containing bfqd.
++ */
++struct bfq_service_tree {
++ struct rb_root active;
++ struct rb_root idle;
++
++ struct bfq_entity *first_idle;
++ struct bfq_entity *last_idle;
++
++ u64 vtime;
++ unsigned long wsum;
++};
++
++/**
++ * struct bfq_sched_data - multi-class scheduler.
++ * @in_service_entity: entity in service.
++ * @next_in_service: head-of-the-line entity in the scheduler.
++ * @service_tree: array of service trees, one per ioprio_class.
++ *
++ * bfq_sched_data is the basic scheduler queue. It supports three
++ * ioprio_classes, and can be used either as a top-level queue or as
++ * an intermediate queue in a hierarchical setup.
++ * @next_in_service points to the active entity of the sched_data
++ * service trees that will be scheduled next.
++ *
++ * The supported ioprio_classes are the same as in CFQ, in descending
++ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
++ * Requests from higher priority queues are served before all the
++ * requests from lower priority queues; within the same priority
++ * class, queues are scheduled according to B-WF2Q+.
++ * All the fields are protected by the queue lock of the containing bfqd.
++ */
++struct bfq_sched_data {
++ struct bfq_entity *in_service_entity;
++ struct bfq_entity *next_in_service;
++ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
++};
++
++/**
++ * struct bfq_weight_counter - counter of the number of all active entities
++ * with a given weight.
++ * @weight: weight of the entities that this counter refers to.
++ * @num_active: number of active entities with this weight.
++ * @weights_node: weights tree member (see bfq_data's @queue_weights_tree
++ * and @group_weights_tree).
++ */
++struct bfq_weight_counter {
++ short int weight;
++ unsigned int num_active;
++ struct rb_node weights_node;
++};
++
++/**
++ * struct bfq_entity - schedulable entity.
++ * @rb_node: service_tree member.
++ * @weight_counter: pointer to the weight counter associated with this entity.
++ * @on_st: flag, true if the entity is on a tree (either the active or
++ * the idle one of its service_tree).
++ * @finish: B-WF2Q+ finish timestamp (aka F_i).
++ * @start: B-WF2Q+ start timestamp (aka S_i).
++ * @tree: tree the entity is enqueued into; %NULL if not on a tree.
++ * @min_start: minimum start time of the (active) subtree rooted at
++ * this entity; used for O(log N) lookups into active trees.
++ * @service: service received during the last round of service.
++ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight.
++ * @weight: weight of the queue
++ * @parent: parent entity, for hierarchical scheduling.
++ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the
++ * associated scheduler queue, %NULL on leaf nodes.
++ * @sched_data: the scheduler queue this entity belongs to.
++ * @ioprio: the ioprio in use.
++ * @new_weight: when a weight change is requested, the new weight value.
++ * @orig_weight: original weight, used to implement weight boosting
++ * @prio_changed: flag, true when the user requested a weight, ioprio or
++ * ioprio_class change.
++ *
++ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
++ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each
++ * entity belongs to the sched_data of the parent group in the cgroup
++ * hierarchy. Non-leaf entities have also their own sched_data, stored
++ * in @my_sched_data.
++ *
++ * Each entity stores independently its priority values; this would
++ * allow different weights on different devices, but this
++ * functionality is not exported to userspace for now. Priorities and
++ * weights are updated lazily, first storing the new values into the
++ * new_* fields, then setting the @prio_changed flag. As soon as
++ * there is a transition in the entity state that allows the priority
++ * update to take place the effective and the requested priority
++ * values are synchronized.
++ *
++ * Unless cgroups are used, the weight value is calculated from the
++ * ioprio to export the same interface as CFQ. When dealing with
++ * ``well-behaved'' queues (i.e., queues that do not spend too much
++ * time consuming their budget and have true sequential behavior, and
++ * when there are no external factors breaking anticipation) the
++ * relative weights at each level of the cgroups hierarchy should be
++ * guaranteed. All the fields are protected by the queue lock of the
++ * containing bfqd.
++ */
++struct bfq_entity {
++ struct rb_node rb_node;
++ struct bfq_weight_counter *weight_counter;
++
++ int on_st;
++
++ u64 finish;
++ u64 start;
++
++ struct rb_root *tree;
++
++ u64 min_start;
++
++ int service, budget;
++ unsigned short weight, new_weight;
++ unsigned short orig_weight;
++
++ struct bfq_entity *parent;
++
++ struct bfq_sched_data *my_sched_data;
++ struct bfq_sched_data *sched_data;
++
++ int prio_changed;
++};
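++
++/*
++ * Worked example with illustrative numbers (a sketch, not code from
++ * this patch): an entity with @weight 128 and @budget 4096 that
++ * becomes backlogged at S_i = 1000 gets
++ * F_i = S_i + @budget / @weight = 1000 + 4096 / 128 = 1032.
++ * With @weight 256 and the same budget, F_i would be 1016, so the
++ * heavier entity finishes earlier in virtual time and is scheduled
++ * first: this is how weights translate into service shares.
++ */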
++
++struct bfq_group;
++
++/**
++ * struct bfq_queue - leaf schedulable entity.
++ * @ref: reference counter.
++ * @bfqd: parent bfq_data.
++ * @new_ioprio: when an ioprio change is requested, the new ioprio value.
++ * @ioprio_class: the ioprio_class in use.
++ * @new_ioprio_class: when an ioprio_class change is requested, the new
++ * ioprio_class value.
++ * @new_bfqq: shared bfq_queue if queue is cooperating with
++ * one or more other queues.
++ * @sort_list: sorted list of pending requests.
++ * @next_rq: if fifo isn't expired, next request to serve.
++ * @queued: nr of requests queued in @sort_list.
++ * @allocated: currently allocated requests.
++ * @meta_pending: pending metadata requests.
++ * @fifo: fifo list of requests in sort_list.
++ * @entity: entity representing this queue in the scheduler.
++ * @max_budget: maximum budget allowed from the feedback mechanism.
++ * @budget_timeout: budget expiration (in jiffies).
++ * @dispatched: number of requests on the dispatch list or inside driver.
++ * @flags: status flags.
++ * @bfqq_list: node for active/idle bfqq list inside our bfqd.
++ * @burst_list_node: node for the device's burst list.
++ * @seek_samples: number of seeks sampled
++ * @seek_total: sum of the distances of the seeks sampled
++ * @seek_mean: mean seek distance
++ * @last_request_pos: position of the last request enqueued
++ * @requests_within_timer: number of consecutive pairs of request completion
++ * and arrival, such that the queue becomes idle
++ * after the completion, but the next request arrives
++ * within an idle time slice; used only if the queue's
++ * IO_bound has been cleared.
++ * @pid: pid of the process owning the queue, used for logging purposes.
++ * @last_wr_start_finish: start time of the current weight-raising period if
++ * the @bfq_queue is being weight-raised, otherwise
++ * finish time of the last weight-raising period
++ * @wr_cur_max_time: current max raising time for this queue
++ * @soft_rt_next_start: minimum time instant such that, only if a new
++ * request is enqueued after this time instant in an
++ * idle @bfq_queue with no outstanding requests, then
++ * the task associated with the queue is deemed as
++ * soft real-time (see the comments to the function
++ * bfq_bfqq_softrt_next_start())
++ * @last_idle_bklogged: time of the last transition of the @bfq_queue from
++ * idle to backlogged
++ * @service_from_backlogged: cumulative service received from the @bfq_queue
++ * since the last transition from idle to
++ * backlogged
++ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
++ * queue is shared
++ *
++ * A bfq_queue is a leaf request queue; it can be associated with one
++ * or more io_contexts, if it is async or shared between cooperating
++ * processes. @cgroup holds a reference to the cgroup, to be sure that it
++ * does not disappear while a bfqq still references it (mostly to avoid
++ * races between request issuing and task migration followed by cgroup
++ * destruction).
++ * All the fields are protected by the queue lock of the containing bfqd.
++ */
++struct bfq_queue {
++ atomic_t ref;
++ struct bfq_data *bfqd;
++
++ unsigned short ioprio, new_ioprio;
++ unsigned short ioprio_class, new_ioprio_class;
++
++ /* fields for cooperating queues handling */
++ struct bfq_queue *new_bfqq;
++ struct rb_node pos_node;
++ struct rb_root *pos_root;
++
++ struct rb_root sort_list;
++ struct request *next_rq;
++ int queued[2];
++ int allocated[2];
++ int meta_pending;
++ struct list_head fifo;
++
++ struct bfq_entity entity;
++
++ int max_budget;
++ unsigned long budget_timeout;
++
++ int dispatched;
++
++ unsigned int flags;
++
++ struct list_head bfqq_list;
++
++ struct hlist_node burst_list_node;
++
++ unsigned int seek_samples;
++ u64 seek_total;
++ sector_t seek_mean;
++ sector_t last_request_pos;
++
++ unsigned int requests_within_timer;
++
++ pid_t pid;
++ struct bfq_io_cq *bic;
++
++ /* weight-raising fields */
++ unsigned long wr_cur_max_time;
++ unsigned long soft_rt_next_start;
++ unsigned long last_wr_start_finish;
++ unsigned int wr_coeff;
++ unsigned long last_idle_bklogged;
++ unsigned long service_from_backlogged;
++};
++
++/**
++ * struct bfq_ttime - per process thinktime stats.
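++ * @last_end_request: time (jiffies) of the last request completion.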
++ * @ttime_total: total process thinktime
++ * @ttime_samples: number of thinktime samples
++ * @ttime_mean: average process thinktime
++ */
++struct bfq_ttime {
++ unsigned long last_end_request;
++
++ unsigned long ttime_total;
++ unsigned long ttime_samples;
++ unsigned long ttime_mean;
++};
++
++/**
++ * struct bfq_io_cq - per (request_queue, io_context) structure.
++ * @icq: associated io_cq structure
++ * @bfqq: array of two process queues, the sync and the async
++ * @ttime: associated @bfq_ttime struct
++ * @ioprio: per (request_queue, blkcg) ioprio.
++ * @blkcg_id: id of the blkcg the related io_cq belongs to.
++ */
++struct bfq_io_cq {
++ struct io_cq icq; /* must be the first member */
++ struct bfq_queue *bfqq[2];
++ struct bfq_ttime ttime;
++ int ioprio;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ uint64_t blkcg_id; /* the current blkcg ID */
++#endif
++};
++
++enum bfq_device_speed {
++ BFQ_BFQD_FAST,
++ BFQ_BFQD_SLOW,
++};
++
++/**
++ * struct bfq_data - per device data structure.
++ * @queue: request queue for the managed device.
++ * @root_group: root bfq_group for the device.
++ * @active_numerous_groups: number of bfq_groups containing more than one
++ * active @bfq_entity.
++ * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by
++ * weight. Used to keep track of whether all @bfq_queues
++ * have the same weight. The tree contains one counter
++ * for each distinct weight associated to some active
++ * and not weight-raised @bfq_queue (see the comments to
++ * the functions bfq_weights_tree_[add|remove] for
++ * further details).
++ * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted
++ * by weight. Used to keep track of whether all
++ * @bfq_groups have the same weight. The tree contains
++ * one counter for each distinct weight associated to
++ * some active @bfq_group (see the comments to the
++ * functions bfq_weights_tree_[add|remove] for further
++ * details).
++ * @busy_queues: number of bfq_queues containing requests (including the
++ * queue in service, even if it is idling).
++ * @busy_in_flight_queues: number of @bfq_queues containing pending or
++ * in-flight requests, plus the @bfq_queue in
++ * service, even if idle but waiting for the
++ * possible arrival of its next sync request. This
++ * field is updated only if the device is rotational,
++ * but used only if the device is also NCQ-capable.
++ * The reason why the field is updated also for non-
++ * NCQ-capable rotational devices is related to the
++ * fact that the value of @hw_tag may be set also
++ * later than when busy_in_flight_queues may need to
++ * be incremented for the first time(s). Taking also
++ * this possibility into account, to avoid unbalanced
++ * increments/decrements, would imply more overhead
++ * than just updating busy_in_flight_queues
++ * regardless of the value of @hw_tag.
++ * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues
++ * (that is, seeky queues that expired
++ * for budget timeout at least once)
++ * containing pending or in-flight
++ * requests, including the in-service
++ * @bfq_queue if constantly seeky. This
++ * field is updated only if the device
++ * is rotational, but used only if the
++ * device is also NCQ-capable (see the
++ * comments to @busy_in_flight_queues).
++ * @wr_busy_queues: number of weight-raised busy @bfq_queues.
++ * @queued: number of queued requests.
++ * @rq_in_driver: number of requests dispatched and waiting for completion.
++ * @sync_flight: number of sync requests in the driver.
++ * @max_rq_in_driver: max number of reqs in driver in the last
++ * @hw_tag_samples completed requests.
++ * @hw_tag_samples: nr of samples used to calculate hw_tag.
++ * @hw_tag: flag set to one if the driver is showing a queueing behavior.
++ * @budgets_assigned: number of budgets assigned.
++ * @idle_slice_timer: timer set when idling for the next sequential request
++ * from the queue in service.
++ * @unplug_work: delayed work to restart dispatching on the request queue.
++ * @in_service_queue: bfq_queue in service.
++ * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue.
++ * @last_position: on-disk position of the last served request.
++ * @last_budget_start: beginning of the last budget.
++ * @last_idling_start: beginning of the last idle slice.
++ * @peak_rate: peak transfer rate observed for a budget.
++ * @peak_rate_samples: number of samples used to calculate @peak_rate.
++ * @bfq_max_budget: maximum budget allotted to a bfq_queue before
++ * rescheduling.
++ * @active_list: list of all the bfq_queues active on the device.
++ * @idle_list: list of all the bfq_queues idle on the device.
++ * @bfq_fifo_expire: timeout for async/sync requests; when it expires
++ * requests are served in fifo order.
++ * @bfq_back_penalty: weight of backward seeks wrt forward ones.
++ * @bfq_back_max: maximum allowed backward seek.
++ * @bfq_slice_idle: maximum idling time.
++ * @bfq_user_max_budget: user-configured max budget value
++ * (0 for auto-tuning).
++ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to
++ * async queues.
++ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to
++ * prevent seeky queues from imposing long latencies on well-
++ * behaved ones (this also implies that seeky queues cannot
++ * receive guarantees in the service domain; after a timeout
++ * they are charged for the whole allocated budget, to try
++ * to preserve a behavior reasonably fair among them, but
++ * without service-domain guarantees).
++ * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is
++ * no longer granted any weight-raising.
++ * @bfq_failed_cooperations: number of consecutive failed cooperation
++ * chances after which weight-raising is restored
++ * to a queue subject to more than bfq_coop_thresh
++ * queue merges.
++ * @bfq_requests_within_timer: number of consecutive requests that must be
++ * issued within the idle time slice to set
++ * again idling to a queue which was marked as
++ * non-I/O-bound (see the definition of the
++ * IO_bound flag for further details).
++ * @last_ins_in_burst: last time at which a queue entered the current
++ * burst of queues being activated shortly after
++ * each other; for more details about this and the
++ * following parameters related to a burst of
++ * activations, see the comments to the function
++ * @bfq_handle_burst.
++ * @bfq_burst_interval: reference time interval used to decide whether a
++ * queue has been activated shortly after
++ * @last_ins_in_burst.
++ * @burst_size: number of queues in the current burst of queue activations.
++ * @bfq_large_burst_thresh: maximum burst size above which the current
++ * queue-activation burst is deemed as 'large'.
++ * @large_burst: true if a large queue-activation burst is in progress.
++ * @burst_list: head of the burst list (as for the above fields, more details
++ * in the comments to the function bfq_handle_burst).
++ * @low_latency: if set to true, low-latency heuristics are enabled.
++ * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised
++ * queue is multiplied.
++ * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies).
++ * @bfq_wr_rt_max_time: maximum duration for soft real-time processes.
++ * @bfq_wr_min_idle_time: minimum idle period after which weight-raising
++ * may be reactivated for a queue (in jiffies).
++ * @bfq_wr_min_inter_arr_async: minimum period between request arrivals
++ * after which weight-raising may be
++ * reactivated for an already busy queue
++ * (in jiffies).
++ * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue,
++ * sectors per second.
++ * @RT_prod: cached value of the product R*T used for computing the maximum
++ * duration of the weight raising automatically.
++ * @device_speed: device-speed class for the low-latency heuristic.
++ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions.
++ *
++ * All the fields are protected by the @queue lock.
++ */
++struct bfq_data {
++ struct request_queue *queue;
++
++ struct bfq_group *root_group;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ int active_numerous_groups;
++#endif
++
++ struct rb_root queue_weights_tree;
++ struct rb_root group_weights_tree;
++
++ int busy_queues;
++ int busy_in_flight_queues;
++ int const_seeky_busy_in_flight_queues;
++ int wr_busy_queues;
++ int queued;
++ int rq_in_driver;
++ int sync_flight;
++
++ int max_rq_in_driver;
++ int hw_tag_samples;
++ int hw_tag;
++
++ int budgets_assigned;
++
++ struct timer_list idle_slice_timer;
++ struct work_struct unplug_work;
++
++ struct bfq_queue *in_service_queue;
++ struct bfq_io_cq *in_service_bic;
++
++ sector_t last_position;
++
++ ktime_t last_budget_start;
++ ktime_t last_idling_start;
++ int peak_rate_samples;
++ u64 peak_rate;
++ int bfq_max_budget;
++
++ struct list_head active_list;
++ struct list_head idle_list;
++
++ unsigned int bfq_fifo_expire[2];
++ unsigned int bfq_back_penalty;
++ unsigned int bfq_back_max;
++ unsigned int bfq_slice_idle;
++ u64 bfq_class_idle_last_service;
++
++ int bfq_user_max_budget;
++ int bfq_max_budget_async_rq;
++ unsigned int bfq_timeout[2];
++
++ unsigned int bfq_coop_thresh;
++ unsigned int bfq_failed_cooperations;
++ unsigned int bfq_requests_within_timer;
++
++ unsigned long last_ins_in_burst;
++ unsigned long bfq_burst_interval;
++ int burst_size;
++ unsigned long bfq_large_burst_thresh;
++ bool large_burst;
++ struct hlist_head burst_list;
++
++ bool low_latency;
++
++ /* parameters of the low_latency heuristics */
++ unsigned int bfq_wr_coeff;
++ unsigned int bfq_wr_max_time;
++ unsigned int bfq_wr_rt_max_time;
++ unsigned int bfq_wr_min_idle_time;
++ unsigned long bfq_wr_min_inter_arr_async;
++ unsigned int bfq_wr_max_softrt_rate;
++ u64 RT_prod;
++ enum bfq_device_speed device_speed;
++
++ struct bfq_queue oom_bfqq;
++};
++
++enum bfqq_state_flags {
++ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */
++ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */
++ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
++ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
++ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */
++ BFQ_BFQQ_FLAG_sync, /* synchronous queue */
++ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
++ BFQ_BFQQ_FLAG_IO_bound, /*
++ * bfqq has timed-out at least once
++ * having consumed at most 2/10 of
++ * its budget
++ */
++ BFQ_BFQQ_FLAG_in_large_burst, /*
++ * bfqq activated in a large burst,
++ * see comments to bfq_handle_burst.
++ */
++ BFQ_BFQQ_FLAG_constantly_seeky, /*
++ * bfqq has proved to be slow and
++ * seeky until budget timeout
++ */
++ BFQ_BFQQ_FLAG_softrt_update, /*
++ * may need softrt-next-start
++ * update
++ */
++};
++
++#define BFQ_BFQQ_FNS(name) \
++static void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \
++{ \
++ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \
++} \
++static void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \
++{ \
++ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \
++} \
++static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
++{ \
++ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \
++}
++
++BFQ_BFQQ_FNS(busy);
++BFQ_BFQQ_FNS(wait_request);
++BFQ_BFQQ_FNS(must_alloc);
++BFQ_BFQQ_FNS(fifo_expire);
++BFQ_BFQQ_FNS(idle_window);
++BFQ_BFQQ_FNS(sync);
++BFQ_BFQQ_FNS(budget_new);
++BFQ_BFQQ_FNS(IO_bound);
++BFQ_BFQQ_FNS(in_large_burst);
++BFQ_BFQQ_FNS(constantly_seeky);
++BFQ_BFQQ_FNS(softrt_update);
++#undef BFQ_BFQQ_FNS
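++
++/*
++ * For illustration, BFQ_BFQQ_FNS(busy) above expands to:
++ *
++ * static void bfq_mark_bfqq_busy(struct bfq_queue *bfqq)
++ * { (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_busy); }
++ * static void bfq_clear_bfqq_busy(struct bfq_queue *bfqq)
++ * { (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_busy); }
++ * static int bfq_bfqq_busy(const struct bfq_queue *bfqq)
++ * { return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_busy)) != 0; }
++ *
++ * so every flag gets a mark/clear/test triple without hand-written
++ * boilerplate.
++ */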
++
++/* Logging facilities. */
++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
++ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args)
++
++#define bfq_log(bfqd, fmt, args...) \
++ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
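++
++/*
++ * Typical use, as a sketch (real call sites appear throughout
++ * bfq-iosched.c); the messages end up in the device's blktrace
++ * stream:
++ *
++ * bfq_log_bfqq(bfqd, bfqq, "dispatched request %p", rq);
++ * bfq_log(bfqd, "schedule dispatch");
++ */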
++
++/* Expiration reasons. */
++enum bfqq_expiration {
++ BFQ_BFQQ_TOO_IDLE = 0, /*
++ * queue has been idling for
++ * too long
++ */
++ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */
++ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */
++ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */
++};
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++
++struct bfqg_stats {
++ /* total bytes transferred */
++ struct blkg_rwstat service_bytes;
++ /* total IOs serviced, post merge */
++ struct blkg_rwstat serviced;
++ /* number of ios merged */
++ struct blkg_rwstat merged;
++ /* total time spent on device in ns, may not be accurate w/ queueing */
++ struct blkg_rwstat service_time;
++ /* total time spent waiting in scheduler queue in ns */
++ struct blkg_rwstat wait_time;
++ /* number of IOs queued up */
++ struct blkg_rwstat queued;
++ /* total sectors transferred */
++ struct blkg_stat sectors;
++ /* total disk time and nr sectors dispatched by this group */
++ struct blkg_stat time;
++ /* time not charged to this cgroup */
++ struct blkg_stat unaccounted_time;
++ /* sum of number of ios queued across all samples */
++ struct blkg_stat avg_queue_size_sum;
++ /* count of samples taken for average */
++ struct blkg_stat avg_queue_size_samples;
++ /* how many times this group has been removed from service tree */
++ struct blkg_stat dequeue;
++ /* total time spent waiting for it to be assigned a timeslice. */
++ struct blkg_stat group_wait_time;
++ /* time spent idling for this blkcg_gq */
++ struct blkg_stat idle_time;
++ /* total time with empty current active q with other requests queued */
++ struct blkg_stat empty_time;
++ /* fields after this shouldn't be cleared on stat reset */
++ uint64_t start_group_wait_time;
++ uint64_t start_idle_time;
++ uint64_t start_empty_time;
++ uint16_t flags;
++};
++
++/*
++ * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
++ *
++ * @pd: the blkcg_policy_data that this structure inherits
++ * @weight: weight of the bfq_group
++ */
++struct bfq_group_data {
++ /* must be the first member */
++ struct blkcg_policy_data pd;
++
++ unsigned short weight;
++};
++
++/**
++ * struct bfq_group - per (device, cgroup) data structure.
++ * @entity: schedulable entity to insert into the parent group sched_data.
++ * @sched_data: own sched_data, to contain child entities (they may be
++ * both bfq_queues and bfq_groups).
++ * @bfqd: the bfq_data for the device this group acts upon.
++ * @async_bfqq: array of async queues for all the tasks belonging to
++ * the group, one queue per ioprio value per ioprio_class,
++ * except for the idle class that has only one queue.
++ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
++ * @my_entity: pointer to @entity, %NULL for the toplevel group; used
++ * to avoid too many special cases during group creation/
++ * migration.
++ * @active_entities: number of active entities belonging to the group;
++ * unused for the root group. Used to know whether there
++ * are groups with more than one active @bfq_entity
++ * (see the comments to the function
++ * bfq_bfqq_must_not_expire()).
++ *
++ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
++ * there is a set of bfq_groups, each one collecting the lower-level
++ * entities belonging to the group that are acting on the same device.
++ *
++ * Locking works as follows:
++ * o @bfqd is protected by the queue lock, RCU is used to access it
++ * from the readers.
++ * o All the other fields are protected by the @bfqd queue lock.
++ */
++struct bfq_group {
++ /* must be the first member */
++ struct blkg_policy_data pd;
++
++ struct bfq_entity entity;
++ struct bfq_sched_data sched_data;
++
++ void *bfqd;
++
++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
++ struct bfq_queue *async_idle_bfqq;
++
++ struct bfq_entity *my_entity;
++
++ int active_entities;
++
++ struct bfqg_stats stats;
++ struct bfqg_stats dead_stats; /* stats pushed from dead children */
++};
++
++#else
++struct bfq_group {
++ struct bfq_sched_data sched_data;
++
++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
++ struct bfq_queue *async_idle_bfqq;
++};
++#endif
++
++static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
++
++static struct bfq_service_tree *
++bfq_entity_service_tree(struct bfq_entity *entity)
++{
++ struct bfq_sched_data *sched_data = entity->sched_data;
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ unsigned int idx = bfqq ? bfqq->ioprio_class - 1 :
++ BFQ_DEFAULT_GRP_CLASS - 1;
++
++ BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
++ BUG_ON(sched_data == NULL);
++
++ return sched_data->service_tree + idx;
++}
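++
++/*
++ * E.g., a queue with ioprio_class IOPRIO_CLASS_RT (1) maps to
++ * service_tree[0], IOPRIO_CLASS_BE (2) to service_tree[1] and
++ * IOPRIO_CLASS_IDLE (3) to service_tree[2]; group entities fall
++ * back to the best-effort tree (BFQ_DEFAULT_GRP_CLASS).
++ */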
++
++static struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
++{
++ return bic->bfqq[is_sync];
++}
++
++static void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq,
++ bool is_sync)
++{
++ bic->bfqq[is_sync] = bfqq;
++}
++
++static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
++{
++ return bic->icq.q->elevator->elevator_data;
++}
++
++/**
++ * bfq_get_bfqd_locked - get a lock to a bfqd using an RCU-protected pointer.
++ * @ptr: a pointer to a bfqd.
++ * @flags: storage for the flags to be saved.
++ *
++ * This function allows bfqg->bfqd to be protected by the
++ * queue lock of the bfqd it references; the pointer is dereferenced
++ * under RCU, so the storage for bfqd is assured to be safe as long
++ * as the RCU read side critical section does not end. After the
++ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be
++ * sure that no other writer accessed it. If we raced with a writer,
++ * the function returns NULL, with the queue unlocked, otherwise it
++ * returns the dereferenced pointer, with the queue locked.
++ */
++static struct bfq_data *bfq_get_bfqd_locked(void **ptr, unsigned long *flags)
++{
++ struct bfq_data *bfqd;
++
++ rcu_read_lock();
++ bfqd = rcu_dereference(*(struct bfq_data **)ptr);
++
++ if (bfqd != NULL) {
++ spin_lock_irqsave(bfqd->queue->queue_lock, *flags);
++ if (ptr == NULL)
++ printk(KERN_CRIT "get_bfqd_locked pointer NULL\n");
++ else if (*ptr == bfqd)
++ goto out;
++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
++ }
++
++ bfqd = NULL;
++out:
++ rcu_read_unlock();
++ return bfqd;
++}
++
++static void bfq_put_bfqd_unlock(struct bfq_data *bfqd, unsigned long *flags)
++{
++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
++}
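++
++/*
++ * Intended calling pattern for the pair above, as a sketch
++ * (hypothetical caller, for illustration only):
++ *
++ * unsigned long flags;
++ * struct bfq_data *bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
++ *
++ * if (bfqd) {
++ * ... work under bfqd->queue->queue_lock ...
++ * bfq_put_bfqd_unlock(bfqd, &flags);
++ * }
++ * If NULL is returned, the bfqd has gone away and no lock is held.
++ */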
++
++static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
++static void bfq_put_queue(struct bfq_queue *bfqq);
++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
++ struct bio *bio, int is_sync,
++ struct bfq_io_cq *bic, gfp_t gfp_mask);
++static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
++ struct bfq_group *bfqg);
++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
++
++#endif /* _BFQ_H */
+--
+1.9.1
+
diff --git a/5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch b/5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch
new file mode 100644
index 0000000..eb23acc
--- /dev/null
+++ b/5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch
@@ -0,0 +1,1101 @@
+From d93e55da4df8c5e7c33379780ad7d2fdb02e0568 Mon Sep 17 00:00:00 2001
+From: Mauro Andreolini <mauro.andreolini@unimore.it>
+Date: Sun, 6 Sep 2015 16:09:05 +0200
+Subject: [PATCH 3/4] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r11 for
+ 4.7.0
+
+A set of processes may happen to perform interleaved reads, i.e., requests
+whose union would give rise to a sequential read pattern. There are two
+typical cases: in the first case, processes read fixed-size chunks of
+data at a fixed distance from each other, while in the second case processes
+may read variable-size chunks at variable distances. The latter case occurs
+for example with QEMU, which splits the I/O generated by the guest into
+multiple chunks, and lets these chunks be served by a pool of cooperating
+processes, iteratively assigning the next chunk of I/O to the first
+available process. CFQ uses actual queue merging for the first type of
+processes, whereas it uses preemption to get a sequential read pattern out
+of the read requests performed by the second type of processes. In the end
+it uses two different mechanisms to achieve the same goal: boosting the
+throughput with interleaved I/O.
+
+This patch introduces Early Queue Merge (EQM), a unified mechanism to get a
+sequential read pattern with both types of processes. The main idea is
+checking newly arrived requests against the next request of the active queue
+both in case of actual request insert and in case of request merge. By doing
+so, both types of processes can be handled by just merging their queues.
+EQM is then simpler and more compact than the pair of mechanisms used in
+CFQ.
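+
+Schematically, the check EQM performs on each newly arrived request can
+be sketched as follows (illustrative pseudocode only; the actual logic
+is in bfq_setup_cooperator() in this patch):
+
+  if (request is close to next_rq of the in-service queue ||
+      request is close to next_rq of some other scheduled queue)
+          merge the issuing queue with that queue;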
+
+Finally, EQM also preserves the typical low-latency properties of BFQ, by
+properly restoring the weight-raising state of a queue when it gets back to
+a non-merged state.
+
+Signed-off-by: Mauro Andreolini <mauro.andreolini@unimore.it>
+Signed-off-by: Arianna Avanzini <avanzini@google.com>
+Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+---
+ block/bfq-cgroup.c | 4 +
+ block/bfq-iosched.c | 687 ++++++++++++++++++++++++++++++++++++++++++++++++++--
+ block/bfq.h | 66 +++++
+ 3 files changed, 743 insertions(+), 14 deletions(-)
+
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+index 8610cd6..5ee99ec 100644
+--- a/block/bfq-cgroup.c
++++ b/block/bfq-cgroup.c
+@@ -437,6 +437,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
+ */
+ bfqg->bfqd = bfqd;
+ bfqg->active_entities = 0;
++ bfqg->rq_pos_tree = RB_ROOT;
+ }
+
+ static void bfq_pd_free(struct blkg_policy_data *pd)
+@@ -530,6 +531,8 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+ return bfqg;
+ }
+
++static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
++
+ /**
+ * bfq_bfqq_move - migrate @bfqq to @bfqg.
+ * @bfqd: queue descriptor.
+@@ -577,6 +580,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ bfqg_get(bfqg);
+
+ if (busy) {
++ bfq_pos_tree_add_move(bfqd, bfqq);
+ if (resume)
+ bfq_activate_bfqq(bfqd, bfqq);
+ }
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index f9787a6..d1f648d 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -296,6 +296,72 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
+ }
+ }
+
++static struct bfq_queue *
++bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
++ sector_t sector, struct rb_node **ret_parent,
++ struct rb_node ***rb_link)
++{
++ struct rb_node **p, *parent;
++ struct bfq_queue *bfqq = NULL;
++
++ parent = NULL;
++ p = &root->rb_node;
++ while (*p) {
++ struct rb_node **n;
++
++ parent = *p;
++ bfqq = rb_entry(parent, struct bfq_queue, pos_node);
++
++ /*
++ * Sort strictly based on sector. Smallest to the left,
++ * largest to the right.
++ */
++ if (sector > blk_rq_pos(bfqq->next_rq))
++ n = &(*p)->rb_right;
++ else if (sector < blk_rq_pos(bfqq->next_rq))
++ n = &(*p)->rb_left;
++ else
++ break;
++ p = n;
++ bfqq = NULL;
++ }
++
++ *ret_parent = parent;
++ if (rb_link)
++ *rb_link = p;
++
++ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
++ (long long unsigned)sector,
++ bfqq ? bfqq->pid : 0);
++
++ return bfqq;
++}
++
++static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ struct rb_node **p, *parent;
++ struct bfq_queue *__bfqq;
++
++ if (bfqq->pos_root) {
++ rb_erase(&bfqq->pos_node, bfqq->pos_root);
++ bfqq->pos_root = NULL;
++ }
++
++ if (bfq_class_idle(bfqq))
++ return;
++ if (!bfqq->next_rq)
++ return;
++
++ bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
++ blk_rq_pos(bfqq->next_rq), &parent, &p);
++ if (!__bfqq) {
++ rb_link_node(&bfqq->pos_node, parent, p);
++ rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
++ } else
++ bfqq->pos_root = NULL;
++}
++
+ /*
+ * Tell whether there are active queues or groups with differentiated weights.
+ */
+@@ -528,6 +594,57 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+ return dur;
+ }
+
++static unsigned bfq_bfqq_cooperations(struct bfq_queue *bfqq)
++{
++ return bfqq->bic ? bfqq->bic->cooperations : 0;
++}
++
++static void
++bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
++{
++ if (bic->saved_idle_window)
++ bfq_mark_bfqq_idle_window(bfqq);
++ else
++ bfq_clear_bfqq_idle_window(bfqq);
++ if (bic->saved_IO_bound)
++ bfq_mark_bfqq_IO_bound(bfqq);
++ else
++ bfq_clear_bfqq_IO_bound(bfqq);
++ /* Assuming that the flag in_large_burst is already correctly set */
++ if (bic->wr_time_left && bfqq->bfqd->low_latency &&
++ !bfq_bfqq_in_large_burst(bfqq) &&
++ bic->cooperations < bfqq->bfqd->bfq_coop_thresh) {
++ /*
++ * Start a weight raising period with the duration given by
++ * the raising_time_left snapshot.
++ */
++ if (bfq_bfqq_busy(bfqq))
++ bfqq->bfqd->wr_busy_queues++;
++ bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff;
++ bfqq->wr_cur_max_time = bic->wr_time_left;
++ bfqq->last_wr_start_finish = jiffies;
++ bfqq->entity.prio_changed = 1;
++ }
++ /*
++ * Clear wr_time_left to prevent bfq_bfqq_save_state() from
++ * getting confused about the queue's need for a weight-raising
++ * period.
++ */
++ bic->wr_time_left = 0;
++}
++
++static int bfqq_process_refs(struct bfq_queue *bfqq)
++{
++ int process_refs, io_refs;
++
++ lockdep_assert_held(bfqq->bfqd->queue->queue_lock);
++
++ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
++ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
++ BUG_ON(process_refs < 0);
++ return process_refs;
++}
++
+ /* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */
+ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+@@ -764,8 +881,14 @@ static void bfq_add_request(struct request *rq)
+ BUG_ON(!next_rq);
+ bfqq->next_rq = next_rq;
+
++ /*
++ * Adjust priority tree position, if next_rq changes.
++ */
++ if (prev != bfqq->next_rq)
++ bfq_pos_tree_add_move(bfqd, bfqq);
++
+ if (!bfq_bfqq_busy(bfqq)) {
+- bool soft_rt, in_burst,
++ bool soft_rt, coop_or_in_burst,
+ idle_for_long_time = time_is_before_jiffies(
+ bfqq->budget_timeout +
+ bfqd->bfq_wr_min_idle_time);
+@@ -793,11 +916,12 @@ static void bfq_add_request(struct request *rq)
+ bfqd->last_ins_in_burst = jiffies;
+ }
+
+- in_burst = bfq_bfqq_in_large_burst(bfqq);
++ coop_or_in_burst = bfq_bfqq_in_large_burst(bfqq) ||
++ bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh;
+ soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+- !in_burst &&
++ !coop_or_in_burst &&
+ time_is_before_jiffies(bfqq->soft_rt_next_start);
+- interactive = !in_burst && idle_for_long_time;
++ interactive = !coop_or_in_burst && idle_for_long_time;
+ entity->budget = max_t(unsigned long, bfqq->max_budget,
+ bfq_serv_to_charge(next_rq, bfqq));
+
+@@ -816,6 +940,9 @@ static void bfq_add_request(struct request *rq)
+ if (!bfqd->low_latency)
+ goto add_bfqq_busy;
+
++ if (bfq_bfqq_just_split(bfqq))
++ goto set_prio_changed;
++
+ /*
+ * If the queue:
+ * - is not being boosted,
+@@ -840,7 +967,7 @@ static void bfq_add_request(struct request *rq)
+ } else if (old_wr_coeff > 1) {
+ if (interactive)
+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+- else if (in_burst ||
++ else if (coop_or_in_burst ||
+ (bfqq->wr_cur_max_time ==
+ bfqd->bfq_wr_rt_max_time &&
+ !soft_rt)) {
+@@ -905,6 +1032,7 @@ static void bfq_add_request(struct request *rq)
+ bfqd->bfq_wr_rt_max_time;
+ }
+ }
++set_prio_changed:
+ if (old_wr_coeff != bfqq->wr_coeff)
+ entity->prio_changed = 1;
+ add_bfqq_busy:
+@@ -1047,6 +1175,15 @@ static void bfq_merged_request(struct request_queue *q, struct request *req,
+ bfqd->last_position);
+ BUG_ON(!next_rq);
+ bfqq->next_rq = next_rq;
++ /*
++ * If next_rq changes, update both the queue's budget to
++ * fit the new request and the queue's position in its
++ * rq_pos_tree.
++ */
++ if (prev != bfqq->next_rq) {
++ bfq_updated_next_req(bfqd, bfqq);
++ bfq_pos_tree_add_move(bfqd, bfqq);
++ }
+ }
+ }
+
+@@ -1129,11 +1266,346 @@ static void bfq_end_wr(struct bfq_data *bfqd)
+ spin_unlock_irq(bfqd->queue->queue_lock);
+ }
+
++static sector_t bfq_io_struct_pos(void *io_struct, bool request)
++{
++ if (request)
++ return blk_rq_pos(io_struct);
++ else
++ return ((struct bio *)io_struct)->bi_iter.bi_sector;
++}
++
++static int bfq_rq_close_to_sector(void *io_struct, bool request,
++ sector_t sector)
++{
++ return abs(bfq_io_struct_pos(io_struct, request) - sector) <=
++ BFQQ_SEEK_THR;
++}
++
++static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ sector_t sector)
++{
++ struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
++ struct rb_node *parent, *node;
++ struct bfq_queue *__bfqq;
++
++ if (RB_EMPTY_ROOT(root))
++ return NULL;
++
++ /*
++ * First, if we find a request starting at the end of the last
++ * request, choose it.
++ */
++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
++ if (__bfqq)
++ return __bfqq;
++
++ /*
++ * If the exact sector wasn't found, the parent of the NULL leaf
++ * will contain the closest sector (rq_pos_tree sorted by
++ * next_request position).
++ */
++ __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
++ return __bfqq;
++
++ if (blk_rq_pos(__bfqq->next_rq) < sector)
++ node = rb_next(&__bfqq->pos_node);
++ else
++ node = rb_prev(&__bfqq->pos_node);
++ if (!node)
++ return NULL;
++
++ __bfqq = rb_entry(node, struct bfq_queue, pos_node);
++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
++ return __bfqq;
++
++ return NULL;
++}
++
++static struct bfq_queue *bfq_find_close_cooperator(struct bfq_data *bfqd,
++ struct bfq_queue *cur_bfqq,
++ sector_t sector)
++{
++ struct bfq_queue *bfqq;
++
++ /*
++ * We shall notice if some of the queues are cooperating,
++ * e.g., working closely on the same area of the device. In
++ * that case, we can group them together and: 1) don't waste
++ * time idling, and 2) serve the union of their requests in
++ * the best possible order for throughput.
++ */
++ bfqq = bfqq_find_close(bfqd, cur_bfqq, sector);
++ if (!bfqq || bfqq == cur_bfqq)
++ return NULL;
++
++ return bfqq;
++}
++
++static struct bfq_queue *
++bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
++{
++ int process_refs, new_process_refs;
++ struct bfq_queue *__bfqq;
++
++ /*
++ * If there are no process references on the new_bfqq, then it is
++ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
++ * may have dropped their last reference (not just their last process
++ * reference).
++ */
++ if (!bfqq_process_refs(new_bfqq))
++ return NULL;
++
++ /* Avoid a circular list and skip interim queue merges. */
++ while ((__bfqq = new_bfqq->new_bfqq)) {
++ if (__bfqq == bfqq)
++ return NULL;
++ new_bfqq = __bfqq;
++ }
++
++ process_refs = bfqq_process_refs(bfqq);
++ new_process_refs = bfqq_process_refs(new_bfqq);
++ /*
++ * If the process for the bfqq has gone away, there is no
++ * sense in merging the queues.
++ */
++ if (process_refs == 0 || new_process_refs == 0)
++ return NULL;
++
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
++ new_bfqq->pid);
++
++ /*
++ * Merging is just a redirection: the requests of the process
++ * owning one of the two queues are redirected to the other queue.
++ * The latter queue, in its turn, is set as shared if this is the
++ * first time that the requests of some process are redirected to
++ * it.
++ *
++ * We redirect bfqq to new_bfqq and not the opposite, because we
++ * are in the context of the process owning bfqq, hence we have
++ * the io_cq of this process. So we can immediately configure this
++ * io_cq to redirect the requests of the process to new_bfqq.
++ *
++ * NOTE, even if new_bfqq coincides with the in-service queue, the
++ * io_cq of new_bfqq is not available, because, if the in-service
++ * queue is shared, bfqd->in_service_bic may not point to the
++ * io_cq of the in-service queue.
++ * Redirecting the requests of the process owning bfqq to the
++ * currently in-service queue is in any case the best option, as
++ * we feed the in-service queue with new requests close to the
++ * last request served and, by doing so, hopefully increase the
++ * throughput.
++ */
++ bfqq->new_bfqq = new_bfqq;
++ atomic_add(process_refs, &new_bfqq->ref);
++ return new_bfqq;
++}
++
++static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
++ struct bfq_queue *new_bfqq)
++{
++ if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) ||
++ (bfqq->ioprio_class != new_bfqq->ioprio_class))
++ return false;
++
++ /*
++ * If either of the queues has already been detected as seeky,
++ * then merging it with the other queue is unlikely to lead to
++ * sequential I/O.
++ */
++ if (BFQQ_SEEKY(bfqq) || BFQQ_SEEKY(new_bfqq))
++ return false;
++
++ /*
++ * Interleaved I/O is known to be done by (some) applications
++ * only for reads, so it does not make sense to merge async
++ * queues.
++ */
++ if (!bfq_bfqq_sync(bfqq) || !bfq_bfqq_sync(new_bfqq))
++ return false;
++
++ return true;
++}
++
++/*
++ * Attempt to schedule a merge of bfqq with the currently in-service queue
++ * or with a close queue among the scheduled queues.
++ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
++ * structure otherwise.
++ *
++ * The OOM queue is not allowed to participate in cooperation: in fact, since
++ * the requests temporarily redirected to the OOM queue could be redirected
++ * again to dedicated queues at any time, the state needed to correctly
++ * handle merging with the OOM queue would be quite complex and expensive
++ * to maintain. Besides, in a condition as critical as out of memory,
++ * the benefits of queue merging may be of little relevance, or even negligible.
++ */
++static struct bfq_queue *
++bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ void *io_struct, bool request)
++{
++ struct bfq_queue *in_service_bfqq, *new_bfqq;
++
++ if (bfqq->new_bfqq)
++ return bfqq->new_bfqq;
++ if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
++ return NULL;
++ /* If device has only one backlogged bfq_queue, don't search. */
++ if (bfqd->busy_queues == 1)
++ return NULL;
++
++ in_service_bfqq = bfqd->in_service_queue;
++
++ if (!in_service_bfqq || in_service_bfqq == bfqq ||
++ !bfqd->in_service_bic ||
++ unlikely(in_service_bfqq == &bfqd->oom_bfqq))
++ goto check_scheduled;
++
++ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
++ bfqq->entity.parent == in_service_bfqq->entity.parent &&
++ bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) {
++ new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
++ if (new_bfqq)
++ return new_bfqq;
++ }
++ /*
++ * Check whether there is a cooperator among currently scheduled
++ * queues. The only thing we need is that the bio/request is not
++ * NULL, as we need it to establish whether a cooperator exists.
++ */
++check_scheduled:
++ new_bfqq = bfq_find_close_cooperator(bfqd, bfqq,
++ bfq_io_struct_pos(io_struct, request));
++
++ BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent);
++
++ if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) &&
++ bfq_may_be_close_cooperator(bfqq, new_bfqq))
++ return bfq_setup_merge(bfqq, new_bfqq);
++
++ return NULL;
++}
++
++static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
++{
++ /*
++ * If !bfqq->bic, the queue is already shared or its requests
++ * have already been redirected to a shared queue; both idle window
++ * and weight raising state have already been saved. Do nothing.
++ */
++ if (!bfqq->bic)
++ return;
++ if (bfqq->bic->wr_time_left)
++ /*
++ * This is the queue of a just-started process, and would
++ * deserve weight raising: we set wr_time_left to the full
++ * weight-raising duration to trigger weight-raising when
++ * and if the queue is split and the first request of the
++ * queue is enqueued.
++ */
++ bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd);
++ else if (bfqq->wr_coeff > 1) {
++ unsigned long wr_duration =
++ jiffies - bfqq->last_wr_start_finish;
++ /*
++ * It may happen that a queue's weight raising period lasts
++ * longer than its wr_cur_max_time, as weight raising is
++ * handled only when a request is enqueued or dispatched (it
++ * does not use any timer). If the weight raising period is
++ * about to end, don't save it.
++ */
++ if (bfqq->wr_cur_max_time <= wr_duration)
++ bfqq->bic->wr_time_left = 0;
++ else
++ bfqq->bic->wr_time_left =
++ bfqq->wr_cur_max_time - wr_duration;
++ /*
++ * The bfq_queue is becoming shared or the requests of the
++ * process owning the queue are being redirected to a shared
++ * queue. Stop the weight raising period of the queue, as in
++ * both cases it should not be owned by an interactive or
++ * soft real-time application.
++ */
++ bfq_bfqq_end_wr(bfqq);
++ } else
++ bfqq->bic->wr_time_left = 0;
++ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
++ bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
++ bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
++ bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
++ bfqq->bic->cooperations++;
++ bfqq->bic->failed_cooperations = 0;
++}
++
++static void bfq_get_bic_reference(struct bfq_queue *bfqq)
++{
++ /*
++ * If bfqq->bic has a non-NULL value, the bic to which it belongs
++ * is about to begin using a shared bfq_queue.
++ */
++ if (bfqq->bic)
++ atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
++}
++
++static void
++bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
++ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
++{
++ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
++ (long unsigned)new_bfqq->pid);
++ /* Save weight raising and idle window of the merged queues */
++ bfq_bfqq_save_state(bfqq);
++ bfq_bfqq_save_state(new_bfqq);
++ if (bfq_bfqq_IO_bound(bfqq))
++ bfq_mark_bfqq_IO_bound(new_bfqq);
++ bfq_clear_bfqq_IO_bound(bfqq);
++ /*
++ * Grab a reference to the bic, to prevent it from being destroyed
++ * before being possibly touched by a bfq_split_bfqq().
++ */
++ bfq_get_bic_reference(bfqq);
++ bfq_get_bic_reference(new_bfqq);
++ /*
++ * Merge queues (that is, let bic redirect its requests to new_bfqq)
++ */
++ bic_set_bfqq(bic, new_bfqq, 1);
++ bfq_mark_bfqq_coop(new_bfqq);
++ /*
++ * new_bfqq now belongs to at least two bics (it is a shared queue):
++ * set new_bfqq->bic to NULL. bfqq either:
++ * - does not belong to any bic any more, and hence bfqq->bic must
++ * be set to NULL, or
++ * - is a queue whose owning bics have already been redirected to a
++ * different queue, hence the queue is destined to not belong to
++ * any bic soon and bfqq->bic is already NULL (therefore the next
++ * assignment causes no harm).
++ */
++ new_bfqq->bic = NULL;
++ bfqq->bic = NULL;
++ bfq_put_queue(bfqq);
++}
++
++static void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq)
++{
++ struct bfq_io_cq *bic = bfqq->bic;
++ struct bfq_data *bfqd = bfqq->bfqd;
++
++ if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) {
++ bic->failed_cooperations++;
++ if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations)
++ bic->cooperations = 0;
++ }
++}
++
+ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+ struct bio *bio)
+ {
+ struct bfq_data *bfqd = q->elevator->elevator_data;
+ struct bfq_io_cq *bic;
++ struct bfq_queue *bfqq, *new_bfqq;
+
+ /*
+ * Disallow merge of a sync bio into an async request.
+@@ -1150,7 +1622,26 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+ if (!bic)
+ return 0;
+
+- return bic_to_bfqq(bic, bfq_bio_sync(bio)) == RQ_BFQQ(rq);
++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
++ /*
++ * We take advantage of this function to perform an early merge
++ * of the queues of possible cooperating processes.
++ */
++ if (bfqq) {
++ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
++ if (new_bfqq) {
++ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
++ /*
++ * If we get here, the bio will be queued in the
++ * shared queue, i.e., new_bfqq, so use new_bfqq
++ * to decide whether bio and rq can be merged.
++ */
++ bfqq = new_bfqq;
++ } else
++ bfq_bfqq_increase_failed_cooperations(bfqq);
++ }
++
++ return bfqq == RQ_BFQQ(rq);
+ }
+
+ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+@@ -1349,6 +1840,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+
+ __bfq_bfqd_reset_in_service(bfqd);
+
++ /*
++ * If this bfqq is shared between multiple processes, check
++ * to make sure that those processes are still issuing I/Os
++ * within the mean seek distance. If not, it may be time to
++ * break the queues apart again.
++ */
++ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
++ bfq_mark_bfqq_split_coop(bfqq);
++
+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+ /*
+ * Overloading budget_timeout field to store the time
+@@ -1357,8 +1857,13 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ */
+ bfqq->budget_timeout = jiffies;
+ bfq_del_bfqq_busy(bfqd, bfqq, 1);
+- } else
++ } else {
+ bfq_activate_bfqq(bfqd, bfqq);
++ /*
++ * Resort priority tree of potential close cooperators.
++ */
++ bfq_pos_tree_add_move(bfqd, bfqq);
++ }
+ }
+
+ /**
+@@ -2242,10 +2747,12 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ /*
+ * If the queue was activated in a burst, or
+ * too much time has elapsed from the beginning
+- * of this weight-raising period, then end weight
+- * raising.
++ * of this weight-raising period, or the queue has
++ * exceeded the acceptable number of cooperations,
++ * then end weight raising.
+ */
+ if (bfq_bfqq_in_large_burst(bfqq) ||
++ bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh ||
+ time_is_before_jiffies(bfqq->last_wr_start_finish +
+ bfqq->wr_cur_max_time)) {
+ bfqq->last_wr_start_finish = jiffies;
+@@ -2474,6 +2981,25 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
+ #endif
+ }
+
++static void bfq_put_cooperator(struct bfq_queue *bfqq)
++{
++ struct bfq_queue *__bfqq, *next;
++
++ /*
++ * If this queue was scheduled to merge with another queue, be
++ * sure to drop the reference taken on that queue (and others in
++ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
++ */
++ __bfqq = bfqq->new_bfqq;
++ while (__bfqq) {
++ if (__bfqq == bfqq)
++ break;
++ next = __bfqq->new_bfqq;
++ bfq_put_queue(__bfqq);
++ __bfqq = next;
++ }
++}
++
+ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ if (bfqq == bfqd->in_service_queue) {
+@@ -2484,6 +3010,8 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
+ atomic_read(&bfqq->ref));
+
++ bfq_put_cooperator(bfqq);
++
+ bfq_put_queue(bfqq);
+ }
+
+@@ -2492,6 +3020,25 @@ static void bfq_init_icq(struct io_cq *icq)
+ struct bfq_io_cq *bic = icq_to_bic(icq);
+
+ bic->ttime.last_end_request = jiffies;
++ /*
++ * A newly created bic indicates that the process has just
++ * started doing I/O, and is probably mapping into memory its
++ * executable and libraries: it definitely needs weight raising.
++ * There is however the possibility that the process performs,
++ * for a while, I/O close to some other process. EQM intercepts
++ * this behavior and may merge the queue corresponding to the
++ * process with some other queue, BEFORE the weight of the queue
++ * is raised. Merged queues are not weight-raised (they are assumed
++ * to belong to processes that benefit only from high throughput).
++ * If the merge is basically the consequence of an accident, then
++ * the queue will be split soon and will get back its old weight.
++ * It is then important to write down somewhere that this queue
++ * does need weight raising, even if it did not make it to get its
++ * weight raised before being merged. To this purpose, we overload
++ * the field wr_time_left and assign 1 to it, to mark the queue
++ * as needing weight raising.
++ */
++ bic->wr_time_left = 1;
+ }
+
+ static void bfq_exit_icq(struct io_cq *icq)
+@@ -2505,6 +3052,13 @@ static void bfq_exit_icq(struct io_cq *icq)
+ }
+
+ if (bic->bfqq[BLK_RW_SYNC]) {
++ /*
++ * If the bic is using a shared queue, put the reference
++ * taken on the io_context when the bic started using a
++ * shared bfq_queue.
++ */
++ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
++ put_io_context(icq->ioc);
+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
+ bic->bfqq[BLK_RW_SYNC] = NULL;
+ }
+@@ -2809,6 +3363,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
+ return;
+
++ /* Idle window just restored, statistics are meaningless. */
++ if (bfq_bfqq_just_split(bfqq))
++ return;
++
+ enable_idle = bfq_bfqq_idle_window(bfqq);
+
+ if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
+@@ -2856,6 +3414,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
+ !BFQQ_SEEKY(bfqq))
+ bfq_update_idle_window(bfqd, bfqq, bic);
++ bfq_clear_bfqq_just_split(bfqq);
+
+ bfq_log_bfqq(bfqd, bfqq,
+ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
+@@ -2920,12 +3479,47 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+ {
+ struct bfq_data *bfqd = q->elevator->elevator_data;
+- struct bfq_queue *bfqq = RQ_BFQQ(rq);
++ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
+
+ assert_spin_locked(bfqd->queue->queue_lock);
+
++ /*
++ * An unplug may trigger a requeue of a request from the device
++ * driver: make sure we are in process context while trying to
++ * merge two bfq_queues.
++ */
++ if (!in_interrupt()) {
++ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
++ if (new_bfqq) {
++ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
++ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
++ /*
++ * Release the request's reference to the old bfqq
++ * and make sure one is taken to the shared queue.
++ */
++ new_bfqq->allocated[rq_data_dir(rq)]++;
++ bfqq->allocated[rq_data_dir(rq)]--;
++ atomic_inc(&new_bfqq->ref);
++ bfq_put_queue(bfqq);
++ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
++ bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
++ bfqq, new_bfqq);
++ rq->elv.priv[1] = new_bfqq;
++ bfqq = new_bfqq;
++ } else
++ bfq_bfqq_increase_failed_cooperations(bfqq);
++ }
++
+ bfq_add_request(rq);
+
++ /*
++ * Here a newly-created bfq_queue has already started a weight-raising
++ * period: clear wr_time_left to prevent bfq_bfqq_save_state()
++ * from assigning it a full weight-raising period. See the detailed
++ * comments about this field in bfq_init_icq().
++ */
++ if (bfqq->bic)
++ bfqq->bic->wr_time_left = 0;
+ rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+ list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+@@ -3094,6 +3688,32 @@ static void bfq_put_request(struct request *rq)
+ }
+
+ /*
++ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
++ * was the last process referring to said bfqq.
++ */
++static struct bfq_queue *
++bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
++{
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
++
++ put_io_context(bic->icq.ioc);
++
++ if (bfqq_process_refs(bfqq) == 1) {
++ bfqq->pid = current->pid;
++ bfq_clear_bfqq_coop(bfqq);
++ bfq_clear_bfqq_split_coop(bfqq);
++ return bfqq;
++ }
++
++ bic_set_bfqq(bic, NULL, 1);
++
++ bfq_put_cooperator(bfqq);
++
++ bfq_put_queue(bfqq);
++ return NULL;
++}
++
++/*
+ * Allocate bfq data structures associated with this request.
+ */
+ static int bfq_set_request(struct request_queue *q, struct request *rq,
+@@ -3105,6 +3725,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+ const int is_sync = rq_is_sync(rq);
+ struct bfq_queue *bfqq;
+ unsigned long flags;
++ bool split = false;
+
+ might_sleep_if(gfpflags_allow_blocking(gfp_mask));
+
+@@ -3117,15 +3738,30 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+
+ bfq_bic_update_cgroup(bic, bio);
+
++new_queue:
+ bfqq = bic_to_bfqq(bic, is_sync);
+ if (!bfqq || bfqq == &bfqd->oom_bfqq) {
+ bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, gfp_mask);
+ bic_set_bfqq(bic, bfqq, is_sync);
+- if (is_sync) {
+- if (bfqd->large_burst)
++ if (split && is_sync) {
++ if ((bic->was_in_burst_list && bfqd->large_burst) ||
++ bic->saved_in_large_burst)
+ bfq_mark_bfqq_in_large_burst(bfqq);
+- else
+- bfq_clear_bfqq_in_large_burst(bfqq);
++ else {
++ bfq_clear_bfqq_in_large_burst(bfqq);
++ if (bic->was_in_burst_list)
++ hlist_add_head(&bfqq->burst_list_node,
++ &bfqd->burst_list);
++ }
++ }
++ } else {
++ /* If the queue was seeky for too long, break it apart. */
++ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
++ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
++ bfqq = bfq_split_bfqq(bic, bfqq);
++ split = true;
++ if (!bfqq)
++ goto new_queue;
+ }
+ }
+
+@@ -3137,6 +3773,26 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+ rq->elv.priv[0] = bic;
+ rq->elv.priv[1] = bfqq;
+
++ /*
++ * If a bfq_queue has only one process reference, it is owned
++ * by only one bfq_io_cq: we can set the bic field of the
++ * bfq_queue to the address of that structure. Also, if the
++ * queue has just been split, mark a flag so that the
++ * information is available to the other scheduler hooks.
++ */
++ if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
++ bfqq->bic = bic;
++ if (split) {
++ bfq_mark_bfqq_just_split(bfqq);
++ /*
++ * If the queue has just been split from a shared
++ * queue, restore the idle window and the possible
++ * weight raising period.
++ */
++ bfq_bfqq_resume_state(bfqq, bic);
++ }
++ }
++
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return 0;
+@@ -3290,6 +3946,7 @@ static void bfq_init_root_group(struct bfq_group *root_group,
+ root_group->my_entity = NULL;
+ root_group->bfqd = bfqd;
+ #endif
++ root_group->rq_pos_tree = RB_ROOT;
+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+ root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
+ }
+@@ -3370,6 +4027,8 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async;
+ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync;
+
++ bfqd->bfq_coop_thresh = 2;
++ bfqd->bfq_failed_cooperations = 7000;
+ bfqd->bfq_requests_within_timer = 120;
+
+ bfqd->bfq_large_burst_thresh = 11;
+diff --git a/block/bfq.h b/block/bfq.h
+index 485d0c9..f73c942 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -183,6 +183,8 @@ struct bfq_group;
+ * ioprio_class value.
+ * @new_bfqq: shared bfq_queue if queue is cooperating with
+ * one or more other queues.
++ * @pos_node: request-position tree member (see bfq_group's @rq_pos_tree).
++ * @pos_root: request-position tree root (see bfq_group's @rq_pos_tree).
+ * @sort_list: sorted list of pending requests.
+ * @next_rq: if fifo isn't expired, next request to serve.
+ * @queued: nr of requests queued in @sort_list.
+@@ -304,6 +306,26 @@ struct bfq_ttime {
+ * @ttime: associated @bfq_ttime struct
+ * @ioprio: per (request_queue, blkcg) ioprio.
+ * @blkcg_id: id of the blkcg the related io_cq belongs to.
++ * @wr_time_left: snapshot of the time left before weight raising ends
++ * for the sync queue associated with this process; this
++ * snapshot is taken to remember this value while the weight
++ * raising is suspended because the queue is merged with a
++ * shared queue, and is used to set @wr_cur_max_time
++ * when the queue is split from the shared queue and its
++ * weight is raised again
++ * @saved_idle_window: same purpose as the previous field for the idle
++ * window
++ * @saved_IO_bound: same purpose as the previous two fields for the I/O
++ * bound classification of a queue
++ * @saved_in_large_burst: same purpose as the previous fields for the
++ * value of the field keeping the queue's belonging
++ * to a large burst
++ * @was_in_burst_list: true if the queue belonged to a burst list
++ * before its merge with another cooperating queue
++ * @cooperations: counter of consecutive successful queue merges undergone
++ * by any of the process' @bfq_queues
++ * @failed_cooperations: counter of consecutive failed queue merges of any
++ * of the process' @bfq_queues
+ */
+ struct bfq_io_cq {
+ struct io_cq icq; /* must be the first member */
+@@ -314,6 +336,16 @@ struct bfq_io_cq {
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ uint64_t blkcg_id; /* the current blkcg ID */
+ #endif
++
++ unsigned int wr_time_left;
++ bool saved_idle_window;
++ bool saved_IO_bound;
++
++ bool saved_in_large_burst;
++ bool was_in_burst_list;
++
++ unsigned int cooperations;
++ unsigned int failed_cooperations;
+ };
+
+ enum bfq_device_speed {
+@@ -557,6 +589,9 @@ enum bfqq_state_flags {
+ * may need softrt-next-start
+ * update
+ */
++ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
++ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */
++ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */
+ };
+
+ #define BFQ_BFQQ_FNS(name) \
+@@ -583,6 +618,9 @@ BFQ_BFQQ_FNS(budget_new);
+ BFQ_BFQQ_FNS(IO_bound);
+ BFQ_BFQQ_FNS(in_large_burst);
+ BFQ_BFQQ_FNS(constantly_seeky);
++BFQ_BFQQ_FNS(coop);
++BFQ_BFQQ_FNS(split_coop);
++BFQ_BFQQ_FNS(just_split);
+ BFQ_BFQQ_FNS(softrt_update);
+ #undef BFQ_BFQQ_FNS
+
+@@ -675,6 +713,9 @@ struct bfq_group_data {
+ * are groups with more than one active @bfq_entity
+ * (see the comments to the function
+ * bfq_bfqq_must_not_expire()).
++ * @rq_pos_tree: rbtree sorted by next_request position, used when
++ * determining if two or more queues have interleaving
++ * requests (see bfq_find_close_cooperator()).
+ *
+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
+ * there is a set of bfq_groups, each one collecting the lower-level
+@@ -701,6 +742,8 @@ struct bfq_group {
+
+ int active_entities;
+
++ struct rb_root rq_pos_tree;
++
+ struct bfqg_stats stats;
+ struct bfqg_stats dead_stats; /* stats pushed from dead children */
+ };
+@@ -711,6 +754,8 @@ struct bfq_group {
+
+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+ struct bfq_queue *async_idle_bfqq;
++
++ struct rb_root rq_pos_tree;
+ };
+ #endif
+
+@@ -787,6 +832,27 @@ static void bfq_put_bfqd_unlock(struct bfq_data *bfqd, unsigned long *flags)
+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
+ }
+
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++
++static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
++{
++ struct bfq_entity *group_entity = bfqq->entity.parent;
++
++ if (!group_entity)
++ group_entity = &bfqq->bfqd->root_group->entity;
++
++ return container_of(group_entity, struct bfq_group, entity);
++}
++
++#else
++
++static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
++{
++ return bfqq->bfqd->root_group;
++}
++
++#endif
++
+ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
+ static void bfq_put_queue(struct bfq_queue *bfqq);
+ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
+--
+1.9.1
+
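For reference while reviewing the hunks above: the three new queue flags
(coop, split_coop, just_split) are wired up through BFQ_BFQQ_FNS(), but
the macro body itself falls outside the hunk context. What follows is
only a sketch of the conventional expansion, compilable in userspace;
the *_sketch names are illustrative stand-ins, not identifiers from the
patch.

#include <stdio.h>

/* Mirrors the bfqq_state_flags additions above. */
enum bfqq_state_flags_sketch {
	BFQ_BFQQ_FLAG_coop = 0,		/* bfqq is shared */
	BFQ_BFQQ_FLAG_split_coop,	/* shared bfqq will be split */
	BFQ_BFQQ_FLAG_just_split,	/* queue has just been split */
};

struct bfq_queue_sketch {
	unsigned int flags;		/* in-kernel: bfqq->flags */
};

/* One mark/clear/test triple per flag, generated via token pasting. */
#define BFQ_BFQQ_FNS(name)						\
static void bfq_mark_bfqq_##name(struct bfq_queue_sketch *bfqq)		\
{									\
	bfqq->flags |= (1 << BFQ_BFQQ_FLAG_##name);			\
}									\
static void bfq_clear_bfqq_##name(struct bfq_queue_sketch *bfqq)	\
{									\
	bfqq->flags &= ~(1 << BFQ_BFQQ_FLAG_##name);			\
}									\
static int bfq_bfqq_##name(const struct bfq_queue_sketch *bfqq)		\
{									\
	return (bfqq->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0;	\
}

BFQ_BFQQ_FNS(coop)
BFQ_BFQQ_FNS(split_coop)
BFQ_BFQQ_FNS(just_split)

int main(void)
{
	struct bfq_queue_sketch q = { 0 };

	bfq_mark_bfqq_just_split(&q);	/* as bfq_set_request() does */
	printf("just_split=%d coop=%d\n",
	       bfq_bfqq_just_split(&q), bfq_bfqq_coop(&q));
	bfq_clear_bfqq_just_split(&q);
	printf("just_split=%d\n", bfq_bfqq_just_split(&q));
	return 0;
}

This uniform expansion is why bfq_set_request() above can simply call
bfq_mark_bfqq_just_split(bfqq), and later hooks can test
bfq_bfqq_just_split(bfqq), with no open-coded bit fiddling.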
diff --git a/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1 b/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1
new file mode 100644
index 0000000..372f093
--- /dev/null
+++ b/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1
@@ -0,0 +1,6361 @@
+From 21d90fdc7488cd7c28f47b5ba759e62c697c0382 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Tue, 17 May 2016 08:28:04 +0200
+Subject: [PATCH 4/4] block, bfq: turn BFQ-v7r11 for 4.7.0 into BFQ-v8 for
+ 4.7.0
+
+---
+ block/Kconfig.iosched | 2 +-
+ block/bfq-cgroup.c | 448 +++++----
+ block/bfq-iosched.c | 2581 +++++++++++++++++++++++++++++--------------------
+ block/bfq-sched.c | 432 +++++++--
+ block/bfq.h | 697 +++++++------
+ 5 files changed, 2433 insertions(+), 1727 deletions(-)
+
+diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
+index f78cd1a..6d92579 100644
+--- a/block/Kconfig.iosched
++++ b/block/Kconfig.iosched
+@@ -53,7 +53,7 @@ config IOSCHED_BFQ
+
+ config BFQ_GROUP_IOSCHED
+ bool "BFQ hierarchical scheduling support"
+- depends on CGROUPS && IOSCHED_BFQ=y
++ depends on IOSCHED_BFQ && BLK_CGROUP
+ default n
+ ---help---
+ Enable hierarchical scheduling in BFQ, using the blkio controller.
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+index 5ee99ec..bc01663 100644
+--- a/block/bfq-cgroup.c
++++ b/block/bfq-cgroup.c
+@@ -162,7 +162,6 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
+ static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
+ {
+ struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq);
+- BUG_ON(!pd);
+ return pd_to_bfqg(pd);
+ }
+
+@@ -224,14 +223,6 @@ static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw)
+ blkg_rwstat_add(&bfqg->stats.merged, rw, 1);
+ }
+
+-static void bfqg_stats_update_dispatch(struct bfq_group *bfqg,
+- uint64_t bytes, int rw)
+-{
+- blkg_stat_add(&bfqg->stats.sectors, bytes >> 9);
+- blkg_rwstat_add(&bfqg->stats.serviced, rw, 1);
+- blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes);
+-}
+-
+ static void bfqg_stats_update_completion(struct bfq_group *bfqg,
+ uint64_t start_time, uint64_t io_start_time, int rw)
+ {
+@@ -248,17 +239,11 @@ static void bfqg_stats_update_completion(struct bfq_group *bfqg,
+ /* @stats = 0 */
+ static void bfqg_stats_reset(struct bfqg_stats *stats)
+ {
+- if (!stats)
+- return;
+-
+ /* queued stats shouldn't be cleared */
+- blkg_rwstat_reset(&stats->service_bytes);
+- blkg_rwstat_reset(&stats->serviced);
+ blkg_rwstat_reset(&stats->merged);
+ blkg_rwstat_reset(&stats->service_time);
+ blkg_rwstat_reset(&stats->wait_time);
+ blkg_stat_reset(&stats->time);
+- blkg_stat_reset(&stats->unaccounted_time);
+ blkg_stat_reset(&stats->avg_queue_size_sum);
+ blkg_stat_reset(&stats->avg_queue_size_samples);
+ blkg_stat_reset(&stats->dequeue);
+@@ -268,21 +253,19 @@ static void bfqg_stats_reset(struct bfqg_stats *stats)
+ }
+
+ /* @to += @from */
+-static void bfqg_stats_merge(struct bfqg_stats *to, struct bfqg_stats *from)
++static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
+ {
+ if (!to || !from)
+ return;
+
+ /* queued stats shouldn't be cleared */
+- blkg_rwstat_add_aux(&to->service_bytes, &from->service_bytes);
+- blkg_rwstat_add_aux(&to->serviced, &from->serviced);
+ blkg_rwstat_add_aux(&to->merged, &from->merged);
+ blkg_rwstat_add_aux(&to->service_time, &from->service_time);
+ blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
+ blkg_stat_add_aux(&from->time, &from->time);
+- blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time);
+ blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
+- blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
++ blkg_stat_add_aux(&to->avg_queue_size_samples,
++ &from->avg_queue_size_samples);
+ blkg_stat_add_aux(&to->dequeue, &from->dequeue);
+ blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
+ blkg_stat_add_aux(&to->idle_time, &from->idle_time);
+@@ -308,10 +291,8 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
+ if (unlikely(!parent))
+ return;
+
+- bfqg_stats_merge(&parent->dead_stats, &bfqg->stats);
+- bfqg_stats_merge(&parent->dead_stats, &bfqg->dead_stats);
++ bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
+ bfqg_stats_reset(&bfqg->stats);
+- bfqg_stats_reset(&bfqg->dead_stats);
+ }
+
+ static void bfq_init_entity(struct bfq_entity *entity,
+@@ -332,15 +313,11 @@ static void bfq_init_entity(struct bfq_entity *entity,
+
+ static void bfqg_stats_exit(struct bfqg_stats *stats)
+ {
+- blkg_rwstat_exit(&stats->service_bytes);
+- blkg_rwstat_exit(&stats->serviced);
+ blkg_rwstat_exit(&stats->merged);
+ blkg_rwstat_exit(&stats->service_time);
+ blkg_rwstat_exit(&stats->wait_time);
+ blkg_rwstat_exit(&stats->queued);
+- blkg_stat_exit(&stats->sectors);
+ blkg_stat_exit(&stats->time);
+- blkg_stat_exit(&stats->unaccounted_time);
+ blkg_stat_exit(&stats->avg_queue_size_sum);
+ blkg_stat_exit(&stats->avg_queue_size_samples);
+ blkg_stat_exit(&stats->dequeue);
+@@ -351,15 +328,11 @@ static void bfqg_stats_exit(struct bfqg_stats *stats)
+
+ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
+ {
+- if (blkg_rwstat_init(&stats->service_bytes, gfp) ||
+- blkg_rwstat_init(&stats->serviced, gfp) ||
+- blkg_rwstat_init(&stats->merged, gfp) ||
++ if (blkg_rwstat_init(&stats->merged, gfp) ||
+ blkg_rwstat_init(&stats->service_time, gfp) ||
+ blkg_rwstat_init(&stats->wait_time, gfp) ||
+ blkg_rwstat_init(&stats->queued, gfp) ||
+- blkg_stat_init(&stats->sectors, gfp) ||
+ blkg_stat_init(&stats->time, gfp) ||
+- blkg_stat_init(&stats->unaccounted_time, gfp) ||
+ blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
+ blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
+ blkg_stat_init(&stats->dequeue, gfp) ||
+@@ -374,20 +347,36 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
+ }
+
+ static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
+- {
++{
+ return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
+- }
++}
+
+ static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
+ {
+ return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
+ }
+
++static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
++{
++ struct bfq_group_data *bgd;
++
++ bgd = kzalloc(sizeof(*bgd), GFP_KERNEL);
++ if (!bgd)
++ return NULL;
++ return &bgd->pd;
++}
++
+ static void bfq_cpd_init(struct blkcg_policy_data *cpd)
+ {
+ struct bfq_group_data *d = cpd_to_bfqgd(cpd);
+
+- d->weight = BFQ_DEFAULT_GRP_WEIGHT;
++ d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
++ CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
++}
++
++static void bfq_cpd_free(struct blkcg_policy_data *cpd)
++{
++ kfree(cpd_to_bfqgd(cpd));
+ }
+
+ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
+@@ -398,8 +387,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
+ if (!bfqg)
+ return NULL;
+
+- if (bfqg_stats_init(&bfqg->stats, gfp) ||
+- bfqg_stats_init(&bfqg->dead_stats, gfp)) {
++ if (bfqg_stats_init(&bfqg->stats, gfp)) {
+ kfree(bfqg);
+ return NULL;
+ }
+@@ -407,27 +395,20 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
+ return &bfqg->pd;
+ }
+
+-static void bfq_group_set_parent(struct bfq_group *bfqg,
+- struct bfq_group *parent)
++static void bfq_pd_init(struct blkg_policy_data *pd)
+ {
++ struct blkcg_gq *blkg;
++ struct bfq_group *bfqg;
++ struct bfq_data *bfqd;
+ struct bfq_entity *entity;
++ struct bfq_group_data *d;
+
+- BUG_ON(!parent);
+- BUG_ON(!bfqg);
+- BUG_ON(bfqg == parent);
+-
++ blkg = pd_to_blkg(pd);
++ BUG_ON(!blkg);
++ bfqg = blkg_to_bfqg(blkg);
++ bfqd = blkg->q->elevator->elevator_data;
+ entity = &bfqg->entity;
+- entity->parent = parent->my_entity;
+- entity->sched_data = &parent->sched_data;
+-}
+-
+-static void bfq_pd_init(struct blkg_policy_data *pd)
+-{
+- struct blkcg_gq *blkg = pd_to_blkg(pd);
+- struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+- struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
+- struct bfq_entity *entity = &bfqg->entity;
+- struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
++ d = blkcg_to_bfqgd(blkg->blkcg);
+
+ entity->orig_weight = entity->weight = entity->new_weight = d->weight;
+ entity->my_sched_data = &bfqg->sched_data;
+@@ -445,45 +426,28 @@ static void bfq_pd_free(struct blkg_policy_data *pd)
+ struct bfq_group *bfqg = pd_to_bfqg(pd);
+
+ bfqg_stats_exit(&bfqg->stats);
+- bfqg_stats_exit(&bfqg->dead_stats);
+-
+ return kfree(bfqg);
+ }
+
+-/* offset delta from bfqg->stats to bfqg->dead_stats */
+-static const int dead_stats_off_delta = offsetof(struct bfq_group, dead_stats) -
+- offsetof(struct bfq_group, stats);
+-
+-/* to be used by recursive prfill, sums live and dead stats recursively */
+-static u64 bfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
++static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
+ {
+- u64 sum = 0;
++ struct bfq_group *bfqg = pd_to_bfqg(pd);
+
+- sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
+- sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
+- off + dead_stats_off_delta);
+- return sum;
++ bfqg_stats_reset(&bfqg->stats);
+ }
+
+-/* to be used by recursive prfill, sums live and dead rwstats recursively */
+-static struct blkg_rwstat bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
+- int off)
++static void bfq_group_set_parent(struct bfq_group *bfqg,
++ struct bfq_group *parent)
+ {
+- struct blkg_rwstat a, b;
+-
+- a = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
+- b = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
+- off + dead_stats_off_delta);
+- blkg_rwstat_add_aux(&a, &b);
+- return a;
+-}
++ struct bfq_entity *entity;
+
+-static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
+-{
+- struct bfq_group *bfqg = pd_to_bfqg(pd);
++ BUG_ON(!parent);
++ BUG_ON(!bfqg);
++ BUG_ON(bfqg == parent);
+
+- bfqg_stats_reset(&bfqg->stats);
+- bfqg_stats_reset(&bfqg->dead_stats);
++ entity = &bfqg->entity;
++ entity->parent = parent->my_entity;
++ entity->sched_data = &parent->sched_data;
+ }
+
+ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+@@ -531,13 +495,18 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+ return bfqg;
+ }
+
+-static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
++static void bfq_pos_tree_add_move(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq);
++
++static void bfq_bfqq_expire(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ bool compensate,
++ enum bfqq_expiration reason);
+
+ /**
+ * bfq_bfqq_move - migrate @bfqq to @bfqg.
+ * @bfqd: queue descriptor.
+ * @bfqq: the queue to move.
+- * @entity: @bfqq's entity.
+ * @bfqg: the group to move to.
+ *
+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
+@@ -548,26 +517,40 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ * rcu_read_lock()).
+ */
+ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+- struct bfq_entity *entity, struct bfq_group *bfqg)
++ struct bfq_group *bfqg)
+ {
+- int busy, resume;
++ struct bfq_entity *entity = &bfqq->entity;
+
+- busy = bfq_bfqq_busy(bfqq);
+- resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
+-
+- BUG_ON(resume && !entity->on_st);
+- BUG_ON(busy && !resume && entity->on_st &&
++ BUG_ON(!bfq_bfqq_busy(bfqq) && !RB_EMPTY_ROOT(&bfqq->sort_list));
++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list) && !entity->on_st);
++ BUG_ON(bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list)
++ && entity->on_st &&
+ bfqq != bfqd->in_service_queue);
++ BUG_ON(!bfq_bfqq_busy(bfqq) && bfqq == bfqd->in_service_queue);
++
++ /* If bfqq is empty, then bfq_bfqq_expire also invokes
++ * bfq_del_bfqq_busy, thereby removing bfqq and its entity
++ * from data structures related to current group. Otherwise we
++ * need to remove bfqq explicitly with bfq_deactivate_bfqq, as
++ * we do below.
++ */
++ if (bfqq == bfqd->in_service_queue)
++ bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
++ false, BFQ_BFQQ_PREEMPTED);
++
++ BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq)
++ && &bfq_entity_service_tree(entity)->idle !=
++ entity->tree);
+
+- if (busy) {
+- BUG_ON(atomic_read(&bfqq->ref) < 2);
++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
+
+- if (!resume)
+- bfq_del_bfqq_busy(bfqd, bfqq, 0);
+- else
+- bfq_deactivate_bfqq(bfqd, bfqq, 0);
+- } else if (entity->on_st)
++ if (bfq_bfqq_busy(bfqq))
++ bfq_deactivate_bfqq(bfqd, bfqq, 0);
++ else if (entity->on_st) {
++ BUG_ON(&bfq_entity_service_tree(entity)->idle !=
++ entity->tree);
+ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
++ }
+ bfqg_put(bfqq_group(bfqq));
+
+ /*
+@@ -579,14 +562,17 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ entity->sched_data = &bfqg->sched_data;
+ bfqg_get(bfqg);
+
+- if (busy) {
++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
++ if (bfq_bfqq_busy(bfqq)) {
+ bfq_pos_tree_add_move(bfqd, bfqq);
+- if (resume)
+- bfq_activate_bfqq(bfqd, bfqq);
++ bfq_activate_bfqq(bfqd, bfqq);
+ }
+
+ if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
+ bfq_schedule_dispatch(bfqd);
++ BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq)
++ && &bfq_entity_service_tree(entity)->idle !=
++ entity->tree);
+ }
+
+ /**
+@@ -621,7 +607,8 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+ bic_set_bfqq(bic, NULL, 0);
+ bfq_log_bfqq(bfqd, async_bfqq,
+ "bic_change_group: %p %d",
+- async_bfqq, atomic_read(&async_bfqq->ref));
++ async_bfqq,
++ async_bfqq->ref);
+ bfq_put_queue(async_bfqq);
+ }
+ }
+@@ -629,7 +616,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+ if (sync_bfqq) {
+ entity = &sync_bfqq->entity;
+ if (entity->sched_data != &bfqg->sched_data)
+- bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
++ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ }
+
+ return bfqg;
+@@ -638,25 +625,23 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+ static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+ {
+ struct bfq_data *bfqd = bic_to_bfqd(bic);
+- struct blkcg *blkcg;
+ struct bfq_group *bfqg = NULL;
+- uint64_t id;
++ uint64_t serial_nr;
+
+ rcu_read_lock();
+- blkcg = bio_blkcg(bio);
+- id = blkcg->css.serial_nr;
+- rcu_read_unlock();
++ serial_nr = bio_blkcg(bio)->css.serial_nr;
+
+ /*
+ * Check whether blkcg has changed. The condition may trigger
+ * spuriously on a newly created cic but there's no harm.
+ */
+- if (unlikely(!bfqd) || likely(bic->blkcg_id == id))
+- return;
++ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
++ goto out;
+
+- bfqg = __bfq_bic_change_cgroup(bfqd, bic, blkcg);
+- BUG_ON(!bfqg);
+- bic->blkcg_id = id;
++ bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
++ bic->blkcg_serial_nr = serial_nr;
++out:
++ rcu_read_unlock();
+ }
+
+ /**
+@@ -682,8 +667,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+ BUG_ON(!bfqq);
+- bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
+- return;
++ bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
+ }
+
+ /**
+@@ -711,16 +695,15 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd,
+ if (bfqg->sched_data.in_service_entity)
+ bfq_reparent_leaf_entity(bfqd,
+ bfqg->sched_data.in_service_entity);
+-
+- return;
+ }
+
+ /**
+- * bfq_destroy_group - destroy @bfqg.
+- * @bfqg: the group being destroyed.
++ * bfq_pd_offline - deactivate the entity associated with @pd,
++ * and reparent its children entities.
++ * @pd: descriptor of the policy going offline.
+ *
+- * Destroy @bfqg, making sure that it is not referenced from its parent.
+- * blkio already grabs the queue_lock for us, so no need to use RCU-based magic
++ * blkio already grabs the queue_lock for us, so no need to use
++ * RCU-based magic
+ */
+ static void bfq_pd_offline(struct blkg_policy_data *pd)
+ {
+@@ -779,6 +762,12 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
+ bfq_put_async_queues(bfqd, bfqg);
+ BUG_ON(entity->tree);
+
++ /*
++ * @blkg is going offline and will be ignored by
++ * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so
++ * that they don't get lost. If IOs complete after this point, the
++ * stats for them will be lost. Oh well...
++ */
+ bfqg_stats_xfer_dead(bfqg);
+ }
+
+@@ -788,46 +777,35 @@ static void bfq_end_wr_async(struct bfq_data *bfqd)
+
+ list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
+ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
++ BUG_ON(!bfqg);
+
+ bfq_end_wr_async_queues(bfqd, bfqg);
+ }
+ bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+ }
+
+-static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css,
+- struct cftype *cftype)
+-{
+- struct blkcg *blkcg = css_to_blkcg(css);
+- struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+- int ret = -EINVAL;
+-
+- spin_lock_irq(&blkcg->lock);
+- ret = bfqgd->weight;
+- spin_unlock_irq(&blkcg->lock);
+-
+- return ret;
+-}
+-
+-static int bfqio_cgroup_weight_read_dfl(struct seq_file *sf, void *v)
++static int bfq_io_show_weight(struct seq_file *sf, void *v)
+ {
+ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
++ unsigned int val = 0;
+
+- spin_lock_irq(&blkcg->lock);
+- seq_printf(sf, "%u\n", bfqgd->weight);
+- spin_unlock_irq(&blkcg->lock);
++ if (bfqgd)
++ val = bfqgd->weight;
++
++ seq_printf(sf, "%u\n", val);
+
+ return 0;
+ }
+
+-static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
+- struct cftype *cftype,
+- u64 val)
++static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
++ struct cftype *cftype,
++ u64 val)
+ {
+ struct blkcg *blkcg = css_to_blkcg(css);
+ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+ struct blkcg_gq *blkg;
+- int ret = -EINVAL;
++ int ret = -ERANGE;
+
+ if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
+ return ret;
+@@ -837,6 +815,7 @@ static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
+ bfqgd->weight = (unsigned short)val;
+ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
++
+ if (!bfqg)
+ continue;
+ /*
+@@ -871,13 +850,18 @@ static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
+ return ret;
+ }
+
+-static ssize_t bfqio_cgroup_weight_write_dfl(struct kernfs_open_file *of,
+- char *buf, size_t nbytes,
+- loff_t off)
++static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
++ char *buf, size_t nbytes,
++ loff_t off)
+ {
++ u64 weight;
+ /* First unsigned long found in the file is used */
+- return bfqio_cgroup_weight_write(of_css(of), NULL,
+- simple_strtoull(strim(buf), NULL, 0));
++ int ret = kstrtoull(strim(buf), 0, &weight);
++
++ if (ret)
++ return ret;
++
++ return bfq_io_set_weight_legacy(of_css(of), NULL, weight);
+ }
+
+ static int bfqg_print_stat(struct seq_file *sf, void *v)
+@@ -897,16 +881,17 @@ static int bfqg_print_rwstat(struct seq_file *sf, void *v)
+ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+ {
+- u64 sum = bfqg_stat_pd_recursive_sum(pd, off);
+-
++ u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd),
++ &blkcg_policy_bfq, off);
+ return __blkg_prfill_u64(sf, pd, sum);
+ }
+
+ static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+ {
+- struct blkg_rwstat sum = bfqg_rwstat_pd_recursive_sum(pd, off);
+-
++ struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd),
++ &blkcg_policy_bfq,
++ off);
+ return __blkg_prfill_rwstat(sf, pd, &sum);
+ }
+
+@@ -926,6 +911,41 @@ static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
+ return 0;
+ }
+
++static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
++ int off)
++{
++ u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
++
++ return __blkg_prfill_u64(sf, pd, sum >> 9);
++}
++
++static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
++ bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
++ return 0;
++}
++
++static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
++ struct blkg_policy_data *pd, int off)
++{
++ struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL,
++ offsetof(struct blkcg_gq, stat_bytes));
++ u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
++ atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
++
++ return __blkg_prfill_u64(sf, pd, sum >> 9);
++}
++
++static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
++ bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
++ false);
++ return 0;
++}
++
++
+ static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+ {
+@@ -950,7 +970,8 @@ static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
+ return 0;
+ }
+
+-static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
++static struct bfq_group *
++bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+ {
+ int ret;
+
+@@ -958,41 +979,18 @@ static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int n
+ if (ret)
+ return NULL;
+
+- return blkg_to_bfqg(bfqd->queue->root_blkg);
+-}
+-
+-static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
+-{
+- struct bfq_group_data *bgd;
+-
+- bgd = kzalloc(sizeof(*bgd), GFP_KERNEL);
+- if (!bgd)
+- return NULL;
+- return &bgd->pd;
++ return blkg_to_bfqg(bfqd->queue->root_blkg);
+ }
+
+-static void bfq_cpd_free(struct blkcg_policy_data *cpd)
+-{
+- kfree(cpd_to_bfqgd(cpd));
+-}
+-
+-static struct cftype bfqio_files_dfl[] = {
++static struct cftype bfq_blkcg_legacy_files[] = {
+ {
+- .name = "weight",
++ .name = "bfq.weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+- .seq_show = bfqio_cgroup_weight_read_dfl,
+- .write = bfqio_cgroup_weight_write_dfl,
++ .seq_show = bfq_io_show_weight,
++ .write_u64 = bfq_io_set_weight_legacy,
+ },
+- {} /* terminate */
+-};
+
+-static struct cftype bfqio_files[] = {
+- {
+- .name = "bfq.weight",
+- .read_u64 = bfqio_cgroup_weight_read,
+- .write_u64 = bfqio_cgroup_weight_write,
+- },
+- /* statistics, cover only the tasks in the bfqg */
++ /* statistics, covers only the tasks in the bfqg */
+ {
+ .name = "bfq.time",
+ .private = offsetof(struct bfq_group, stats.time),
+@@ -1000,18 +998,17 @@ static struct cftype bfqio_files[] = {
+ },
+ {
+ .name = "bfq.sectors",
+- .private = offsetof(struct bfq_group, stats.sectors),
+- .seq_show = bfqg_print_stat,
++ .seq_show = bfqg_print_stat_sectors,
+ },
+ {
+ .name = "bfq.io_service_bytes",
+- .private = offsetof(struct bfq_group, stats.service_bytes),
+- .seq_show = bfqg_print_rwstat,
++ .private = (unsigned long)&blkcg_policy_bfq,
++ .seq_show = blkg_print_stat_bytes,
+ },
+ {
+ .name = "bfq.io_serviced",
+- .private = offsetof(struct bfq_group, stats.serviced),
+- .seq_show = bfqg_print_rwstat,
++ .private = (unsigned long)&blkcg_policy_bfq,
++ .seq_show = blkg_print_stat_ios,
+ },
+ {
+ .name = "bfq.io_service_time",
+@@ -1042,18 +1039,17 @@ static struct cftype bfqio_files[] = {
+ },
+ {
+ .name = "bfq.sectors_recursive",
+- .private = offsetof(struct bfq_group, stats.sectors),
+- .seq_show = bfqg_print_stat_recursive,
++ .seq_show = bfqg_print_stat_sectors_recursive,
+ },
+ {
+ .name = "bfq.io_service_bytes_recursive",
+- .private = offsetof(struct bfq_group, stats.service_bytes),
+- .seq_show = bfqg_print_rwstat_recursive,
++ .private = (unsigned long)&blkcg_policy_bfq,
++ .seq_show = blkg_print_stat_bytes_recursive,
+ },
+ {
+ .name = "bfq.io_serviced_recursive",
+- .private = offsetof(struct bfq_group, stats.serviced),
+- .seq_show = bfqg_print_rwstat_recursive,
++ .private = (unsigned long)&blkcg_policy_bfq,
++ .seq_show = blkg_print_stat_ios_recursive,
+ },
+ {
+ .name = "bfq.io_service_time_recursive",
+@@ -1099,32 +1095,35 @@ static struct cftype bfqio_files[] = {
+ .private = offsetof(struct bfq_group, stats.dequeue),
+ .seq_show = bfqg_print_stat,
+ },
+- {
+- .name = "bfq.unaccounted_time",
+- .private = offsetof(struct bfq_group, stats.unaccounted_time),
+- .seq_show = bfqg_print_stat,
+- },
+ { } /* terminate */
+ };
+
+-static struct blkcg_policy blkcg_policy_bfq = {
+- .dfl_cftypes = bfqio_files_dfl,
+- .legacy_cftypes = bfqio_files,
+-
+- .pd_alloc_fn = bfq_pd_alloc,
+- .pd_init_fn = bfq_pd_init,
+- .pd_offline_fn = bfq_pd_offline,
+- .pd_free_fn = bfq_pd_free,
+- .pd_reset_stats_fn = bfq_pd_reset_stats,
+-
+- .cpd_alloc_fn = bfq_cpd_alloc,
+- .cpd_init_fn = bfq_cpd_init,
+- .cpd_bind_fn = bfq_cpd_init,
+- .cpd_free_fn = bfq_cpd_free,
+-
++static struct cftype bfq_blkg_files[] = {
++ {
++ .name = "bfq.weight",
++ .flags = CFTYPE_NOT_ON_ROOT,
++ .seq_show = bfq_io_show_weight,
++ .write = bfq_io_set_weight,
++ },
++ {} /* terminate */
+ };
+
+-#else
++#else /* CONFIG_BFQ_GROUP_IOSCHED */
++
++static inline void bfqg_stats_update_io_add(struct bfq_group *bfqg,
++ struct bfq_queue *bfqq, int rw) { }
++static inline void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw) { }
++static inline void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw) { }
++static inline void bfqg_stats_update_completion(struct bfq_group *bfqg,
++ uint64_t start_time, uint64_t io_start_time, int rw) { }
++static inline void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
++				struct bfq_group *curr_bfqg) { }
++static inline void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { }
++static inline void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
++static inline void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
++static inline void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
++static inline void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
++static inline void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
+
+ static void bfq_init_entity(struct bfq_entity *entity,
+ struct bfq_group *bfqg)
+@@ -1146,29 +1145,22 @@ bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+ return bfqd->root_group;
+ }
+
+-static void bfq_bfqq_move(struct bfq_data *bfqd,
+- struct bfq_queue *bfqq,
+- struct bfq_entity *entity,
+- struct bfq_group *bfqg)
+-{
+-}
+-
+ static void bfq_end_wr_async(struct bfq_data *bfqd)
+ {
+ bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+ }
+
+-static void bfq_disconnect_groups(struct bfq_data *bfqd)
+-{
+- bfq_put_async_queues(bfqd, bfqd->root_group);
+-}
+-
+ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+ struct blkcg *blkcg)
+ {
+ return bfqd->root_group;
+ }
+
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
++{
++ return bfqq->bfqd->root_group;
++}
++
+ static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+ {
+ struct bfq_group *bfqg;
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index d1f648d..5469442 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -7,25 +7,26 @@
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ * Paolo Valente <paolo.valente@unimore.it>
+ *
+- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ * Copyright (C) 2016 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
+ * file.
+ *
+- * BFQ is a proportional-share storage-I/O scheduling algorithm based on
+- * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets,
+- * measured in number of sectors, to processes instead of time slices. The
+- * device is not granted to the in-service process for a given time slice,
+- * but until it has exhausted its assigned budget. This change from the time
+- * to the service domain allows BFQ to distribute the device throughput
+- * among processes as desired, without any distortion due to ZBR, workload
+- * fluctuations or other factors. BFQ uses an ad hoc internal scheduler,
+- * called B-WF2Q+, to schedule processes according to their budgets. More
+- * precisely, BFQ schedules queues associated to processes. Thanks to the
+- * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to
+- * I/O-bound processes issuing sequential requests (to boost the
+- * throughput), and yet guarantee a low latency to interactive and soft
+- * real-time applications.
++ * BFQ is a proportional-share storage-I/O scheduling algorithm based
++ * on the slice-by-slice service scheme of CFQ. But BFQ assigns
++ * budgets, measured in number of sectors, to processes instead of
++ * time slices. The device is not granted to the in-service process
++ * for a given time slice, but until it has exhausted its assigned
++ * budget. This change from the time to the service domain enables BFQ
++ * to distribute the device throughput among processes as desired,
++ * without any distortion due to throughput fluctuations, or to device
++ * internal queueing. BFQ uses an ad hoc internal scheduler, called
++ * B-WF2Q+, to schedule processes according to their budgets. More
++ * precisely, BFQ schedules queues associated with processes. Thanks to
++ * the accurate policy of B-WF2Q+, BFQ can afford to assign high
++ * budgets to I/O-bound processes issuing sequential requests (to
++ * boost the throughput), and yet guarantee a low latency to
++ * interactive and soft real-time applications.
+ *
+ * BFQ is described in [1], where also a reference to the initial, more
+ * theoretical paper on BFQ can be found. The interested reader can find
+@@ -87,7 +88,6 @@ static const int bfq_stats_min_budgets = 194;
+
+ /* Default maximum budget values, in sectors and number of requests. */
+ static const int bfq_default_max_budget = 16 * 1024;
+-static const int bfq_max_budget_async_rq = 4;
+
+ /*
+ * Async to sync throughput distribution is controlled as follows:
+@@ -97,8 +97,7 @@ static const int bfq_max_budget_async_rq = 4;
+ static const int bfq_async_charge_factor = 10;
+
+ /* Default timeout values, in jiffies, approximating CFQ defaults. */
+-static const int bfq_timeout_sync = HZ / 8;
+-static int bfq_timeout_async = HZ / 25;
++static const int bfq_timeout = HZ / 8;
+
+ struct kmem_cache *bfq_pool;
+
+@@ -109,8 +108,9 @@ struct kmem_cache *bfq_pool;
+ #define BFQ_HW_QUEUE_THRESHOLD 4
+ #define BFQ_HW_QUEUE_SAMPLES 32
+
+-#define BFQQ_SEEK_THR (sector_t)(8 * 1024)
+-#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR)
++#define BFQQ_SEEK_THR (sector_t)(8 * 100)
++#define BFQQ_CLOSE_THR (sector_t)(8 * 1024)
++#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8)
+
+ /* Min samples used for peak rate estimation (for autotuning). */
+ #define BFQ_PEAK_RATE_SAMPLES 32
+@@ -141,16 +141,24 @@ struct kmem_cache *bfq_pool;
+ * The device's speed class is dynamically (re)detected in
+ * bfq_update_peak_rate() every time the estimated peak rate is updated.
+ *
+- * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0]
+- * are the reference values for a slow/fast rotational device, whereas
+- * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for
+- * a slow/fast non-rotational device. Finally, device_speed_thresh are the
+- * thresholds used to switch between speed classes.
++ * In the following definitions, R_slow[0]/R_fast[0] and
++ * T_slow[0]/T_fast[0] are the reference values for a slow/fast
++ * rotational device, whereas R_slow[1]/R_fast[1] and
++ * T_slow[1]/T_fast[1] are the reference values for a slow/fast
++ * non-rotational device. Finally, device_speed_thresh are the
++ * thresholds used to switch between speed classes. The reference
++ * rates are not the actual peak rates of the devices used as a
++ * reference, but slightly lower values. The reason for using these
++ * slightly lower values is that the peak-rate estimator tends to
++ * yield slightly lower values than the actual peak rate (it can yield
++ * the actual peak rate only if there is only one process doing I/O,
++ * and the process does sequential I/O).
++ *
+ * Both the reference peak rates and the thresholds are measured in
+ * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
+ */
+-static int R_slow[2] = {1536, 10752};
+-static int R_fast[2] = {17415, 34791};
++static int R_slow[2] = {1000, 10700};
++static int R_fast[2] = {14000, 33000};
+ /*
+ * To improve readability, a conversion function is used to initialize the
+ * following arrays, which entails that they can be initialized only in a
+@@ -410,11 +418,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd)
+ */
+ static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
+ {
+- return
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- !bfqd->active_numerous_groups &&
+-#endif
+- !bfq_differentiated_weights(bfqd);
++ return !bfq_differentiated_weights(bfqd);
+ }
+
+ /*
+@@ -534,9 +538,19 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
+ static unsigned long bfq_serv_to_charge(struct request *rq,
+ struct bfq_queue *bfqq)
+ {
+- return blk_rq_sectors(rq) *
+- (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) *
+- bfq_async_charge_factor));
++ if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1)
++ return blk_rq_sectors(rq);
++
++ /*
++ * If there are no weight-raised queues, then amplify service
++ * by just the async charge factor; otherwise amplify service
++ * by twice the async charge factor, to further reduce latency
++ * for weight-raised queues.
++ */
++ if (bfqq->bfqd->wr_busy_queues == 0)
++ return blk_rq_sectors(rq) * bfq_async_charge_factor;
++
++ return blk_rq_sectors(rq) * 2 * bfq_async_charge_factor;
+ }
+
+ /**
+@@ -591,12 +605,23 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+ dur = bfqd->RT_prod;
+ do_div(dur, bfqd->peak_rate);
+
+- return dur;
+-}
++ /*
++ * Limit duration between 3 and 13 seconds. Tests show that
++ * higher values than 13 seconds often yield the opposite of
++ * the desired result, i.e., worsen responsiveness by letting
++ * non-interactive and non-soft-real-time applications
++ * preserve weight raising for too long a time interval.
++ *
++ * On the other hand, lower values than 3 seconds make it
++ * difficult for most interactive tasks to complete their jobs
++ * before weight-raising finishes.
++ */
++ if (dur > msecs_to_jiffies(13000))
++ dur = msecs_to_jiffies(13000);
++ else if (dur < msecs_to_jiffies(3000))
++ dur = msecs_to_jiffies(3000);
+
+-static unsigned bfq_bfqq_cooperations(struct bfq_queue *bfqq)
+-{
+- return bfqq->bic ? bfqq->bic->cooperations : 0;
++ return dur;
+ }
+
+ static void
+@@ -606,31 +631,11 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+ bfq_mark_bfqq_idle_window(bfqq);
+ else
+ bfq_clear_bfqq_idle_window(bfqq);
++
+ if (bic->saved_IO_bound)
+ bfq_mark_bfqq_IO_bound(bfqq);
+ else
+ bfq_clear_bfqq_IO_bound(bfqq);
+- /* Assuming that the flag in_large_burst is already correctly set */
+- if (bic->wr_time_left && bfqq->bfqd->low_latency &&
+- !bfq_bfqq_in_large_burst(bfqq) &&
+- bic->cooperations < bfqq->bfqd->bfq_coop_thresh) {
+- /*
+- * Start a weight raising period with the duration given by
+- * the raising_time_left snapshot.
+- */
+- if (bfq_bfqq_busy(bfqq))
+- bfqq->bfqd->wr_busy_queues++;
+- bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff;
+- bfqq->wr_cur_max_time = bic->wr_time_left;
+- bfqq->last_wr_start_finish = jiffies;
+- bfqq->entity.prio_changed = 1;
+- }
+- /*
+- * Clear wr_time_left to prevent bfq_bfqq_save_state() from
+- * getting confused about the queue's need of a weight-raising
+- * period.
+- */
+- bic->wr_time_left = 0;
+ }
+
+ static int bfqq_process_refs(struct bfq_queue *bfqq)
+@@ -640,7 +645,7 @@ static int bfqq_process_refs(struct bfq_queue *bfqq)
+ lockdep_assert_held(bfqq->bfqd->queue->queue_lock);
+
+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
+- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
++ process_refs = bfqq->ref - io_refs - bfqq->entity.on_st;
+ BUG_ON(process_refs < 0);
+ return process_refs;
+ }
+@@ -655,6 +660,7 @@ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ hlist_del_init(&item->burst_list_node);
+ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+ bfqd->burst_size = 1;
++ bfqd->burst_parent_entity = bfqq->entity.parent;
+ }
+
+ /* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */
+@@ -663,6 +669,10 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ /* Increment burst size to take into account also bfqq */
+ bfqd->burst_size++;
+
++ bfq_log_bfqq(bfqd, bfqq, "add_to_burst %d", bfqd->burst_size);
++
++ BUG_ON(bfqd->burst_size > bfqd->bfq_large_burst_thresh);
++
+ if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) {
+ struct bfq_queue *pos, *bfqq_item;
+ struct hlist_node *n;
+@@ -672,15 +682,19 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ * other to consider this burst as large.
+ */
+ bfqd->large_burst = true;
++ bfq_log_bfqq(bfqd, bfqq, "add_to_burst: large burst started");
+
+ /*
+ * We can now mark all queues in the burst list as
+ * belonging to a large burst.
+ */
+ hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
+- burst_list_node)
++ burst_list_node) {
+ bfq_mark_bfqq_in_large_burst(bfqq_item);
++ bfq_log_bfqq(bfqd, bfqq_item, "marked in large burst");
++ }
+ bfq_mark_bfqq_in_large_burst(bfqq);
++ bfq_log_bfqq(bfqd, bfqq, "marked in large burst");
+
+ /*
+ * From now on, and until the current burst finishes, any
+@@ -692,67 +706,79 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ hlist_for_each_entry_safe(pos, n, &bfqd->burst_list,
+ burst_list_node)
+ hlist_del_init(&pos->burst_list_node);
+- } else /* burst not yet large: add bfqq to the burst list */
++ } else /*
++ * Burst not yet large: add bfqq to the burst list. Do
++ * not increment the ref counter for bfqq, because bfqq
++ * is removed from the burst list before freeing bfqq
++ * in put_queue.
++ */
+ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+ }
+
+ /*
+- * If many queues happen to become active shortly after each other, then,
+- * to help the processes associated to these queues get their job done as
+- * soon as possible, it is usually better to not grant either weight-raising
+- * or device idling to these queues. In this comment we describe, firstly,
+- * the reasons why this fact holds, and, secondly, the next function, which
+- * implements the main steps needed to properly mark these queues so that
+- * they can then be treated in a different way.
++ * If many queues belonging to the same group happen to be created
++ * shortly after each other, then the processes associated with these
++ * queues typically have a common goal. In particular, bursts of queue
++ * creations are usually caused by services or applications that spawn
++ * many parallel threads/processes. Examples are systemd during boot,
++ * or git grep. To help these processes get their job done as soon as
++ * possible, it is usually better to not grant either weight-raising
++ * or device idling to their queues.
+ *
+- * As for the terminology, we say that a queue becomes active, i.e.,
+- * switches from idle to backlogged, either when it is created (as a
+- * consequence of the arrival of an I/O request), or, if already existing,
+- * when a new request for the queue arrives while the queue is idle.
+- * Bursts of activations, i.e., activations of different queues occurring
+- * shortly after each other, are typically caused by services or applications
+- * that spawn or reactivate many parallel threads/processes. Examples are
+- * systemd during boot or git grep.
++ * In this comment we describe, firstly, the reasons why this fact
++ * holds, and, secondly, the next function, which implements the main
++ * steps needed to properly mark these queues so that they can then be
++ * treated in a different way.
+ *
+- * These services or applications benefit mostly from a high throughput:
+- * the quicker the requests of the activated queues are cumulatively served,
+- * the sooner the target job of these queues gets completed. As a consequence,
+- * weight-raising any of these queues, which also implies idling the device
+- * for it, is almost always counterproductive: in most cases it just lowers
+- * throughput.
++ * The above services or applications benefit mostly from a high
++ * throughput: the quicker the requests of the activated queues are
++ * cumulatively served, the sooner the target job of these queues gets
++ * completed. As a consequence, weight-raising any of these queues,
++ * which also implies idling the device for it, is almost always
++ * counterproductive. In most cases it just lowers throughput.
+ *
+- * On the other hand, a burst of activations may be also caused by the start
+- * of an application that does not consist in a lot of parallel I/O-bound
+- * threads. In fact, with a complex application, the burst may be just a
+- * consequence of the fact that several processes need to be executed to
+- * start-up the application. To start an application as quickly as possible,
+- * the best thing to do is to privilege the I/O related to the application
+- * with respect to all other I/O. Therefore, the best strategy to start as
+- * quickly as possible an application that causes a burst of activations is
+- * to weight-raise all the queues activated during the burst. This is the
++ * On the other hand, a burst of queue creations may also be caused by
++ * the start of an application that does not consist of a lot of
++ * parallel I/O-bound threads. In fact, with a complex application,
++ * several short processes may need to be executed to start-up the
++ * application. In this respect, to start an application as quickly as
++ * possible, the best thing to do is in any case to privilege the I/O
++ * related to the application with respect to all other
++ * I/O. Therefore, the best strategy for starting, as quickly as
++ * possible, an application that causes a burst of queue creations is
++ * to weight-raise all the queues created during the burst. This is the
+ * exact opposite of the best strategy for the other type of bursts.
+ *
+- * In the end, to take the best action for each of the two cases, the two
+- * types of bursts need to be distinguished. Fortunately, this seems
+- * relatively easy to do, by looking at the sizes of the bursts. In
+- * particular, we found a threshold such that bursts with a larger size
+- * than that threshold are apparently caused only by services or commands
+- * such as systemd or git grep. For brevity, hereafter we call just 'large'
+- * these bursts. BFQ *does not* weight-raise queues whose activations occur
+- * in a large burst. In addition, for each of these queues BFQ performs or
+- * does not perform idling depending on which choice boosts the throughput
+- * most. The exact choice depends on the device and request pattern at
++ * In the end, to take the best action for each of the two cases, the
++ * two types of bursts need to be distinguished. Fortunately, this
++ * seems relatively easy, by looking at the sizes of the bursts. In
++ * particular, we found a threshold such that only bursts with a
++ * larger size than that threshold are apparently caused by
++ * services or commands such as systemd or git grep. For brevity,
++ * hereafter we call just 'large' these bursts. BFQ *does not*
++ * weight-raise queues whose creation occurs in a large burst. In
++ * addition, for each of these queues BFQ performs or does not perform
++ * idling depending on which choice boosts the throughput more. The
++ * exact choice depends on the device and request pattern at
+ * hand.
+ *
+- * Turning back to the next function, it implements all the steps needed
+- * to detect the occurrence of a large burst and to properly mark all the
+- * queues belonging to it (so that they can then be treated in a different
+- * way). This goal is achieved by maintaining a special "burst list" that
+- * holds, temporarily, the queues that belong to the burst in progress. The
+- * list is then used to mark these queues as belonging to a large burst if
+- * the burst does become large. The main steps are the following.
++ * Unfortunately, false positives may occur while an interactive task
++ * is starting (e.g., an application is being started). The
++ * consequence is that the queues associated with the task do not
++ * enjoy weight raising as expected. Fortunately these false positives
++ * are very rare. They typically occur if some service happens to
++ * start doing I/O exactly when the interactive task starts.
++ *
++ * Turning back to the next function, it implements all the steps
++ * needed to detect the occurrence of a large burst and to properly
++ * mark all the queues belonging to it (so that they can then be
++ * treated in a different way). This goal is achieved by maintaining a
++ * "burst list" that holds, temporarily, the queues that belong to the
++ * burst in progress. The list is then used to mark these queues as
++ * belonging to a large burst if the burst does become large. The main
++ * steps are the following.
+ *
+- * . when the very first queue is activated, the queue is inserted into the
++ * . when the very first queue is created, the queue is inserted into the
+ * list (as it could be the first queue in a possible burst)
+ *
+ * . if the current burst has not yet become large, and a queue Q that does
+@@ -773,13 +799,13 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ *
+ * . the device enters a large-burst mode
+ *
+- * . if a queue Q that does not belong to the burst is activated while
++ * . if a queue Q that does not belong to the burst is created while
+ * the device is in large-burst mode and shortly after the last time
+ * at which a queue either entered the burst list or was marked as
+ * belonging to the current large burst, then Q is immediately marked
+ * as belonging to a large burst.
+ *
+- * . if a queue Q that does not belong to the burst is activated a while
++ * . if a queue Q that does not belong to the burst is created a while
+ * later, i.e., not shortly after, than the last time at which a queue
+ * either entered the burst list or was marked as belonging to the
+ * current large burst, then the current burst is deemed as finished and:
+@@ -792,52 +818,44 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ * in a possible new burst (then the burst list contains just Q
+ * after this step).
+ */
+-static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+- bool idle_for_long_time)
++static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ /*
+- * If bfqq happened to be activated in a burst, but has been idle
+- * for at least as long as an interactive queue, then we assume
+- * that, in the overall I/O initiated in the burst, the I/O
+- * associated to bfqq is finished. So bfqq does not need to be
+- * treated as a queue belonging to a burst anymore. Accordingly,
+- * we reset bfqq's in_large_burst flag if set, and remove bfqq
+- * from the burst list if it's there. We do not decrement instead
+- * burst_size, because the fact that bfqq does not need to belong
+- * to the burst list any more does not invalidate the fact that
+- * bfqq may have been activated during the current burst.
+- */
+- if (idle_for_long_time) {
+- hlist_del_init(&bfqq->burst_list_node);
+- bfq_clear_bfqq_in_large_burst(bfqq);
+- }
+-
+- /*
+ * If bfqq is already in the burst list or is part of a large
+- * burst, then there is nothing else to do.
++ * burst, or finally has just been split, then there is
++ * nothing else to do.
+ */
+ if (!hlist_unhashed(&bfqq->burst_list_node) ||
+- bfq_bfqq_in_large_burst(bfqq))
++ bfq_bfqq_in_large_burst(bfqq) ||
++ time_is_after_eq_jiffies(bfqq->split_time +
++ msecs_to_jiffies(10)))
+ return;
+
+ /*
+- * If bfqq's activation happens late enough, then the current
+- * burst is finished, and related data structures must be reset.
++ * If bfqq's creation happens late enough, or bfqq belongs to
++ * a different group than the burst group, then the current
++ * burst is finished, and related data structures must be
++ * reset.
+ *
+- * In this respect, consider the special case where bfqq is the very
+- * first queue being activated. In this case, last_ins_in_burst is
+- * not yet significant when we get here. But it is easy to verify
+- * that, whether or not the following condition is true, bfqq will
+- * end up being inserted into the burst list. In particular the
+- * list will happen to contain only bfqq. And this is exactly what
+- * has to happen, as bfqq may be the first queue in a possible
++ * In this respect, consider the special case where bfqq is
++ * the very first queue created after BFQ is selected for this
++ * device. In this case, last_ins_in_burst and
++ * burst_parent_entity are not yet significant when we get
++ * here. But it is easy to verify that, whether or not the
++ * following condition is true, bfqq will end up being
++ * inserted into the burst list. In particular the list will
++ * happen to contain only bfqq. And this is exactly what has
++ * to happen, as bfqq may be the first queue of the first
+ * burst.
+ */
+ if (time_is_before_jiffies(bfqd->last_ins_in_burst +
+- bfqd->bfq_burst_interval)) {
++ bfqd->bfq_burst_interval) ||
++ bfqq->entity.parent != bfqd->burst_parent_entity) {
+ bfqd->large_burst = false;
+ bfq_reset_burst_list(bfqd, bfqq);
+- return;
++ bfq_log_bfqq(bfqd, bfqq,
++ "handle_burst: late activation or different group");
++ goto end;
+ }
+
+ /*
+@@ -846,8 +864,9 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ * bfqq as belonging to this large burst immediately.
+ */
+ if (bfqd->large_burst) {
++ bfq_log_bfqq(bfqd, bfqq, "handle_burst: marked in burst");
+ bfq_mark_bfqq_in_large_burst(bfqq);
+- return;
++ goto end;
+ }
+
+ /*
+@@ -856,25 +875,492 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ * queue. Then we add bfqq to the burst.
+ */
+ bfq_add_to_burst(bfqd, bfqq);
++end:
++ /*
++ * At this point, bfqq either has been added to the current
++ * burst or has caused the current burst to terminate and a
++ * possible new burst to start. In particular, in the second
++ * case, bfqq has become the first queue in the possible new
++ * burst. In both cases last_ins_in_burst needs to be moved
++ * forward.
++ */
++ bfqd->last_ins_in_burst = jiffies;
++
++}
++
++static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++ return entity->budget - entity->service;
++}
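++
++/*
++ * E.g., a queue assigned a budget of 8192 sectors that has received
++ * 5120 sectors of service so far has 3072 sectors of budget left,
++ * and may dispatch up to that much I/O before being expired for
++ * budget exhaustion (illustrative figures).
++ */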
++
++/*
++ * If enough samples have been computed, return the current max budget
++ * stored in bfqd, which is dynamically updated according to the
++ * estimated disk peak rate; otherwise return the default max budget
++ */
++static int bfq_max_budget(struct bfq_data *bfqd)
++{
++ if (bfqd->budgets_assigned < bfq_stats_min_budgets)
++ return bfq_default_max_budget;
++ else
++ return bfqd->bfq_max_budget;
++}
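++
++/*
++ * Note: budgets_assigned is a smoothed counter that rises toward 256
++ * as budgets get assigned (it is updated as
++ * (budgets_assigned * 7 + 256) / 8 in __bfq_set_in_service_queue);
++ * until it reaches bfq_stats_min_budgets, too few rate samples have
++ * been collected for bfqd->bfq_max_budget to be trusted.
++ */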
++
++/*
++ * Return min budget, which is a fraction of the current or default
++ * max budget (trying with 1/32)
++ */
++static int bfq_min_budget(struct bfq_data *bfqd)
++{
++ if (bfqd->budgets_assigned < bfq_stats_min_budgets)
++ return bfq_default_max_budget / 32;
++ else
++ return bfqd->bfq_max_budget / 32;
++}
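++
++/*
++ * E.g., with a max budget of, say, 16384 sectors, the min budget is
++ * 16384 / 32 = 512 sectors, i.e., 256 KB with 512-byte sectors.
++ */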
++
++static void bfq_bfqq_expire(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ bool compensate,
++ enum bfqq_expiration reason);
++
++/*
++ * The next function, invoked after the input queue bfqq switches from
++ * idle to busy, updates the budget of bfqq. The function also tells
++ * whether the in-service queue should be expired, by returning
++ * true. The purpose of expiring the in-service queue is to give bfqq
++ * the chance to possibly preempt the in-service queue, and the reason
++ * for preempting the in-service queue is to achieve one of the two
++ * goals below.
++ *
++ * 1. Guarantee to bfqq its reserved bandwidth even if bfqq has
++ * expired because it has remained idle. In particular, bfqq may have
++ * expired for one of the following two reasons:
++ *
++ * - BFQ_BFQQ_NO_MORE_REQUESTS bfqq did not enjoy any device idling and
++ * did not make it to issue a new request before its last request
++ * was served;
++ *
++ * - BFQ_BFQQ_TOO_IDLE bfqq did enjoy device idling, but did not issue
++ * a new request before the expiration of the idling-time.
++ *
++ * Even if bfqq has expired for one of the above reasons, the process
++ * associated with the queue may be however issuing requests greedily,
++ * and thus be sensitive to the bandwidth it receives (bfqq may have
++ * remained idle for other reasons: CPU high load, bfqq not enjoying
++ * idling, I/O throttling somewhere in the path from the process to
++ * the I/O scheduler, ...). But if, after every expiration for one of
++ * the above two reasons, bfqq has to wait for the service of at least
++ * one full budget of another queue before being served again, then
++ * bfqq is likely to get a much lower bandwidth or resource time than
++ * its reserved ones. To address this issue, two countermeasures need
++ * to be taken.
++ *
++ * First, the budget and the timestamps of bfqq need to be updated in
++ * a special way on bfqq reactivation: they need to be updated as if
++ * bfqq did not remain idle and did not expire. In fact, if they are
++ * computed as if bfqq expired and remained idle until reactivation,
++ * then the process associated with bfqq is treated as if, instead of
++ * being greedy, it stopped issuing requests when bfqq remained idle,
++ * and restarts issuing requests only on this reactivation. In other
++ * words, the scheduler does not help the process recover the "service
++ * hole" between bfqq expiration and reactivation. As a consequence,
++ * the process receives a lower bandwidth than its reserved one. In
++ * contrast, to recover this hole, the budget must be updated as if
++ * bfqq was not expired at all before this reactivation, i.e., it must
++ * be set to the value of the remaining budget when bfqq was
++ * expired. Along the same line, timestamps need to be assigned the
++ * value they had the last time bfqq was selected for service, i.e.,
++ * before last expiration. Thus timestamps need to be back-shifted
++ * with respect to their normal computation (see [1] for more details
++ * on this tricky aspect).
++ *
++ * Secondly, to allow the process to recover the hole, the in-service
++ * queue must be expired too, to give bfqq the chance to preempt it
++ * immediately. In fact, if bfqq has to wait for a full budget of the
++ * in-service queue to be completed, then it may become impossible to
++ * let the process recover the hole, even if the back-shifted
++ * timestamps of bfqq are lower than those of the in-service queue. If
++ * this happens for most or all of the holes, then the process may not
++ * receive its reserved bandwidth. In this respect, it is worth noting
++ * that, since the service of outstanding requests is not preemptible,
++ * a small fraction of the holes may be unrecoverable, thereby causing
++ * a small loss of bandwidth.
++ *
++ * The last important point is detecting whether bfqq does need this
++ * bandwidth recovery. In this respect, the next function deems the
++ * process associated with bfqq greedy, and thus allows it to recover
++ * the hole, if: 1) the process is waiting for the arrival of a new
++ * request (which implies that bfqq expired for one of the above two
++ * reasons), and 2) such a request has arrived soon. The first
++ * condition is controlled through the flag non_blocking_wait_rq,
++ * while the second through the flag arrived_in_time. If both
++ * conditions hold, then the function computes the budget in the
++ * above-described special way, and signals that the in-service queue
++ * should be expired. Timestamp back-shifting is done later in
++ * __bfq_activate_entity.
++ *
++ * 2. Reduce latency. Even if timestamps are not backshifted to let
++ * the process associated with bfqq recover a service hole, bfqq may
++ * however happen to have, after being (re)activated, a lower finish
++ * timestamp than the in-service queue. That is, the next budget of
++ * bfqq may have to be completed before the one of the in-service
++ * queue. If this is the case, then preempting the in-service queue
++ * allows this goal to be achieved, apart from the unpreemptible,
++ * outstanding requests mentioned above.
++ *
++ * Unfortunately, regardless of which of the above two goals one wants
++ * to achieve, service trees need first to be updated to know whether
++ * the in-service queue must be preempted. To have service trees
++ * correctly updated, the in-service queue must be expired and
++ * rescheduled, and bfqq must be scheduled too. This is one of the
++ * most costly operations (in future versions, the scheduling
++ * mechanism may be re-designed in such a way to make it possible to
++ * know whether preemption is needed without needing to update service
++ * trees). In addition, queue preemptions almost always cause random
++ * I/O, and thus loss of throughput. Because of these facts, the next
++ * function adopts the following simple scheme to avoid both costly
++ * operations and too frequent preemptions: it requests the expiration
++ * of the in-service queue (unconditionally) only for queues that need
++ * to recover a hole, or that either are weight-raised or deserve to
++ * be weight-raised.
++ */
++static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ bool arrived_in_time,
++ bool wr_or_deserves_wr)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++
++ if (bfq_bfqq_non_blocking_wait_rq(bfqq) && arrived_in_time) {
++ /*
++ * We do not clear the flag non_blocking_wait_rq here, as
++ * the latter is used in bfq_activate_bfqq to signal
++ * that timestamps need to be back-shifted (and is
++ * cleared right after).
++ */
++
++ /*
++		 * In the next assignment we rely on the fact that
++		 * neither entity->service nor entity->budget is
++		 * updated on expiration if bfqq is empty (see
++ * __bfq_bfqq_recalc_budget). Thus both quantities
++ * remain unchanged after such an expiration, and the
++ * following statement therefore assigns to
++ * entity->budget the remaining budget on such an
++ * expiration. For clarity, entity->service is not
++ * updated on expiration in any case, and, in normal
++ * operation, is reset only when bfqq is selected for
++ * service (see bfq_get_next_queue).
++ */
++ entity->budget = min_t(unsigned long,
++ bfq_bfqq_budget_left(bfqq),
++ bfqq->max_budget);
++
++ BUG_ON(entity->budget < 0);
++ return true;
++ }
++
++ entity->budget = max_t(unsigned long, bfqq->max_budget,
++			       bfq_serv_to_charge(bfqq->next_rq, bfqq));
++ BUG_ON(entity->budget < 0);
++
++ bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
++ return wr_or_deserves_wr;
++}
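++
++/*
++ * A true return value above is only a hint that expiring the
++ * in-service queue may be needed: the caller
++ * (bfq_bfqq_handle_idle_busy_switch) actually expires it only if it
++ * is not weight-raised and next_queue_may_preempt(bfqd) holds too.
++ */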
++
++static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ unsigned int old_wr_coeff,
++ bool wr_or_deserves_wr,
++ bool interactive,
++ bool in_burst,
++ bool soft_rt)
++{
++ if (old_wr_coeff == 1 && wr_or_deserves_wr) {
++ /* start a weight-raising period */
++ bfqq->wr_coeff = bfqd->bfq_wr_coeff;
++ if (interactive) /* update wr duration */
++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++ else
++ bfqq->wr_cur_max_time =
++ bfqd->bfq_wr_rt_max_time;
++ /*
++ * If needed, further reduce budget to make sure it is
++ * close to bfqq's backlog, so as to reduce the
++ * scheduling-error component due to a too large
++ * budget. Do not care about throughput consequences,
++ * but only about latency. Finally, do not assign a
++ * too small budget either, to avoid increasing
++ * latency by causing too frequent expirations.
++ */
++ bfqq->entity.budget = min_t(unsigned long,
++ bfqq->entity.budget,
++ 2 * bfq_min_budget(bfqd));
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "wrais starting at %lu, rais_max_time %u",
++ jiffies,
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ } else if (old_wr_coeff > 1) {
++ if (interactive) /* update wr duration */
++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++ else if (in_burst) {
++ bfqq->wr_coeff = 1;
++ bfq_log_bfqq(bfqd, bfqq,
++ "wrais ending at %lu, rais_max_time %u",
++ jiffies,
++ jiffies_to_msecs(bfqq->
++ wr_cur_max_time));
++ } else if (time_before(
++ bfqq->last_wr_start_finish +
++ bfqq->wr_cur_max_time,
++ jiffies +
++ bfqd->bfq_wr_rt_max_time) &&
++ soft_rt) {
++ /*
++ * The remaining weight-raising time is lower
++ * than bfqd->bfq_wr_rt_max_time, which means
++ * that the application is enjoying weight
++ * raising either because deemed soft-rt in
++ * the near past, or because deemed interactive
++			 * long ago.
++ * In both cases, resetting now the current
++ * remaining weight-raising time for the
++ * application to the weight-raising duration
++ * for soft rt applications would not cause any
++ * latency increase for the application (as the
++ * new duration would be higher than the
++ * remaining time).
++ *
++ * In addition, the application is now meeting
++ * the requirements for being deemed soft rt.
++ * In the end we can correctly and safely
++ * (re)charge the weight-raising duration for
++ * the application with the weight-raising
++ * duration for soft rt applications.
++ *
++ * In particular, doing this recharge now, i.e.,
++ * before the weight-raising period for the
++ * application finishes, reduces the probability
++ * of the following negative scenario:
++ * 1) the weight of a soft rt application is
++ * raised at startup (as for any newly
++ * created application),
++ * 2) since the application is not interactive,
++ * at a certain time weight-raising is
++ * stopped for the application,
++ * 3) at that time the application happens to
++ * still have pending requests, and hence
++ * is destined to not have a chance to be
++ * deemed soft rt before these requests are
++ * completed (see the comments to the
++ * function bfq_bfqq_softrt_next_start()
++ * for details on soft rt detection),
++ * 4) these pending requests experience a high
++ * latency because the application is not
++ * weight-raised while they are pending.
++ */
++ bfqq->last_wr_start_finish = jiffies;
++ bfqq->wr_cur_max_time =
++ bfqd->bfq_wr_rt_max_time;
++ bfq_log_bfqq(bfqd, bfqq,
++ "switching to soft_rt wr, or "
++				     "just moving forward duration");
++ }
++ }
++}
++
++static bool bfq_bfqq_idle_for_long_time(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ return bfqq->dispatched == 0 &&
++ time_is_before_jiffies(
++ bfqq->budget_timeout +
++ bfqd->bfq_wr_min_idle_time);
++}
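++
++/*
++ * E.g., suppose bfqq emptied with no outstanding request, so that
++ * budget_timeout was set to jiffies (see __bfq_bfqq_expire), and that
++ * bfq_wr_min_idle_time amounts to, say, 2000 ms: if the next request
++ * arrives 3 s later, the function returns true and bfqq qualifies as
++ * having been idle for a long time.
++ */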
++
++static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ int old_wr_coeff,
++ struct request *rq,
++ bool *interactive)
++{
++ bool soft_rt, in_burst, wr_or_deserves_wr,
++ bfqq_wants_to_preempt,
++ idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq),
++ /*
++ * See the comments on
++ * bfq_bfqq_update_budg_for_activation for
++ * details on the usage of the next variable.
++ */
++ arrived_in_time = time_is_after_jiffies(
++ RQ_BIC(rq)->ttime.last_end_request +
++ bfqd->bfq_slice_idle * 3);
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "bfq_add_request non-busy: "
++ "jiffies %lu, in_time %d, idle_long %d busyw %d "
++ "wr_coeff %u",
++ jiffies, arrived_in_time,
++ idle_for_long_time,
++ bfq_bfqq_non_blocking_wait_rq(bfqq),
++ old_wr_coeff);
++
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
++
++ BUG_ON(bfqq == bfqd->in_service_queue);
++ bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq,
++ rq->cmd_flags);
++
++ /*
++ * bfqq deserves to be weight-raised if:
++ * - it is sync,
++ * - it does not belong to a large burst,
++ * - it has been idle for enough time or is soft real-time,
++	 * - it is linked to a bfq_io_cq (it is not shared in any sense)
++ */
++ in_burst = bfq_bfqq_in_large_burst(bfqq);
++ soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
++ !in_burst &&
++ time_is_before_jiffies(bfqq->soft_rt_next_start);
++ *interactive =
++ !in_burst &&
++ idle_for_long_time;
++ wr_or_deserves_wr = bfqd->low_latency &&
++ (bfqq->wr_coeff > 1 ||
++ (bfq_bfqq_sync(bfqq) &&
++ bfqq->bic && (*interactive || soft_rt)));
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "bfq_add_request: "
++ "in_burst %d, "
++ "soft_rt %d (next %lu), inter %d, bic %p",
++ bfq_bfqq_in_large_burst(bfqq), soft_rt,
++ bfqq->soft_rt_next_start,
++ *interactive,
++ bfqq->bic);
++
++ /*
++ * Using the last flag, update budget and check whether bfqq
++ * may want to preempt the in-service queue.
++ */
++ bfqq_wants_to_preempt =
++ bfq_bfqq_update_budg_for_activation(bfqd, bfqq,
++ arrived_in_time,
++ wr_or_deserves_wr);
++
++ /*
++ * If bfqq happened to be activated in a burst, but has been
++	 * idle for much longer than an interactive queue would be, then we
++ * assume that, in the overall I/O initiated in the burst, the
++ * I/O associated with bfqq is finished. So bfqq does not need
++ * to be treated as a queue belonging to a burst
++ * anymore. Accordingly, we reset bfqq's in_large_burst flag
++ * if set, and remove bfqq from the burst list if it's
++ * there. We do not decrement burst_size, because the fact
++ * that bfqq does not need to belong to the burst list any
++ * more does not invalidate the fact that bfqq was created in
++ * a burst.
++ */
++ if (likely(!bfq_bfqq_just_created(bfqq)) &&
++ idle_for_long_time &&
++ time_is_before_jiffies(
++ bfqq->budget_timeout +
++ msecs_to_jiffies(10000))) {
++ hlist_del_init(&bfqq->burst_list_node);
++ bfq_clear_bfqq_in_large_burst(bfqq);
++ }
++
++ bfq_clear_bfqq_just_created(bfqq);
++
++ if (!bfq_bfqq_IO_bound(bfqq)) {
++ if (arrived_in_time) {
++ bfqq->requests_within_timer++;
++ if (bfqq->requests_within_timer >=
++ bfqd->bfq_requests_within_timer)
++ bfq_mark_bfqq_IO_bound(bfqq);
++ } else
++ bfqq->requests_within_timer = 0;
++ bfq_log_bfqq(bfqd, bfqq, "requests in time %d",
++ bfqq->requests_within_timer);
++ }
++
++ if (bfqd->low_latency) {
++ if (unlikely(time_is_after_jiffies(bfqq->split_time)))
++ /* wraparound */
++ bfqq->split_time =
++ jiffies - bfqd->bfq_wr_min_idle_time - 1;
++
++ if (time_is_before_jiffies(bfqq->split_time +
++ bfqd->bfq_wr_min_idle_time)) {
++ bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq,
++ old_wr_coeff,
++ wr_or_deserves_wr,
++ *interactive,
++ in_burst,
++ soft_rt);
++
++ if (old_wr_coeff != bfqq->wr_coeff)
++ bfqq->entity.prio_changed = 1;
++ }
++ }
++
++ bfqq->last_idle_bklogged = jiffies;
++ bfqq->service_from_backlogged = 0;
++ bfq_clear_bfqq_softrt_update(bfqq);
++
++ bfq_add_bfqq_busy(bfqd, bfqq);
++
++ /*
++ * Expire in-service queue only if preemption may be needed
++ * for guarantees. In this respect, the function
++ * next_queue_may_preempt just checks a simple, necessary
++ * condition, and not a sufficient condition based on
++ * timestamps. In fact, for the latter condition to be
++ * evaluated, timestamps would need first to be updated, and
++ * this operation is quite costly (see the comments on the
++ * function bfq_bfqq_update_budg_for_activation).
++ */
++ if (bfqd->in_service_queue && bfqq_wants_to_preempt &&
++ bfqd->in_service_queue->wr_coeff == 1 &&
++ next_queue_may_preempt(bfqd)) {
++ struct bfq_queue *in_serv =
++ bfqd->in_service_queue;
++ BUG_ON(in_serv == bfqq);
++
++ bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
++ false, BFQ_BFQQ_PREEMPTED);
++ BUG_ON(in_serv->entity.budget < 0);
++ }
+ }
+
+ static void bfq_add_request(struct request *rq)
+ {
+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
+- struct bfq_entity *entity = &bfqq->entity;
+ struct bfq_data *bfqd = bfqq->bfqd;
+ struct request *next_rq, *prev;
+- unsigned long old_wr_coeff = bfqq->wr_coeff;
++ unsigned int old_wr_coeff = bfqq->wr_coeff;
+ bool interactive = false;
+
+- bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
++ bfq_log_bfqq(bfqd, bfqq, "add_request: size %u %s",
++ blk_rq_sectors(rq), rq_is_sync(rq) ? "S" : "A");
++
++ if (bfqq->wr_coeff > 1) /* queue is being weight-raised */
++ bfq_log_bfqq(bfqd, bfqq,
++ "raising period dur %u/%u msec, old coeff %u, w %d(%d)",
++ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
++ jiffies_to_msecs(bfqq->wr_cur_max_time),
++ bfqq->wr_coeff,
++ bfqq->entity.weight, bfqq->entity.orig_weight);
++
+ bfqq->queued[rq_is_sync(rq)]++;
+ bfqd->queued++;
+
+ elv_rb_add(&bfqq->sort_list, rq);
+
+ /*
+- * Check if this request is a better next-serve candidate.
++ * Check if this request is a better next-to-serve candidate.
+ */
+ prev = bfqq->next_rq;
+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
+@@ -887,160 +1373,10 @@ static void bfq_add_request(struct request *rq)
+ if (prev != bfqq->next_rq)
+ bfq_pos_tree_add_move(bfqd, bfqq);
+
+- if (!bfq_bfqq_busy(bfqq)) {
+- bool soft_rt, coop_or_in_burst,
+- idle_for_long_time = time_is_before_jiffies(
+- bfqq->budget_timeout +
+- bfqd->bfq_wr_min_idle_time);
+-
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq,
+- rq->cmd_flags);
+-#endif
+- if (bfq_bfqq_sync(bfqq)) {
+- bool already_in_burst =
+- !hlist_unhashed(&bfqq->burst_list_node) ||
+- bfq_bfqq_in_large_burst(bfqq);
+- bfq_handle_burst(bfqd, bfqq, idle_for_long_time);
+- /*
+- * If bfqq was not already in the current burst,
+- * then, at this point, bfqq either has been
+- * added to the current burst or has caused the
+- * current burst to terminate. In particular, in
+- * the second case, bfqq has become the first
+- * queue in a possible new burst.
+- * In both cases last_ins_in_burst needs to be
+- * moved forward.
+- */
+- if (!already_in_burst)
+- bfqd->last_ins_in_burst = jiffies;
+- }
+-
+- coop_or_in_burst = bfq_bfqq_in_large_burst(bfqq) ||
+- bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh;
+- soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+- !coop_or_in_burst &&
+- time_is_before_jiffies(bfqq->soft_rt_next_start);
+- interactive = !coop_or_in_burst && idle_for_long_time;
+- entity->budget = max_t(unsigned long, bfqq->max_budget,
+- bfq_serv_to_charge(next_rq, bfqq));
+-
+- if (!bfq_bfqq_IO_bound(bfqq)) {
+- if (time_before(jiffies,
+- RQ_BIC(rq)->ttime.last_end_request +
+- bfqd->bfq_slice_idle)) {
+- bfqq->requests_within_timer++;
+- if (bfqq->requests_within_timer >=
+- bfqd->bfq_requests_within_timer)
+- bfq_mark_bfqq_IO_bound(bfqq);
+- } else
+- bfqq->requests_within_timer = 0;
+- }
+-
+- if (!bfqd->low_latency)
+- goto add_bfqq_busy;
+-
+- if (bfq_bfqq_just_split(bfqq))
+- goto set_prio_changed;
+-
+- /*
+- * If the queue:
+- * - is not being boosted,
+- * - has been idle for enough time,
+- * - is not a sync queue or is linked to a bfq_io_cq (it is
+- * shared "for its nature" or it is not shared and its
+- * requests have not been redirected to a shared queue)
+- * start a weight-raising period.
+- */
+- if (old_wr_coeff == 1 && (interactive || soft_rt) &&
+- (!bfq_bfqq_sync(bfqq) || bfqq->bic)) {
+- bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+- if (interactive)
+- bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+- else
+- bfqq->wr_cur_max_time =
+- bfqd->bfq_wr_rt_max_time;
+- bfq_log_bfqq(bfqd, bfqq,
+- "wrais starting at %lu, rais_max_time %u",
+- jiffies,
+- jiffies_to_msecs(bfqq->wr_cur_max_time));
+- } else if (old_wr_coeff > 1) {
+- if (interactive)
+- bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+- else if (coop_or_in_burst ||
+- (bfqq->wr_cur_max_time ==
+- bfqd->bfq_wr_rt_max_time &&
+- !soft_rt)) {
+- bfqq->wr_coeff = 1;
+- bfq_log_bfqq(bfqd, bfqq,
+- "wrais ending at %lu, rais_max_time %u",
+- jiffies,
+- jiffies_to_msecs(bfqq->
+- wr_cur_max_time));
+- } else if (time_before(
+- bfqq->last_wr_start_finish +
+- bfqq->wr_cur_max_time,
+- jiffies +
+- bfqd->bfq_wr_rt_max_time) &&
+- soft_rt) {
+- /*
+- *
+- * The remaining weight-raising time is lower
+- * than bfqd->bfq_wr_rt_max_time, which means
+- * that the application is enjoying weight
+- * raising either because deemed soft-rt in
+- * the near past, or because deemed interactive
+- * a long ago.
+- * In both cases, resetting now the current
+- * remaining weight-raising time for the
+- * application to the weight-raising duration
+- * for soft rt applications would not cause any
+- * latency increase for the application (as the
+- * new duration would be higher than the
+- * remaining time).
+- *
+- * In addition, the application is now meeting
+- * the requirements for being deemed soft rt.
+- * In the end we can correctly and safely
+- * (re)charge the weight-raising duration for
+- * the application with the weight-raising
+- * duration for soft rt applications.
+- *
+- * In particular, doing this recharge now, i.e.,
+- * before the weight-raising period for the
+- * application finishes, reduces the probability
+- * of the following negative scenario:
+- * 1) the weight of a soft rt application is
+- * raised at startup (as for any newly
+- * created application),
+- * 2) since the application is not interactive,
+- * at a certain time weight-raising is
+- * stopped for the application,
+- * 3) at that time the application happens to
+- * still have pending requests, and hence
+- * is destined to not have a chance to be
+- * deemed soft rt before these requests are
+- * completed (see the comments to the
+- * function bfq_bfqq_softrt_next_start()
+- * for details on soft rt detection),
+- * 4) these pending requests experience a high
+- * latency because the application is not
+- * weight-raised while they are pending.
+- */
+- bfqq->last_wr_start_finish = jiffies;
+- bfqq->wr_cur_max_time =
+- bfqd->bfq_wr_rt_max_time;
+- }
+- }
+-set_prio_changed:
+- if (old_wr_coeff != bfqq->wr_coeff)
+- entity->prio_changed = 1;
+-add_bfqq_busy:
+- bfqq->last_idle_bklogged = jiffies;
+- bfqq->service_from_backlogged = 0;
+- bfq_clear_bfqq_softrt_update(bfqq);
+- bfq_add_bfqq_busy(bfqd, bfqq);
+- } else {
++ if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */
++ bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff,
++ rq, &interactive);
++ else {
+ if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
+ time_is_before_jiffies(
+ bfqq->last_wr_start_finish +
+@@ -1049,16 +1385,43 @@ add_bfqq_busy:
+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+
+ bfqd->wr_busy_queues++;
+- entity->prio_changed = 1;
++ bfqq->entity.prio_changed = 1;
+ bfq_log_bfqq(bfqd, bfqq,
+- "non-idle wrais starting at %lu, rais_max_time %u",
+- jiffies,
+- jiffies_to_msecs(bfqq->wr_cur_max_time));
++ "non-idle wrais starting, "
++ "wr_max_time %u wr_busy %d",
++ jiffies_to_msecs(bfqq->wr_cur_max_time),
++ bfqd->wr_busy_queues);
+ }
+ if (prev != bfqq->next_rq)
+ bfq_updated_next_req(bfqd, bfqq);
+ }
+
++ /*
++ * Assign jiffies to last_wr_start_finish in the following
++ * cases:
++ *
++ * . if bfqq is not going to be weight-raised, because, for
++ * non weight-raised queues, last_wr_start_finish stores the
++ * arrival time of the last request; as of now, this piece
++ * of information is used only for deciding whether to
++ * weight-raise async queues
++ *
++ * . if bfqq is not weight-raised, because, if bfqq is now
++ * switching to weight-raised, then last_wr_start_finish
++ * stores the time when weight-raising starts
++ *
++ * . if bfqq is interactive, because, regardless of whether
++ * bfqq is currently weight-raised, the weight-raising
++ * period must start or restart (this case is considered
++ * separately because it is not detected by the above
++ * conditions, if bfqq is already weight-raised)
++ *
++ * last_wr_start_finish has to be updated also if bfqq is soft
++ * real-time, because the weight-raising period is constantly
++ * restarted on idle-to-busy transitions for these queues, but
++ * this is already done in bfq_bfqq_handle_idle_busy_switch if
++ * needed.
++ */
+ if (bfqd->low_latency &&
+ (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
+ bfqq->last_wr_start_finish = jiffies;
+@@ -1106,6 +1469,9 @@ static void bfq_remove_request(struct request *rq)
+ struct bfq_data *bfqd = bfqq->bfqd;
+ const int sync = rq_is_sync(rq);
+
++ BUG_ON(bfqq->entity.service > bfqq->entity.budget &&
++ bfqq == bfqd->in_service_queue);
++
+ if (bfqq->next_rq == rq) {
+ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
+ bfq_updated_next_req(bfqd, bfqq);
+@@ -1119,8 +1485,25 @@ static void bfq_remove_request(struct request *rq)
+ elv_rb_del(&bfqq->sort_list, rq);
+
+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+- if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue)
++ BUG_ON(bfqq->entity.budget < 0);
++
++ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) {
+ bfq_del_bfqq_busy(bfqd, bfqq, 1);
++
++ /* bfqq emptied. In normal operation, when
++ * bfqq is empty, bfqq->entity.service and
++ * bfqq->entity.budget must contain,
++ * respectively, the service received and the
++ * budget used last time bfqq emptied. These
++ * facts do not hold in this case, as at least
++ * this last removal occurred while bfqq is
++ * not in service. To avoid inconsistencies,
++ * reset both bfqq->entity.service and
++ * bfqq->entity.budget.
++ */
++ bfqq->entity.budget = bfqq->entity.service = 0;
++ }
++
+ /*
+ * Remove queue from request-position tree as it is empty.
+ */
+@@ -1134,9 +1517,7 @@ static void bfq_remove_request(struct request *rq)
+ BUG_ON(bfqq->meta_pending == 0);
+ bfqq->meta_pending--;
+ }
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
+-#endif
+ }
+
+ static int bfq_merge(struct request_queue *q, struct request **req,
+@@ -1221,21 +1602,25 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq,
+ bfqq->next_rq = rq;
+
+ bfq_remove_request(next);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
+-#endif
+ }
+
+ /* Must be called with bfqq != NULL */
+ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
+ {
+ BUG_ON(!bfqq);
++
+ if (bfq_bfqq_busy(bfqq))
+ bfqq->bfqd->wr_busy_queues--;
+ bfqq->wr_coeff = 1;
+ bfqq->wr_cur_max_time = 0;
+- /* Trigger a weight change on the next activation of the queue */
++ /*
++ * Trigger a weight change on the next invocation of
++ * __bfq_entity_update_weight_prio.
++ */
+ bfqq->entity.prio_changed = 1;
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "end_wr: wr_busy %d",
++ bfqq->bfqd->wr_busy_queues);
+ }
+
+ static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+@@ -1278,7 +1663,7 @@ static int bfq_rq_close_to_sector(void *io_struct, bool request,
+ sector_t sector)
+ {
+ return abs(bfq_io_struct_pos(io_struct, request) - sector) <=
+- BFQQ_SEEK_THR;
++ BFQQ_CLOSE_THR;
+ }
+
+ static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
+@@ -1400,7 +1785,7 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+ * throughput.
+ */
+ bfqq->new_bfqq = new_bfqq;
+- atomic_add(process_refs, &new_bfqq->ref);
++ new_bfqq->ref += process_refs;
+ return new_bfqq;
+ }
+
+@@ -1431,9 +1816,23 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+ }
+
+ /*
+- * Attempt to schedule a merge of bfqq with the currently in-service queue
+- * or with a close queue among the scheduled queues.
+- * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
++ * If this function returns true, then bfqq cannot be merged. The idea
++ * is that true cooperation happens very early after processes start
++ * to do I/O. Usually, late cooperations are just accidental false
++ * positives. In case bfqq is weight-raised, such false positives
++ * would evidently degrade latency guarantees for bfqq.
++ */
++static bool wr_from_too_long(struct bfq_queue *bfqq)
++{
++ return bfqq->wr_coeff > 1 &&
++ time_is_before_jiffies(bfqq->last_wr_start_finish +
++ msecs_to_jiffies(100));
++}
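++
++/*
++ * E.g., a queue whose weight-raising started 2 s ago is filtered out
++ * of queue merging by the function above, whereas one raised just
++ * 50 ms ago may still be merged: true cooperators are expected to
++ * show up within the first 100 ms.
++ */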
++
++/*
++ * Attempt to schedule a merge of bfqq with the currently in-service
++ * queue or with a close queue among the scheduled queues. Return
++ * NULL if no merge was scheduled, a pointer to the shared bfq_queue
+ * structure otherwise.
+ *
+ * The OOM queue is not allowed to participate to cooperation: in fact, since
+@@ -1442,6 +1841,18 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+ * handle merging with the OOM queue would be quite complex and expensive
+ * to maintain. Besides, in such a critical condition as an out of memory,
+ * the benefits of queue merging may be little relevant, or even negligible.
++ *
++ * Weight-raised queues can be merged only if their weight-raising
++ * period has just started. In fact cooperating processes are usually
++ * started together. Thus, with this filter we avoid false positives
++ * that would jeopardize low-latency guarantees.
++ *
++ * WARNING: queue merging may impair fairness among non-weight raised
++ * queues, for at least two reasons: 1) the original weight of a
++ * merged queue may change during the merged state, 2) even being the
++ * weight the same, a merged queue may be bloated with many more
++ * requests than the ones produced by its originally-associated
++ * process.
+ */
+ static struct bfq_queue *
+ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -1451,16 +1862,32 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+
+ if (bfqq->new_bfqq)
+ return bfqq->new_bfqq;
+- if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
++
++ if (io_struct && wr_from_too_long(bfqq) &&
++ likely(bfqq != &bfqd->oom_bfqq))
++ bfq_log_bfqq(bfqd, bfqq,
++ "would have looked for coop, but bfq%d wr",
++ bfqq->pid);
++
++ if (!io_struct ||
++ wr_from_too_long(bfqq) ||
++ unlikely(bfqq == &bfqd->oom_bfqq))
+ return NULL;
+- /* If device has only one backlogged bfq_queue, don't search. */
++
++ /* If there is only one backlogged queue, don't search. */
+ if (bfqd->busy_queues == 1)
+ return NULL;
+
+ in_service_bfqq = bfqd->in_service_queue;
+
++ if (in_service_bfqq && in_service_bfqq != bfqq &&
++ bfqd->in_service_bic && wr_from_too_long(in_service_bfqq)
++	    && likely(in_service_bfqq != &bfqd->oom_bfqq))
++ bfq_log_bfqq(bfqd, bfqq,
++ "would have tried merge with in-service-queue, but wr");
++
+ if (!in_service_bfqq || in_service_bfqq == bfqq ||
+- !bfqd->in_service_bic ||
++ !bfqd->in_service_bic || wr_from_too_long(in_service_bfqq) ||
+ unlikely(in_service_bfqq == &bfqd->oom_bfqq))
+ goto check_scheduled;
+
+@@ -1482,7 +1909,15 @@ check_scheduled:
+
+ BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent);
+
+- if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) &&
++ if (new_bfqq && wr_from_too_long(new_bfqq) &&
++ likely(new_bfqq != &bfqd->oom_bfqq) &&
++ bfq_may_be_close_cooperator(bfqq, new_bfqq))
++ bfq_log_bfqq(bfqd, bfqq,
++ "would have merged with bfq%d, but wr",
++ new_bfqq->pid);
++
++ if (new_bfqq && !wr_from_too_long(new_bfqq) &&
++ likely(new_bfqq != &bfqd->oom_bfqq) &&
+ bfq_may_be_close_cooperator(bfqq, new_bfqq))
+ return bfq_setup_merge(bfqq, new_bfqq);
+
+@@ -1498,46 +1933,11 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
+ */
+ if (!bfqq->bic)
+ return;
+- if (bfqq->bic->wr_time_left)
+- /*
+- * This is the queue of a just-started process, and would
+- * deserve weight raising: we set wr_time_left to the full
+- * weight-raising duration to trigger weight-raising when
+- * and if the queue is split and the first request of the
+- * queue is enqueued.
+- */
+- bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd);
+- else if (bfqq->wr_coeff > 1) {
+- unsigned long wr_duration =
+- jiffies - bfqq->last_wr_start_finish;
+- /*
+- * It may happen that a queue's weight raising period lasts
+- * longer than its wr_cur_max_time, as weight raising is
+- * handled only when a request is enqueued or dispatched (it
+- * does not use any timer). If the weight raising period is
+- * about to end, don't save it.
+- */
+- if (bfqq->wr_cur_max_time <= wr_duration)
+- bfqq->bic->wr_time_left = 0;
+- else
+- bfqq->bic->wr_time_left =
+- bfqq->wr_cur_max_time - wr_duration;
+- /*
+- * The bfq_queue is becoming shared or the requests of the
+- * process owning the queue are being redirected to a shared
+- * queue. Stop the weight raising period of the queue, as in
+- * both cases it should not be owned by an interactive or
+- * soft real-time application.
+- */
+- bfq_bfqq_end_wr(bfqq);
+- } else
+- bfqq->bic->wr_time_left = 0;
++
+ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
+ bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+ bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
+ bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
+- bfqq->bic->cooperations++;
+- bfqq->bic->failed_cooperations = 0;
+ }
+
+ static void bfq_get_bic_reference(struct bfq_queue *bfqq)
+@@ -1562,6 +1962,40 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+ if (bfq_bfqq_IO_bound(bfqq))
+ bfq_mark_bfqq_IO_bound(new_bfqq);
+ bfq_clear_bfqq_IO_bound(bfqq);
++
++ /*
++ * If bfqq is weight-raised, then let new_bfqq inherit
++ * weight-raising. To reduce false positives, neglect the case
++ * where bfqq has just been created, but has not yet made it
++ * to be weight-raised (which may happen because EQM may merge
++ * bfqq even before bfq_add_request is executed for the first
++ * time for bfqq). Handling this case would however be very
++ * easy, thanks to the flag just_created.
++ */
++ if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) {
++ new_bfqq->wr_coeff = bfqq->wr_coeff;
++ new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time;
++ new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish;
++ if (bfq_bfqq_busy(new_bfqq))
++ bfqd->wr_busy_queues++;
++ new_bfqq->entity.prio_changed = 1;
++ bfq_log_bfqq(bfqd, new_bfqq,
++ "wr starting after merge with %d, "
++ "rais_max_time %u",
++ bfqq->pid,
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ }
++
++ if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */
++ bfqq->wr_coeff = 1;
++ bfqq->entity.prio_changed = 1;
++ if (bfq_bfqq_busy(bfqq))
++ bfqd->wr_busy_queues--;
++ }
++
++ bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d",
++ bfqd->wr_busy_queues);
++
+ /*
+ * Grab a reference to the bic, to prevent it from being destroyed
+ * before being possibly touched by a bfq_split_bfqq().
+@@ -1588,18 +2022,6 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+ bfq_put_queue(bfqq);
+ }
+
+-static void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq)
+-{
+- struct bfq_io_cq *bic = bfqq->bic;
+- struct bfq_data *bfqd = bfqq->bfqd;
+-
+- if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) {
+- bic->failed_cooperations++;
+- if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations)
+- bic->cooperations = 0;
+- }
+-}
+-
+ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+ struct bio *bio)
+ {
+@@ -1637,30 +2059,86 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+ * to decide whether bio and rq can be merged.
+ */
+ bfqq = new_bfqq;
+- } else
+- bfq_bfqq_increase_failed_cooperations(bfqq);
++ }
+ }
+
+ return bfqq == RQ_BFQQ(rq);
+ }
+
++/*
++ * Set the maximum time for the in-service queue to consume its
++ * budget. This prevents seeky processes from lowering the throughput.
++ * In practice, a time-slice service scheme is used with seeky
++ * processes.
++ */
++static void bfq_set_budget_timeout(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ unsigned int timeout_coeff;
++ if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
++ timeout_coeff = 1;
++ else
++ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
++
++ bfqd->last_budget_start = ktime_get();
++
++ bfqq->budget_timeout = jiffies +
++ bfqd->bfq_timeout * timeout_coeff;
++
++ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
++ jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff));
++}
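++
++/*
++ * E.g., an interactively weight-raised queue whose weight is ten
++ * times its original weight gets timeout_coeff = 10, and may thus
++ * retain the device for up to ten base timeouts before being expired
++ * for BFQ_BFQQ_BUDGET_TIMEOUT; soft real-time queues always get the
++ * base timeout (timeout_coeff = 1).
++ */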
++
+ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq)
+ {
+ if (bfqq) {
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
+-#endif
+ bfq_mark_bfqq_must_alloc(bfqq);
+- bfq_mark_bfqq_budget_new(bfqq);
+ bfq_clear_bfqq_fifo_expire(bfqq);
+
+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
+
++ BUG_ON(bfqq == bfqd->in_service_queue);
++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
++
++ if (bfqq->wr_coeff > 1 &&
++ bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
++ time_is_before_jiffies(bfqq->budget_timeout)) {
++ /*
++ * For soft real-time queues, move the start
++ * of the weight-raising period forward by the
++ * time the queue has not received any
++ * service. Otherwise, a relatively long
++ * service delay is likely to cause the
++ * weight-raising period of the queue to end,
++ * because of the short duration of the
++ * weight-raising period of a soft real-time
++ * queue. It is worth noting that this move
++ * is not so dangerous for the other queues,
++ * because soft real-time queues are not
++ * greedy.
++ *
++ * To not add a further variable, we use the
++ * overloaded field budget_timeout to
++ * determine for how long the queue has not
++ * received service, i.e., how much time has
++ * elapsed since the queue expired. However,
++ * this is a little imprecise, because
++ * budget_timeout is set to jiffies if bfqq
++ * not only expires, but also remains with no
++ * request.
++ */
++ bfqq->last_wr_start_finish += jiffies -
++ bfqq->budget_timeout;
++ }
++
++ bfq_set_budget_timeout(bfqd, bfqq);
+ bfq_log_bfqq(bfqd, bfqq,
+ "set_in_service_queue, cur-budget = %d",
+ bfqq->entity.budget);
+- }
++ } else
++ bfq_log(bfqd, "set_in_service_queue: NULL");
+
+ bfqd->in_service_queue = bfqq;
+ }
+@@ -1676,31 +2154,6 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
+ return bfqq;
+ }
+
+-/*
+- * If enough samples have been computed, return the current max budget
+- * stored in bfqd, which is dynamically updated according to the
+- * estimated disk peak rate; otherwise return the default max budget
+- */
+-static int bfq_max_budget(struct bfq_data *bfqd)
+-{
+- if (bfqd->budgets_assigned < bfq_stats_min_budgets)
+- return bfq_default_max_budget;
+- else
+- return bfqd->bfq_max_budget;
+-}
+-
+-/*
+- * Return min budget, which is a fraction of the current or default
+- * max budget (trying with 1/32)
+- */
+-static int bfq_min_budget(struct bfq_data *bfqd)
+-{
+- if (bfqd->budgets_assigned < bfq_stats_min_budgets)
+- return bfq_default_max_budget / 32;
+- else
+- return bfqd->bfq_max_budget / 32;
+-}
+-
+ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
+ {
+ struct bfq_queue *bfqq = bfqd->in_service_queue;
+@@ -1723,64 +2176,36 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
+ *
+ * To prevent processes with (partly) seeky workloads from
+ * being too ill-treated, grant them a small fraction of the
+- * assigned budget before reducing the waiting time to
+- * BFQ_MIN_TT. This happened to help reduce latency.
+- */
+- sl = bfqd->bfq_slice_idle;
+- /*
+- * Unless the queue is being weight-raised or the scenario is
+- * asymmetric, grant only minimum idle time if the queue either
+- * has been seeky for long enough or has already proved to be
+- * constantly seeky.
+- */
+- if (bfq_sample_valid(bfqq->seek_samples) &&
+- ((BFQQ_SEEKY(bfqq) && bfqq->entity.service >
+- bfq_max_budget(bfqq->bfqd) / 8) ||
+- bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1 &&
+- bfq_symmetric_scenario(bfqd))
+- sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
+- else if (bfqq->wr_coeff > 1)
+- sl = sl * 3;
+- bfqd->last_idling_start = ktime_get();
+- mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
+-#endif
+- bfq_log(bfqd, "arm idle: %u/%u ms",
+- jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
+-}
+-
+-/*
+- * Set the maximum time for the in-service queue to consume its
+- * budget. This prevents seeky processes from lowering the disk
+- * throughput (always guaranteed with a time slice scheme as in CFQ).
+- */
+-static void bfq_set_budget_timeout(struct bfq_data *bfqd)
+-{
+- struct bfq_queue *bfqq = bfqd->in_service_queue;
+- unsigned int timeout_coeff;
+- if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
+- timeout_coeff = 1;
+- else
+- timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
+-
+- bfqd->last_budget_start = ktime_get();
+-
+- bfq_clear_bfqq_budget_new(bfqq);
+- bfqq->budget_timeout = jiffies +
+- bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff;
++ * assigned budget before reducing the waiting time to
++ * BFQ_MIN_TT. This happened to help reduce latency.
++ */
++ sl = bfqd->bfq_slice_idle;
++ /*
++ * Unless the queue is being weight-raised or the scenario is
++ * asymmetric, grant only minimum idle time if the queue
++ * is seeky. A long idling is preserved for a weight-raised
++	 * queue, or, more generally, in an asymmetric scenario,
++ * because a long idling is needed for guaranteeing to a queue
++ * its reserved share of the throughput (in particular, it is
++ * needed if the queue has a higher weight than some other
++ * queue).
++ */
++ if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 &&
++ bfq_symmetric_scenario(bfqd))
++ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
+
+- bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
+- jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] *
+- timeout_coeff));
++ bfqd->last_idling_start = ktime_get();
++ mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
++ bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
++ bfq_log(bfqd, "arm idle: %u/%u ms",
++ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
+ }
+
+ /*
+- * Move request from internal lists to the request queue dispatch list.
++ * Move request from internal lists to the dispatch list of the request queue
+ */
+ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
+ {
+- struct bfq_data *bfqd = q->elevator->elevator_data;
+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
+
+ /*
+@@ -1794,15 +2219,9 @@ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
+ * incrementing bfqq->dispatched.
+ */
+ bfqq->dispatched++;
++
+ bfq_remove_request(rq);
+ elv_dispatch_sort(q, rq);
+-
+- if (bfq_bfqq_sync(bfqq))
+- bfqd->sync_flight++;
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- bfqg_stats_update_dispatch(bfqq_group(bfqq), blk_rq_bytes(rq),
+- rq->cmd_flags);
+-#endif
+ }
+
+ /*
+@@ -1822,18 +2241,12 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
+
+ rq = rq_entry_fifo(bfqq->fifo.next);
+
+- if (time_before(jiffies, rq->fifo_time))
++ if (time_is_after_jiffies(rq->fifo_time))
+ return NULL;
+
+ return rq;
+ }
+
+-static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
+-{
+- struct bfq_entity *entity = &bfqq->entity;
+- return entity->budget - entity->service;
+-}
+-
+ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ BUG_ON(bfqq != bfqd->in_service_queue);
+@@ -1850,12 +2263,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_mark_bfqq_split_coop(bfqq);
+
+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+- /*
+- * Overloading budget_timeout field to store the time
+- * at which the queue remains with no backlog; used by
+- * the weight-raising mechanism.
+- */
+- bfqq->budget_timeout = jiffies;
++ if (bfqq->dispatched == 0)
++ /*
++ * Overloading budget_timeout field to store
++ * the time at which the queue remains with no
++ * backlog and no outstanding request; used by
++ * the weight-raising mechanism.
++ */
++ bfqq->budget_timeout = jiffies;
++
+ bfq_del_bfqq_busy(bfqd, bfqq, 1);
+ } else {
+ bfq_activate_bfqq(bfqd, bfqq);
+@@ -1882,10 +2298,19 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+ struct request *next_rq;
+ int budget, min_budget;
+
+- budget = bfqq->max_budget;
++ BUG_ON(bfqq != bfqd->in_service_queue);
++
+ min_budget = bfq_min_budget(bfqd);
+
+- BUG_ON(bfqq != bfqd->in_service_queue);
++ if (bfqq->wr_coeff == 1)
++ budget = bfqq->max_budget;
++ else /*
++ * Use a constant, low budget for weight-raised queues,
++ * to help achieve a low latency. Keep it slightly higher
++ * than the minimum possible budget, to cause a little
++ * bit fewer expirations.
++ */
++ budget = 2 * min_budget;
+
+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d",
+ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
+@@ -1894,7 +2319,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
+ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
+
+- if (bfq_bfqq_sync(bfqq)) {
++ if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) {
+ switch (reason) {
+ /*
+ * Caveat: in all the following cases we trade latency
+@@ -1936,14 +2361,10 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+ break;
+ case BFQ_BFQQ_BUDGET_TIMEOUT:
+ /*
+- * We double the budget here because: 1) it
+- * gives the chance to boost the throughput if
+- * this is not a seeky process (which may have
+- * bumped into this timeout because of, e.g.,
+- * ZBR), 2) together with charge_full_budget
+- * it helps give seeky processes higher
+- * timestamps, and hence be served less
+- * frequently.
++ * We double the budget here because it gives
++ * the chance to boost the throughput if this
++ * is not a seeky process (and has bumped into
++ * this timeout because of, e.g., ZBR).
+ */
+ budget = min(budget * 2, bfqd->bfq_max_budget);
+ break;
+@@ -1960,17 +2381,49 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+ budget = min(budget * 4, bfqd->bfq_max_budget);
+ break;
+ case BFQ_BFQQ_NO_MORE_REQUESTS:
+- /*
+- * Leave the budget unchanged.
+- */
++ /*
++ * For queues that expire for this reason, it
++ * is particularly important to keep the
++ * budget close to the actual service they
++ * need. Doing so reduces the timestamp
++ * misalignment problem described in the
++ * comments in the body of
++ * __bfq_activate_entity. In fact, suppose
++ * that a queue systematically expires for
++ * BFQ_BFQQ_NO_MORE_REQUESTS and presents a
++ * new request in time to enjoy timestamp
++ * back-shifting. The larger the budget of the
++ * queue is with respect to the service the
++ * queue actually requests in each service
++ * slot, the more times the queue can be
++ * reactivated with the same virtual finish
++ * time. It follows that, even if this finish
++ * time is pushed to the system virtual time
++ * to reduce the consequent timestamp
++ * misalignment, the queue unjustly enjoys for
++ * many re-activations a lower finish time
++ * than all newly activated queues.
++ *
++ * The service needed by bfqq is measured
++ * quite precisely by bfqq->entity.service.
++ * Since bfqq does not enjoy device idling,
++ * bfqq->entity.service is equal to the number
++ * of sectors that the process associated with
++ * bfqq requested to read/write before waiting
++ * for request completions, or blocking for
++ * other reasons.
++ */
++ budget = max_t(int, bfqq->entity.service, min_budget);
++ break;
+ default:
+ return;
+ }
+- } else
++ } else if (!bfq_bfqq_sync(bfqq))
+ /*
+- * Async queues get always the maximum possible budget
+- * (their ability to dispatch is limited by
+- * @bfqd->bfq_max_budget_async_rq).
++		 * Async queues always get the maximum possible
++ * budget, as for them we do not care about latency
++ * (in addition, their ability to dispatch is limited
++ * by the charging factor).
+ */
+ budget = bfqd->bfq_max_budget;
+
+@@ -1981,65 +2434,105 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+ bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget);
+
+ /*
+- * Make sure that we have enough budget for the next request.
+- * Since the finish time of the bfqq must be kept in sync with
+- * the budget, be sure to call __bfq_bfqq_expire() after the
++ * If there is still backlog, then assign a new budget, making
++ * sure that it is large enough for the next request. Since
++ * the finish time of bfqq must be kept in sync with the
++ * budget, be sure to call __bfq_bfqq_expire() *after* this
+ * update.
++ *
++ * If there is no backlog, then no need to update the budget;
++ * it will be updated on the arrival of a new request.
+ */
+ next_rq = bfqq->next_rq;
+- if (next_rq)
++ if (next_rq) {
++ BUG_ON(reason == BFQ_BFQQ_TOO_IDLE ||
++ reason == BFQ_BFQQ_NO_MORE_REQUESTS);
+ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
+ bfq_serv_to_charge(next_rq, bfqq));
+- else
+- bfqq->entity.budget = bfqq->max_budget;
++ BUG_ON(!bfq_bfqq_busy(bfqq));
++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
++ }
+
+ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d",
+ next_rq ? blk_rq_sectors(next_rq) : 0,
+ bfqq->entity.budget);
+ }
+
+-static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
++static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd)
+ {
+- unsigned long max_budget;
+-
+ /*
+ * The max_budget calculated when autotuning is equal to the
+- * amount of sectors transfered in timeout_sync at the
++	 * amount of sectors transferred in timeout at the
+ * estimated peak rate.
+ */
+- max_budget = (unsigned long)(peak_rate * 1000 *
+- timeout >> BFQ_RATE_SHIFT);
+-
+- return max_budget;
++ return bfqd->peak_rate * 1000 * jiffies_to_msecs(bfqd->bfq_timeout) >>
++ BFQ_RATE_SHIFT;
+ }
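++
++/*
++ * Worked example, assuming BFQ_RATE_SHIFT is 16: a device reading at
++ * ~100 MB/s serves ~200000 sectors/s, i.e., 0.2 sectors/usec, so
++ * peak_rate ~= 0.2 << 16 ~= 13107; with a 125 ms timeout this yields
++ * 13107 * 1000 * 125 >> 16 ~= 25000 sectors (~12 MB) of max budget.
++ */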
+
+ /*
+- * In addition to updating the peak rate, checks whether the process
+- * is "slow", and returns 1 if so. This slow flag is used, in addition
+- * to the budget timeout, to reduce the amount of service provided to
+- * seeky processes, and hence reduce their chances to lower the
+- * throughput. See the code for more details.
++ * Update the read peak rate (quantity used for auto-tuning) as a
++ * function of the rate at which bfqq has been served, and check
++ * whether the process associated with bfqq is "slow". Return true if
++ * the process is slow. The slow flag is used, in addition to the
++ * budget timeout, to reduce the amount of service provided to seeky
++ * processes, and hence reduce their chances to lower the
++ * throughput. More details in the body of the function.
++ *
++ * An important observation is in order: with devices with internal
++ * queues, it is hard, if possible at all, to know when and for how long
++ * an I/O request is processed by the device (apart from the trivial
++ * I/O pattern where a new request is dispatched only after the
++ * previous one has been completed). This makes it hard to evaluate
++ * the real rate at which the I/O requests of each bfq_queue are
++ * served. In fact, for an I/O scheduler like BFQ, serving a
++ * bfq_queue means just dispatching its requests during its service
++ * slot, i.e., until the budget of the queue is exhausted, or the
++ * queue remains idle, or, finally, a timeout fires. But, during the
++ * service slot of a bfq_queue, the device may be still processing
++ * requests of bfq_queues served in previous service slots. On the
++ * opposite end, the requests of the in-service bfq_queue may be
++ * completed after the service slot of the queue finishes. Anyway,
++ * unless more sophisticated solutions are used (where possible), the
++ * sum of the sizes of the requests dispatched during the service slot
++ * of a bfq_queue is probably the only approximation available for
++ * the service received by the bfq_queue during its service slot. And,
++ * as written above, this sum is the quantity used in this function to
++ * evaluate the peak rate.
+ */
+ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+- bool compensate, enum bfqq_expiration reason)
++ bool compensate, enum bfqq_expiration reason,
++ unsigned long *delta_ms)
+ {
+- u64 bw, usecs, expected, timeout;
+- ktime_t delta;
++ u64 bw, bwdiv10, delta_usecs, delta_ms_tmp;
++ ktime_t delta_ktime;
+ int update = 0;
++	bool slow = BFQQ_SEEKY(bfqq); /* if delta too short, use seekiness */
+
+- if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
++ if (!bfq_bfqq_sync(bfqq))
+ return false;
+
+ if (compensate)
+- delta = bfqd->last_idling_start;
++ delta_ktime = bfqd->last_idling_start;
+ else
+- delta = ktime_get();
+- delta = ktime_sub(delta, bfqd->last_budget_start);
+- usecs = ktime_to_us(delta);
++ delta_ktime = ktime_get();
++ delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start);
++ delta_usecs = ktime_to_us(delta_ktime);
+
+ /* Don't trust short/unrealistic values. */
+- if (usecs < 100 || usecs >= LONG_MAX)
+- return false;
++ if (delta_usecs < 1000 || delta_usecs >= LONG_MAX) {
++ if (blk_queue_nonrot(bfqd->queue))
++		*delta_ms = BFQ_MIN_TT; /* give same worst-case
++					   guarantees as idling
++					   for seeky queues */
++ else /* Charge at least one seek */
++ *delta_ms = jiffies_to_msecs(bfq_slice_idle);
++ return slow;
++ }
++
++ delta_ms_tmp = delta_usecs;
++ do_div(delta_ms_tmp, 1000);
++ *delta_ms = delta_ms_tmp;
+
+ /*
+ * Calculate the bandwidth for the last slice. We use a 64 bit
+@@ -2048,32 +2541,51 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ * and to avoid overflows.
+ */
+ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
+- do_div(bw, (unsigned long)usecs);
+-
+- timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
++ do_div(bw, (unsigned long)delta_usecs);
+
++ bfq_log(bfqd, "measured bw = %llu sects/sec",
++ (1000000*bw)>>BFQ_RATE_SHIFT);
+ /*
+ * Use only long (> 20ms) intervals to filter out spikes for
+ * the peak rate estimation.
+ */
+- if (usecs > 20000) {
++ if (delta_usecs > 20000) {
++ bool fully_sequential = bfqq->seek_history == 0;
++ /*
++ * Soft real-time queues are not good candidates for
++ * evaluating bw, as they are likely to be slow even
++ * if sequential.
++ */
++ bool non_soft_rt = bfqq->wr_coeff == 1 ||
++ bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time;
++ bool consumed_large_budget =
++ reason == BFQ_BFQQ_BUDGET_EXHAUSTED &&
++ bfqq->entity.budget >= bfqd->bfq_max_budget * 2 / 3;
++ bool served_for_long_time =
++ reason == BFQ_BFQQ_BUDGET_TIMEOUT ||
++ consumed_large_budget;
++
++ BUG_ON(bfqq->seek_history == 0 &&
++ hweight32(bfqq->seek_history) != 0);
++
+ if (bw > bfqd->peak_rate ||
+- (!BFQQ_SEEKY(bfqq) &&
+- reason == BFQ_BFQQ_BUDGET_TIMEOUT)) {
+- bfq_log(bfqd, "measured bw =%llu", bw);
++ (bfq_bfqq_sync(bfqq) && fully_sequential && non_soft_rt &&
++ served_for_long_time)) {
+ /*
+ * To smooth oscillations use a low-pass filter with
+- * alpha=7/8, i.e.,
+- * new_rate = (7/8) * old_rate + (1/8) * bw
++ * alpha=9/10, i.e.,
++ * new_rate = (9/10) * old_rate + (1/10) * bw
+ */
+- do_div(bw, 8);
+- if (bw == 0)
+- return 0;
+- bfqd->peak_rate *= 7;
+- do_div(bfqd->peak_rate, 8);
+- bfqd->peak_rate += bw;
++ bwdiv10 = bw;
++ do_div(bwdiv10, 10);
++ if (bwdiv10 == 0)
++ return false; /* bw too low to be used */
++ bfqd->peak_rate *= 9;
++ do_div(bfqd->peak_rate, 10);
++ bfqd->peak_rate += bwdiv10;
+ update = 1;
+- bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate);
++ bfq_log(bfqd, "new peak_rate = %llu sects/sec",
++ (1000000*bfqd->peak_rate)>>BFQ_RATE_SHIFT);
+ }
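++
++		/*
++		 * E.g., with a previous peak_rate of 60000 and a new
++		 * sample bw of 80000 (same shifted units), the filter
++		 * yields 60000 * 9 / 10 + 80000 / 10 = 62000.
++		 */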
+
+ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
+@@ -2086,9 +2598,8 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ int dev_type = blk_queue_nonrot(bfqd->queue);
+ if (bfqd->bfq_user_max_budget == 0) {
+ bfqd->bfq_max_budget =
+- bfq_calc_max_budget(bfqd->peak_rate,
+- timeout);
+- bfq_log(bfqd, "new max_budget=%d",
++ bfq_calc_max_budget(bfqd);
++ bfq_log(bfqd, "new max_budget = %d",
+ bfqd->bfq_max_budget);
+ }
+ if (bfqd->device_speed == BFQ_BFQD_FAST &&
+@@ -2102,38 +2613,35 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ bfqd->RT_prod = R_fast[dev_type] *
+ T_fast[dev_type];
+ }
++ bfq_log(bfqd, "dev_speed_class = %d (%d sects/sec), "
++ "thresh %d setcs/sec",
++ bfqd->device_speed,
++ bfqd->device_speed == BFQ_BFQD_FAST ?
++ (1000000*R_fast[dev_type])>>BFQ_RATE_SHIFT :
++ (1000000*R_slow[dev_type])>>BFQ_RATE_SHIFT,
++ (1000000*device_speed_thresh[dev_type])>>
++ BFQ_RATE_SHIFT);
+ }
++ /*
++ * Caveat: processes doing IO in the slower disk zones
++ * tend to be slow(er) even if not seeky. In this
++ * respect, the estimated peak rate is likely to be an
++ * average over the disk surface. Accordingly, to not
++ * be too harsh with unlucky processes, a process is
++ * deemed slow only if its bw has been lower than half
++ * of the estimated peak rate.
++ */
++ slow = bw < bfqd->peak_rate / 2;
+ }
+
+- /*
+- * If the process has been served for a too short time
+- * interval to let its possible sequential accesses prevail on
+- * the initial seek time needed to move the disk head on the
+- * first sector it requested, then give the process a chance
+- * and for the moment return false.
+- */
+- if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8)
+- return false;
+-
+- /*
+- * A process is considered ``slow'' (i.e., seeky, so that we
+- * cannot treat it fairly in the service domain, as it would
+- * slow down too much the other processes) if, when a slice
+- * ends for whatever reason, it has received service at a
+- * rate that would not be high enough to complete the budget
+- * before the budget timeout expiration.
+- */
+- expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT;
++ bfq_log_bfqq(bfqd, bfqq,
++ "update_peak_rate: bw %llu sect/s, peak rate %llu, "
++ "slow %d",
++ (1000000*bw)>>BFQ_RATE_SHIFT,
++ (1000000*bfqd->peak_rate)>>BFQ_RATE_SHIFT,
++ bw < bfqd->peak_rate / 2);
+
+- /*
+- * Caveat: processes doing IO in the slower disk zones will
+- * tend to be slow(er) even if not seeky. And the estimated
+- * peak rate will actually be an average over the disk
+- * surface. Hence, to not be too harsh with unlucky processes,
+- * we keep a budget/3 margin of safety before declaring a
+- * process slow.
+- */
+- return expected > (4 * bfqq->entity.budget) / 3;
++ return slow;
+ }
+
+ /*
+@@ -2191,6 +2699,15 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq)
+ {
++ bfq_log_bfqq(bfqd, bfqq,
++ "softrt_next_start: service_blkg %lu "
++ "soft_rate %u sects/sec"
++ "interval %u",
++ bfqq->service_from_backlogged,
++ bfqd->bfq_wr_max_softrt_rate,
++ jiffies_to_msecs(HZ * bfqq->service_from_backlogged /
++ bfqd->bfq_wr_max_softrt_rate));
++
+ return max(bfqq->last_idle_bklogged +
+ HZ * bfqq->service_from_backlogged /
+ bfqd->bfq_wr_max_softrt_rate,
+@@ -2198,13 +2715,21 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+ }
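The return statement above (the second argument of max() falls outside the quoted hunk) lower-bounds soft_rt_next_start by the time the queue would need to consume its backlogged service at the maximum soft real-time rate. With illustrative numbers:

    /* Editorial sketch, hypothetical values: HZ == 250,
     * service_from_backlogged == 2048 sectors,
     * bfq_wr_max_softrt_rate == 7000 sectors/sec. */
    unsigned long min_gap = 250UL * 2048 / 7000; /* ~73 jiffies, ~290 ms */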
+
+ /*
+- * Return the largest-possible time instant such that, for as long as possible,
+- * the current time will be lower than this time instant according to the macro
+- * time_is_before_jiffies().
++ * Return the farthest future time instant according to jiffies
++ * macros.
++ */
++static unsigned long bfq_greatest_from_now(void)
++{
++ return jiffies + MAX_JIFFY_OFFSET;
++}
++
++/*
++ * Return the farthest past time instant according to jiffies
++ * macros.
+ */
+-static unsigned long bfq_infinity_from_now(unsigned long now)
++static unsigned long bfq_smallest_from_now(void)
+ {
+- return now + ULONG_MAX / 2;
++ return jiffies - MAX_JIFFY_OFFSET;
+ }
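Both helpers rely on the kernel's wrap-safe jiffies comparisons: MAX_JIFFY_OFFSET is the largest offset for which time_before()/time_after() still order correctly across counter wraparound, so the two return values behave as "never" and "long ago". A small sketch:

    unsigned long far_future = jiffies + MAX_JIFFY_OFFSET;
    unsigned long far_past = jiffies - MAX_JIFFY_OFFSET;

    /* Both hold at this instant, wherever the jiffies counter is. */
    WARN_ON(!time_is_after_jiffies(far_future));
    WARN_ON(!time_is_before_jiffies(far_past));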
+
+ /**
+@@ -2214,28 +2739,24 @@ static unsigned long bfq_infinity_from_now(unsigned long now)
+ * @compensate: if true, compensate for the time spent idling.
+ * @reason: the reason causing the expiration.
+ *
++ * If the process associated with bfqq does slow I/O (e.g., because it
++ * issues random requests), we charge bfqq with the time it has been
++ * in service instead of the service it has received (see
++ * bfq_bfqq_charge_time for details on how this goal is achieved). As
++ * a consequence, bfqq will typically get higher timestamps upon
++ * reactivation, and hence it will be rescheduled as if it had
++ * received more service than what it has actually received. In the
++ * end, bfqq receives less service in proportion to how slowly its
++ * associated process consumes its budgets (and hence how seriously it
++ * tends to lower the throughput). In addition, this time-charging
++ * strategy guarantees time fairness among slow processes. In
++ * contrast, if the process associated with bfqq is not slow, we
++ * charge bfqq exactly with the service it has received.
+ *
+- * If the process associated to the queue is slow (i.e., seeky), or in
+- * case of budget timeout, or, finally, if it is async, we
+- * artificially charge it an entire budget (independently of the
+- * actual service it received). As a consequence, the queue will get
+- * higher timestamps than the correct ones upon reactivation, and
+- * hence it will be rescheduled as if it had received more service
+- * than what it actually received. In the end, this class of processes
+- * will receive less service in proportion to how slowly they consume
+- * their budgets (and hence how seriously they tend to lower the
+- * throughput).
+- *
+- * In contrast, when a queue expires because it has been idling for
+- * too much or because it exhausted its budget, we do not touch the
+- * amount of service it has received. Hence when the queue will be
+- * reactivated and its timestamps updated, the latter will be in sync
+- * with the actual service received by the queue until expiration.
+- *
+- * Charging a full budget to the first type of queues and the exact
+- * service to the others has the effect of using the WF2Q+ policy to
+- * schedule the former on a timeslice basis, without violating the
+- * service domain guarantees of the latter.
++ * Charging time to the first type of queues and the exact service to
++ * the other has the effect of using the WF2Q+ policy to schedule the
++ * former on a timeslice basis, without violating service domain
++ * guarantees among the latter.
+ */
+ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq,
+@@ -2243,40 +2764,51 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ enum bfqq_expiration reason)
+ {
+ bool slow;
++ unsigned long delta = 0;
++ struct bfq_entity *entity = &bfqq->entity;
++
+ BUG_ON(bfqq != bfqd->in_service_queue);
+
+ /*
+- * Update disk peak rate for autotuning and check whether the
++ * Update device peak rate for autotuning and check whether the
+ * process is slow (see bfq_update_peak_rate).
+ */
+- slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason);
++ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason, &delta);
+
+ /*
+- * As above explained, 'punish' slow (i.e., seeky), timed-out
+- * and async queues, to favor sequential sync workloads.
+- *
+- * Processes doing I/O in the slower disk zones will tend to be
+- * slow(er) even if not seeky. Hence, since the estimated peak
+- * rate is actually an average over the disk surface, these
+- * processes may timeout just for bad luck. To avoid punishing
+- * them we do not charge a full budget to a process that
+- * succeeded in consuming at least 2/3 of its budget.
++ * Increase service_from_backlogged before next statement,
++ * because the possible next invocation of
++ * bfq_bfqq_charge_time would likely inflate
++ * entity->service. In contrast, service_from_backlogged must
++ * contain real service, to enable the soft real-time
++ * heuristic to correctly compute the bandwidth consumed by
++ * bfqq.
+ */
+- if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
+- bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3))
+- bfq_bfqq_charge_full_budget(bfqq);
++ bfqq->service_from_backlogged += entity->service;
+
+- bfqq->service_from_backlogged += bfqq->entity.service;
+-
+- if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
+- !bfq_bfqq_constantly_seeky(bfqq)) {
+- bfq_mark_bfqq_constantly_seeky(bfqq);
+- if (!blk_queue_nonrot(bfqd->queue))
+- bfqd->const_seeky_busy_in_flight_queues++;
+- }
++ /*
++ * As above explained, charge slow (typically seeky) and
++ * timed-out queues with the time and not the service
++ * received, to favor sequential workloads.
++ *
++ * Processes doing I/O in the slower disk zones will tend to
++ * be slow(er) even if not seeky. Therefore, since the
++ * estimated peak rate is actually an average over the disk
++ * surface, these processes may timeout just for bad luck. To
++ * avoid punishing them, do not charge time to processes that
++ * succeeded in consuming at least 2/3 of their budget. This
++ * allows BFQ to preserve enough elasticity to still perform
++ * bandwidth, and not time, distribution with little unlucky
++ * or quasi-sequential processes.
++ */
++ if (bfqq->wr_coeff == 1 &&
++ (slow ||
++ (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
++ bfq_bfqq_budget_left(bfqq) >= entity->budget / 3)))
++ bfq_bfqq_charge_time(bfqd, bfqq, delta);
+
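bfq_bfqq_charge_time itself lies outside this hunk; conceptually it replaces the service the queue actually received with the service a queue dispatching at full budget rate would have received in the same wall-clock interval. A hedged sketch of that idea — the function name and scaling below are illustrative, not the patch's code:

    /* Hypothetical sketch: convert time spent in service into an
     * equivalent amount of service, capped at one full budget. */
    static int service_for_time(struct bfq_data *bfqd, unsigned long delta_ms)
    {
        unsigned long timeout_ms = jiffies_to_msecs(bfqd->bfq_timeout);

        if (delta_ms >= timeout_ms)
            return bfqd->bfq_max_budget;
        return bfqd->bfq_max_budget * delta_ms / timeout_ms;
    }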
+ if (reason == BFQ_BFQQ_TOO_IDLE &&
+- bfqq->entity.service <= 2 * bfqq->entity.budget / 10 )
++ entity->service <= 2 * entity->budget / 10)
+ bfq_clear_bfqq_IO_bound(bfqq);
+
+ if (bfqd->low_latency && bfqq->wr_coeff == 1)
+@@ -2285,19 +2817,23 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
+ RB_EMPTY_ROOT(&bfqq->sort_list)) {
+ /*
+- * If we get here, and there are no outstanding requests,
+- * then the request pattern is isochronous (see the comments
+- * to the function bfq_bfqq_softrt_next_start()). Hence we
+- * can compute soft_rt_next_start. If, instead, the queue
+- * still has outstanding requests, then we have to wait
+- * for the completion of all the outstanding requests to
++ * If we get here, and there are no outstanding
++ * requests, then the request pattern is isochronous
++ * (see the comments on the function
++ * bfq_bfqq_softrt_next_start()). Thus we can compute
++ * soft_rt_next_start. If, instead, the queue still
++ * has outstanding requests, then we have to wait for
++ * the completion of all the outstanding requests to
+ * discover whether the request pattern is actually
+ * isochronous.
+ */
+- if (bfqq->dispatched == 0)
++ BUG_ON(bfqd->busy_queues < 1);
++ if (bfqq->dispatched == 0) {
+ bfqq->soft_rt_next_start =
+ bfq_bfqq_softrt_next_start(bfqd, bfqq);
+- else {
++ bfq_log_bfqq(bfqd, bfqq, "new soft_rt_next %lu",
++ bfqq->soft_rt_next_start);
++ } else {
+ /*
+ * The application is still waiting for the
+ * completion of one or more requests:
+@@ -2314,7 +2850,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ * happened to be in the past.
+ */
+ bfqq->soft_rt_next_start =
+- bfq_infinity_from_now(jiffies);
++ bfq_greatest_from_now();
+ /*
+ * Schedule an update of soft_rt_next_start to when
+ * the task may be discovered to be isochronous.
+@@ -2324,8 +2860,9 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ }
+
+ bfq_log_bfqq(bfqd, bfqq,
+- "expire (%d, slow %d, num_disp %d, idle_win %d)", reason,
+- slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
++ "expire (%d, slow %d, num_disp %d, idle_win %d, weight %d)",
++ reason, slow, bfqq->dispatched,
++ bfq_bfqq_idle_window(bfqq), entity->weight);
+
+ /*
+ * Increase, decrease or leave budget unchanged according to
+@@ -2333,6 +2870,14 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ */
+ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
+ __bfq_bfqq_expire(bfqd, bfqq);
++
++ BUG_ON(!bfq_bfqq_busy(bfqq) && reason == BFQ_BFQQ_BUDGET_EXHAUSTED &&
++ !bfq_class_idle(bfqq));
++
++ if (!bfq_bfqq_busy(bfqq) &&
++ reason != BFQ_BFQQ_BUDGET_TIMEOUT &&
++ reason != BFQ_BFQQ_BUDGET_EXHAUSTED)
++ bfq_mark_bfqq_non_blocking_wait_rq(bfqq);
+ }
+
+ /*
+@@ -2342,20 +2887,17 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ */
+ static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
+ {
+- if (bfq_bfqq_budget_new(bfqq) ||
+- time_before(jiffies, bfqq->budget_timeout))
+- return false;
+- return true;
++ return time_is_before_eq_jiffies(bfqq->budget_timeout);
+ }
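The rewrite also drops the old bfq_bfqq_budget_new() special case — budget_timeout is now always initialized, see the bfq_init_bfqq hunk later in this patch — and collapses the double negation; time_is_before_eq_jiffies(t) is wrap-safe shorthand for "t is now or in the past":

    /* time_is_before_eq_jiffies(t) == time_before_eq(t, jiffies), the
     * exact negation of the old time_before(jiffies, t) check. */
    return !time_before(jiffies, bfqq->budget_timeout);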
+
+ /*
+- * If we expire a queue that is waiting for the arrival of a new
+- * request, we may prevent the fictitious timestamp back-shifting that
+- * allows the guarantees of the queue to be preserved (see [1] for
+- * this tricky aspect). Hence we return true only if this condition
+- * does not hold, or if the queue is slow enough to deserve only to be
+- * kicked off for preserving a high throughput.
+-*/
++ * If we expire a queue that is actively waiting (i.e., with the
++ * device idled) for the arrival of a new request, then we may incur
++ * the timestamp misalignment problem described in the body of the
++ * function __bfq_activate_entity. Hence we return true only if this
++ * condition does not hold, or if the queue is slow enough to deserve
++ * only to be kicked off for preserving a high throughput.
++ */
+ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
+ {
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+@@ -2397,10 +2939,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ {
+ struct bfq_data *bfqd = bfqq->bfqd;
+ bool idling_boosts_thr, idling_boosts_thr_without_issues,
+- all_queues_seeky, on_hdd_and_not_all_queues_seeky,
+ idling_needed_for_service_guarantees,
+ asymmetric_scenario;
+
++ if (bfqd->strict_guarantees)
++ return true;
++
+ /*
+ * The next variable takes into account the cases where idling
+ * boosts the throughput.
+@@ -2422,7 +2966,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ */
+ idling_boosts_thr = !bfqd->hw_tag ||
+ (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
+- bfq_bfqq_idle_window(bfqq)) ;
++ bfq_bfqq_idle_window(bfqq));
+
+ /*
+ * The value of the next variable,
+@@ -2463,74 +3007,27 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ bfqd->wr_busy_queues == 0;
+
+ /*
+- * There are then two cases where idling must be performed not
++ * There is then a case where idling must be performed not
+ * for throughput concerns, but to preserve service
+- * guarantees. In the description of these cases, we say, for
+- * short, that a queue is sequential/random if the process
+- * associated to the queue issues sequential/random requests
+- * (in the second case the queue may be tagged as seeky or
+- * even constantly_seeky).
+- *
+- * To introduce the first case, we note that, since
+- * bfq_bfqq_idle_window(bfqq) is false if the device is
+- * NCQ-capable and bfqq is random (see
+- * bfq_update_idle_window()), then, from the above two
+- * assignments it follows that
+- * idling_boosts_thr_without_issues is false if the device is
+- * NCQ-capable and bfqq is random. Therefore, for this case,
+- * device idling would never be allowed if we used just
+- * idling_boosts_thr_without_issues to decide whether to allow
+- * it. And, beneficially, this would imply that throughput
+- * would always be boosted also with random I/O on NCQ-capable
+- * HDDs.
+- *
+- * But we must be careful on this point, to avoid an unfair
+- * treatment for bfqq. In fact, because of the same above
+- * assignments, idling_boosts_thr_without_issues is, on the
+- * other hand, true if 1) the device is an HDD and bfqq is
+- * sequential, and 2) there are no busy weight-raised
+- * queues. As a consequence, if we used just
+- * idling_boosts_thr_without_issues to decide whether to idle
+- * the device, then with an HDD we might easily bump into a
+- * scenario where queues that are sequential and I/O-bound
+- * would enjoy idling, whereas random queues would not. The
+- * latter might then get a low share of the device throughput,
+- * simply because the former would get many requests served
+- * after being set as in service, while the latter would not.
++ * guarantees.
+ *
+- * To address this issue, we start by setting to true a
+- * sentinel variable, on_hdd_and_not_all_queues_seeky, if the
+- * device is rotational and not all queues with pending or
+- * in-flight requests are constantly seeky (i.e., there are
+- * active sequential queues, and bfqq might then be mistreated
+- * if it does not enjoy idling because it is random).
+- */
+- all_queues_seeky = bfq_bfqq_constantly_seeky(bfqq) &&
+- bfqd->busy_in_flight_queues ==
+- bfqd->const_seeky_busy_in_flight_queues;
+-
+- on_hdd_and_not_all_queues_seeky =
+- !blk_queue_nonrot(bfqd->queue) && !all_queues_seeky;
+-
+- /*
+- * To introduce the second case where idling needs to be
+- * performed to preserve service guarantees, we can note that
+- * allowing the drive to enqueue more than one request at a
+- * time, and hence delegating de facto final scheduling
+- * decisions to the drive's internal scheduler, causes loss of
+- * control on the actual request service order. In particular,
+- * the critical situation is when requests from different
+- * processes happens to be present, at the same time, in the
+- * internal queue(s) of the drive. In such a situation, the
+- * drive, by deciding the service order of the
+- * internally-queued requests, does determine also the actual
+- * throughput distribution among these processes. But the
+- * drive typically has no notion or concern about per-process
+- * throughput distribution, and makes its decisions only on a
+- * per-request basis. Therefore, the service distribution
+- * enforced by the drive's internal scheduler is likely to
+- * coincide with the desired device-throughput distribution
+- * only in a completely symmetric scenario where:
++ * To introduce this case, we can note that allowing the drive
++ * to enqueue more than one request at a time, and hence
++ * delegating de facto final scheduling decisions to the
++ * drive's internal scheduler, entails loss of control on the
++ * actual request service order. In particular, the critical
++ * situation is when requests from different processes happen
++ * to be present, at the same time, in the internal queue(s)
++ * of the drive. In such a situation, the drive, by deciding
++ * the service order of the internally-queued requests, does
++ * determine also the actual throughput distribution among
++ * these processes. But the drive typically has no notion or
++ * concern about per-process throughput distribution, and
++ * makes its decisions only on a per-request basis. Therefore,
++ * the service distribution enforced by the drive's internal
++ * scheduler is likely to coincide with the desired
++ * device-throughput distribution only in a completely
++ * symmetric scenario where:
+ * (i) each of these processes must get the same throughput as
+ * the others;
+ * (ii) all these processes have the same I/O pattern
+@@ -2552,26 +3049,53 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ * words, only if sub-condition (i) holds, then idling is
+ * allowed, and the device tends to be prevented from queueing
+ * many requests, possibly of several processes. The reason
+- * for not controlling also sub-condition (ii) is that, first,
+- * in the case of an HDD, the asymmetry in terms of types of
+- * I/O patterns is already taken in to account in the above
+- * sentinel variable
+- * on_hdd_and_not_all_queues_seeky. Secondly, in the case of a
+- * flash-based device, we prefer however to privilege
+- * throughput (and idling lowers throughput for this type of
+- * devices), for the following reasons:
+- * 1) differently from HDDs, the service time of random
+- * requests is not orders of magnitudes lower than the service
+- * time of sequential requests; thus, even if processes doing
+- * sequential I/O get a preferential treatment with respect to
+- * others doing random I/O, the consequences are not as
+- * dramatic as with HDDs;
+- * 2) if a process doing random I/O does need strong
+- * throughput guarantees, it is hopefully already being
+- * weight-raised, or the user is likely to have assigned it a
+- * higher weight than the other processes (and thus
+- * sub-condition (i) is likely to be false, which triggers
+- * idling).
++ * for not controlling also sub-condition (ii) is that we
++ * exploit preemption to preserve guarantees in case of
++ * symmetric scenarios, even if (ii) does not hold, as
++ * explained in the next two paragraphs.
++ *
++ * Even if a queue, say Q, is expired when it remains idle, Q
++ * can still preempt the new in-service queue if the next
++ * request of Q arrives soon (see the comments on
++ * bfq_bfqq_update_budg_for_activation). If all queues and
++ * groups have the same weight, this form of preemption,
++ * combined with the hole-recovery heuristic described in the
++ * comments on function bfq_bfqq_update_budg_for_activation,
++ * are enough to preserve a correct bandwidth distribution in
++ * the mid term, even without idling. In fact, even if not
++ * idling allows the internal queues of the device to contain
++ * many requests, and thus to reorder requests, we can rather
++ * safely assume that the internal scheduler still preserves a
++ * minimum of mid-term fairness. The motivation for using
++ * preemption instead of idling is that, by not idling,
++ * service guarantees are preserved without sacrificing
++ * throughput. In other words, both a high
++ * throughput and its desired distribution are obtained.
++ *
++ * More precisely, this preemption-based, idleless approach
++ * provides fairness in terms of IOPS, and not sectors per
++ * second. This can be seen with a simple example. Suppose
++ * that there are two queues with the same weight, but that
++ * the first queue receives requests of 8 sectors, while the
++ * second queue receives requests of 1024 sectors. In
++ * addition, suppose that each of the two queues contains at
++ * most one request at a time, which implies that each queue
++ * always remains idle after it is served. Finally, after
++ * remaining idle, each queue receives very quickly a new
++ * request. It follows that the two queues are served
++ * alternatively, preempting each other if needed. This
++ * implies that, although both queues have the same weight,
++ * the queue with large requests receives a service that is
++ * 1024/8 times as high as the service received by the other
++ * queue.
++ *
++ * On the other hand, device idling is performed, and thus
++ * pure sector-domain guarantees are provided, for the
++ * following queues, which are likely to need stronger
++ * throughput guarantees: weight-raised queues, and queues
++ * with a higher weight than other queues. When such queues
++ * are active, sub-condition (i) is false, which triggers
++ * device idling.
+ *
+ * According to the above considerations, the next variable is
+ * true (only) if sub-condition (i) holds. To compute the
+@@ -2579,7 +3103,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ * the function bfq_symmetric_scenario(), but also check
+ * whether bfqq is being weight-raised, because
+ * bfq_symmetric_scenario() does not take into account also
+- * weight-raised queues (see comments to
++ * weight-raised queues (see comments on
+ * bfq_weights_tree_add()).
+ *
+ * As a side note, it is worth considering that the above
+@@ -2601,17 +3125,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ * bfqq. Such a case is when bfqq became active in a burst of
+ * queue activations. Queues that became active during a large
+ * burst benefit only from throughput, as discussed in the
+- * comments to bfq_handle_burst. Thus, if bfqq became active
++ * comments on bfq_handle_burst. Thus, if bfqq became active
+ * in a burst and not idling the device maximizes throughput,
++ * then the device must not be idled, because not idling the
+ * device provides bfqq and all other queues in the burst with
+- * maximum benefit. Combining this and the two cases above, we
+- * can now establish when idling is actually needed to
+- * preserve service guarantees.
++ * maximum benefit. Combining this and the above case, we can
++ * now establish when idling is actually needed to preserve
++ * service guarantees.
+ */
+ idling_needed_for_service_guarantees =
+- (on_hdd_and_not_all_queues_seeky || asymmetric_scenario) &&
+- !bfq_bfqq_in_large_burst(bfqq);
++ asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
+
+ /*
+ * We have now all the components we need to compute the return
+@@ -2621,6 +3144,14 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ * 2) idling either boosts the throughput (without issues), or
+ * is necessary to preserve service guarantees.
+ */
++ bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d "
++ "wr_busy %d boosts %d IO-bound %d guar %d",
++ bfq_bfqq_sync(bfqq), idling_boosts_thr,
++ bfqd->wr_busy_queues,
++ idling_boosts_thr_without_issues,
++ bfq_bfqq_IO_bound(bfqq),
++ idling_needed_for_service_guarantees);
++
+ return bfq_bfqq_sync(bfqq) &&
+ (idling_boosts_thr_without_issues ||
+ idling_needed_for_service_guarantees);
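The 1024/8 example in the comment block above checks out with trivial arithmetic: under pure request-by-request alternation both queues complete the same number of requests per second, so sector throughput scales with request size.

    /* Editorial sketch: equal IOPS, unequal sector throughput. */
    unsigned int iops = 100;              /* both queues, hypothetical */
    unsigned int thr_small = iops * 8;    /* 800 sectors/sec */
    unsigned int thr_large = iops * 1024; /* 102400 sectors/sec */
    /* thr_large / thr_small == 128 == 1024/8, despite equal weights */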
+@@ -2632,7 +3163,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ * 1) the queue must remain in service and cannot be expired, and
+ * 2) the device must be idled to wait for the possible arrival of a new
+ * request for the queue.
+- * See the comments to the function bfq_bfqq_may_idle for the reasons
++ * See the comments on the function bfq_bfqq_may_idle for the reasons
+ * why performing device idling is the best choice to boost the throughput
+ * and preserve service guarantees when bfq_bfqq_may_idle itself
+ * returns true.
+@@ -2698,9 +3229,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+ */
+ bfq_clear_bfqq_wait_request(bfqq);
+ del_timer(&bfqd->idle_slice_timer);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_idle_time(bfqq_group(bfqq));
+-#endif
+ }
+ goto keep_queue;
+ }
+@@ -2745,14 +3274,11 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
+
+ /*
+- * If the queue was activated in a burst, or
+- * too much time has elapsed from the beginning
+- * of this weight-raising period, or the queue has
+- * exceeded the acceptable number of cooperations,
+- * then end weight raising.
++ * If the queue was activated in a burst, or too much
++ * time has elapsed from the beginning of this
++ * weight-raising period, then end weight raising.
+ */
+ if (bfq_bfqq_in_large_burst(bfqq) ||
+- bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh ||
+ time_is_before_jiffies(bfqq->last_wr_start_finish +
+ bfqq->wr_cur_max_time)) {
+ bfqq->last_wr_start_finish = jiffies;
+@@ -2814,10 +3340,25 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
+ goto expire;
+ }
+
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+ /* Finally, insert request into driver dispatch list. */
+ bfq_bfqq_served(bfqq, service_to_charge);
++
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
++
+ bfq_dispatch_insert(bfqd->queue, rq);
+
++ /*
++ * If weight raising has to terminate for bfqq, then the next
++ * function causes an immediate update of bfqq's weight,
++ * without waiting for next activation. As a consequence, on
++ * expiration, bfqq will be timestamped as if it had never been
++ * weight-raised during this service slot, even if it has
++ * received part or even most of the service as a
++ * weight-raised queue. This inflates bfqq's timestamps, which
++ * is beneficial, as bfqq is then more willing to leave the
++ * device immediately to possible other weight-raised queues.
++ */
+ bfq_update_wr_data(bfqd, bfqq);
+
+ bfq_log_bfqq(bfqd, bfqq,
+@@ -2833,9 +3374,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
+ bfqd->in_service_bic = RQ_BIC(rq);
+ }
+
+- if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) &&
+- dispatched >= bfqd->bfq_max_budget_async_rq) ||
+- bfq_class_idle(bfqq)))
++ if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq))
+ goto expire;
+
+ return dispatched;
+@@ -2881,8 +3420,8 @@ static int bfq_forced_dispatch(struct bfq_data *bfqd)
+ st = bfq_entity_service_tree(&bfqq->entity);
+
+ dispatched += __bfq_forced_dispatch_bfqq(bfqq);
+- bfqq->max_budget = bfq_max_budget(bfqd);
+
++ bfqq->max_budget = bfq_max_budget(bfqd);
+ bfq_forget_idle(st);
+ }
+
+@@ -2895,9 +3434,9 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+ {
+ struct bfq_data *bfqd = q->elevator->elevator_data;
+ struct bfq_queue *bfqq;
+- int max_dispatch;
+
+ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
++
+ if (bfqd->busy_queues == 0)
+ return 0;
+
+@@ -2908,21 +3447,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+ if (!bfqq)
+ return 0;
+
+- if (bfq_class_idle(bfqq))
+- max_dispatch = 1;
+-
+- if (!bfq_bfqq_sync(bfqq))
+- max_dispatch = bfqd->bfq_max_budget_async_rq;
+-
+- if (!bfq_bfqq_sync(bfqq) && bfqq->dispatched >= max_dispatch) {
+- if (bfqd->busy_queues > 1)
+- return 0;
+- if (bfqq->dispatched >= 4 * max_dispatch)
+- return 0;
+- }
+-
+- if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq))
+- return 0;
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+ bfq_clear_bfqq_wait_request(bfqq);
+ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
+@@ -2933,6 +3458,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+ bfq_log_bfqq(bfqd, bfqq, "dispatched %s request",
+ bfq_bfqq_sync(bfqq) ? "sync" : "async");
+
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+ return 1;
+ }
+
+@@ -2949,11 +3475,11 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
+ struct bfq_group *bfqg = bfqq_group(bfqq);
+ #endif
+
+- BUG_ON(atomic_read(&bfqq->ref) <= 0);
++ BUG_ON(bfqq->ref <= 0);
+
+- bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq,
+- atomic_read(&bfqq->ref));
+- if (!atomic_dec_and_test(&bfqq->ref))
++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, bfqq->ref);
++ bfqq->ref--;
++ if (bfqq->ref)
+ return;
+
+ BUG_ON(rb_first(&bfqq->sort_list));
+@@ -3007,8 +3533,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_schedule_dispatch(bfqd);
+ }
+
+- bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
+- atomic_read(&bfqq->ref));
++ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref);
+
+ bfq_put_cooperator(bfqq);
+
+@@ -3019,26 +3544,7 @@ static void bfq_init_icq(struct io_cq *icq)
+ {
+ struct bfq_io_cq *bic = icq_to_bic(icq);
+
+- bic->ttime.last_end_request = jiffies;
+- /*
+- * A newly created bic indicates that the process has just
+- * started doing I/O, and is probably mapping into memory its
+- * executable and libraries: it definitely needs weight raising.
+- * There is however the possibility that the process performs,
+- * for a while, I/O close to some other process. EQM intercepts
+- * this behavior and may merge the queue corresponding to the
+- * process with some other queue, BEFORE the weight of the queue
+- * is raised. Merged queues are not weight-raised (they are assumed
+- * to belong to processes that benefit only from high throughput).
+- * If the merge is basically the consequence of an accident, then
+- * the queue will be split soon and will get back its old weight.
+- * It is then important to write down somewhere that this queue
+- * does need weight raising, even if it did not make it to get its
+- * weight raised before being merged. To this purpose, we overload
+- * the field raising_time_left and assign 1 to it, to mark the queue
+- * as needing weight raising.
+- */
+- bic->wr_time_left = 1;
++ bic->ttime.last_end_request = bfq_smallest_from_now();
+ }
+
+ static void bfq_exit_icq(struct io_cq *icq)
+@@ -3046,21 +3552,21 @@ static void bfq_exit_icq(struct io_cq *icq)
+ struct bfq_io_cq *bic = icq_to_bic(icq);
+ struct bfq_data *bfqd = bic_to_bfqd(bic);
+
+- if (bic->bfqq[BLK_RW_ASYNC]) {
+- bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]);
+- bic->bfqq[BLK_RW_ASYNC] = NULL;
++ if (bic_to_bfqq(bic, false)) {
++ bfq_exit_bfqq(bfqd, bic_to_bfqq(bic, false));
++ bic_set_bfqq(bic, NULL, false);
+ }
+
+- if (bic->bfqq[BLK_RW_SYNC]) {
++ if (bic_to_bfqq(bic, true)) {
+ /*
+ * If the bic is using a shared queue, put the reference
+ * taken on the io_context when the bic started using a
+ * shared bfq_queue.
+ */
+- if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
++ if (bfq_bfqq_coop(bic_to_bfqq(bic, true)))
+ put_io_context(icq->ioc);
+- bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
+- bic->bfqq[BLK_RW_SYNC] = NULL;
++ bfq_exit_bfqq(bfqd, bic_to_bfqq(bic, true));
++ bic_set_bfqq(bic, NULL, true);
+ }
+ }
+
+@@ -3068,7 +3574,8 @@ static void bfq_exit_icq(struct io_cq *icq)
+ * Update the entity prio values; note that the new values will not
+ * be used until the next (re)activation.
+ */
+-static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
++static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq,
++ struct bfq_io_cq *bic)
+ {
+ struct task_struct *tsk = current;
+ int ioprio_class;
+@@ -3100,7 +3607,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
+ break;
+ }
+
+- if (bfqq->new_ioprio < 0 || bfqq->new_ioprio >= IOPRIO_BE_NR) {
++ if (bfqq->new_ioprio >= IOPRIO_BE_NR) {
+ printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n",
+ bfqq->new_ioprio);
+ BUG();
+@@ -3108,45 +3615,40 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
+
+ bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
+ bfqq->entity.prio_changed = 1;
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "set_next_ioprio_data: bic_class %d prio %d class %d",
++ ioprio_class, bfqq->new_ioprio, bfqq->new_ioprio_class);
+ }
+
+ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
+ {
+- struct bfq_data *bfqd;
+- struct bfq_queue *bfqq, *new_bfqq;
++ struct bfq_data *bfqd = bic_to_bfqd(bic);
++ struct bfq_queue *bfqq;
+ unsigned long uninitialized_var(flags);
+ int ioprio = bic->icq.ioc->ioprio;
+
+- bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
+- &flags);
+ /*
+ * This condition may trigger on a newly created bic, be sure to
+ * drop the lock before returning.
+ */
+ if (unlikely(!bfqd) || likely(bic->ioprio == ioprio))
+- goto out;
++ return;
+
+ bic->ioprio = ioprio;
+
+- bfqq = bic->bfqq[BLK_RW_ASYNC];
++ bfqq = bic_to_bfqq(bic, false);
+ if (bfqq) {
+- new_bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic,
+- GFP_ATOMIC);
+- if (new_bfqq) {
+- bic->bfqq[BLK_RW_ASYNC] = new_bfqq;
+- bfq_log_bfqq(bfqd, bfqq,
+- "check_ioprio_change: bfqq %p %d",
+- bfqq, atomic_read(&bfqq->ref));
+- bfq_put_queue(bfqq);
+- }
++ bfq_put_queue(bfqq);
++ bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
++ bic_set_bfqq(bic, bfqq, false);
++ bfq_log_bfqq(bfqd, bfqq,
++ "check_ioprio_change: bfqq %p %d",
++ bfqq, bfqq->ref);
+ }
+
+- bfqq = bic->bfqq[BLK_RW_SYNC];
++ bfqq = bic_to_bfqq(bic, true);
+ if (bfqq)
+ bfq_set_next_ioprio_data(bfqq, bic);
+-
+-out:
+- bfq_put_bfqd_unlock(bfqd, &flags);
+ }
+
+ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3155,8 +3657,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ RB_CLEAR_NODE(&bfqq->entity.rb_node);
+ INIT_LIST_HEAD(&bfqq->fifo);
+ INIT_HLIST_NODE(&bfqq->burst_list_node);
++ BUG_ON(!hlist_unhashed(&bfqq->burst_list_node));
+
+- atomic_set(&bfqq->ref, 0);
++ bfqq->ref = 0;
+ bfqq->bfqd = bfqd;
+
+ if (bic)
+@@ -3166,6 +3669,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ if (!bfq_class_idle(bfqq))
+ bfq_mark_bfqq_idle_window(bfqq);
+ bfq_mark_bfqq_sync(bfqq);
++ bfq_mark_bfqq_just_created(bfqq);
+ } else
+ bfq_clear_bfqq_sync(bfqq);
+ bfq_mark_bfqq_IO_bound(bfqq);
+@@ -3175,72 +3679,17 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ bfqq->pid = pid;
+
+ bfqq->wr_coeff = 1;
+- bfqq->last_wr_start_finish = 0;
++ bfqq->last_wr_start_finish = bfq_smallest_from_now();
++ bfqq->budget_timeout = bfq_smallest_from_now();
++ bfqq->split_time = bfq_smallest_from_now();
+ /*
+ * Set to the value for which bfqq will not be deemed as
+ * soft rt when it becomes backlogged.
+ */
+- bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies);
+-}
+-
+-static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd,
+- struct bio *bio, int is_sync,
+- struct bfq_io_cq *bic,
+- gfp_t gfp_mask)
+-{
+- struct bfq_group *bfqg;
+- struct bfq_queue *bfqq, *new_bfqq = NULL;
+- struct blkcg *blkcg;
+-
+-retry:
+- rcu_read_lock();
+-
+- blkcg = bio_blkcg(bio);
+- bfqg = bfq_find_alloc_group(bfqd, blkcg);
+- /* bic always exists here */
+- bfqq = bic_to_bfqq(bic, is_sync);
+-
+- /*
+- * Always try a new alloc if we fall back to the OOM bfqq
+- * originally, since it should just be a temporary situation.
+- */
+- if (!bfqq || bfqq == &bfqd->oom_bfqq) {
+- bfqq = NULL;
+- if (new_bfqq) {
+- bfqq = new_bfqq;
+- new_bfqq = NULL;
+- } else if (gfpflags_allow_blocking(gfp_mask)) {
+- rcu_read_unlock();
+- spin_unlock_irq(bfqd->queue->queue_lock);
+- new_bfqq = kmem_cache_alloc_node(bfq_pool,
+- gfp_mask | __GFP_ZERO,
+- bfqd->queue->node);
+- spin_lock_irq(bfqd->queue->queue_lock);
+- if (new_bfqq)
+- goto retry;
+- } else {
+- bfqq = kmem_cache_alloc_node(bfq_pool,
+- gfp_mask | __GFP_ZERO,
+- bfqd->queue->node);
+- }
+-
+- if (bfqq) {
+- bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
+- is_sync);
+- bfq_init_entity(&bfqq->entity, bfqg);
+- bfq_log_bfqq(bfqd, bfqq, "allocated");
+- } else {
+- bfqq = &bfqd->oom_bfqq;
+- bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
+- }
+- }
+-
+- if (new_bfqq)
+- kmem_cache_free(bfq_pool, new_bfqq);
++ bfqq->soft_rt_next_start = bfq_greatest_from_now();
+
+- rcu_read_unlock();
+-
+- return bfqq;
++ /* first request is almost certainly seeky */
++ bfqq->seek_history = 1;
+ }
+
+ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
+@@ -3263,44 +3712,56 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
+ }
+
+ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+- struct bio *bio, int is_sync,
+- struct bfq_io_cq *bic, gfp_t gfp_mask)
++ struct bio *bio, bool is_sync,
++ struct bfq_io_cq *bic)
+ {
+ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
+ struct bfq_queue **async_bfqq = NULL;
+- struct bfq_queue *bfqq = NULL;
++ struct bfq_queue *bfqq;
++ struct bfq_group *bfqg;
+
+- if (!is_sync) {
+- struct blkcg *blkcg;
+- struct bfq_group *bfqg;
++ rcu_read_lock();
+
+- rcu_read_lock();
+- blkcg = bio_blkcg(bio);
+- rcu_read_unlock();
+- bfqg = bfq_find_alloc_group(bfqd, blkcg);
++ bfqg = bfq_find_alloc_group(bfqd, bio_blkcg(bio));
++
++ if (!is_sync) {
+ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
+ ioprio);
+ bfqq = *async_bfqq;
++ if (bfqq)
++ goto out;
+ }
+
+- if (!bfqq)
+- bfqq = bfq_find_alloc_queue(bfqd, bio, is_sync, bic, gfp_mask);
++ bfqq = kmem_cache_alloc_node(bfq_pool, GFP_NOWAIT | __GFP_ZERO,
++ bfqd->queue->node);
++
++ if (bfqq) {
++ bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
++ is_sync);
++ bfq_init_entity(&bfqq->entity, bfqg);
++ bfq_log_bfqq(bfqd, bfqq, "allocated");
++ } else {
++ bfqq = &bfqd->oom_bfqq;
++ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
++ goto out;
++ }
+
+ /*
+ * Pin the queue now that it's allocated, scheduler exit will
+ * prune it.
+ */
+- if (!is_sync && !(*async_bfqq)) {
+- atomic_inc(&bfqq->ref);
++ if (async_bfqq) {
++ bfqq->ref++;
+ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
+- bfqq, atomic_read(&bfqq->ref));
++ bfqq, bfqq->ref);
+ *async_bfqq = bfqq;
+ }
+
+- atomic_inc(&bfqq->ref);
+- bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq,
+- atomic_read(&bfqq->ref));
++out:
++ bfqq->ref++;
++ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref);
++ rcu_read_unlock();
+ return bfqq;
+ }
+
+@@ -3316,37 +3777,21 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd,
+ bic->ttime.ttime_samples;
+ }
+
+-static void bfq_update_io_seektime(struct bfq_data *bfqd,
+- struct bfq_queue *bfqq,
+- struct request *rq)
+-{
+- sector_t sdist;
+- u64 total;
+-
+- if (bfqq->last_request_pos < blk_rq_pos(rq))
+- sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
+- else
+- sdist = bfqq->last_request_pos - blk_rq_pos(rq);
+-
+- /*
+- * Don't allow the seek distance to get too large from the
+- * odd fragment, pagein, etc.
+- */
+- if (bfqq->seek_samples == 0) /* first request, not really a seek */
+- sdist = 0;
+- else if (bfqq->seek_samples <= 60) /* second & third seek */
+- sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024);
+- else
+- sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64);
+
+- bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8;
+- bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8;
+- total = bfqq->seek_total + (bfqq->seek_samples/2);
+- do_div(total, bfqq->seek_samples);
+- bfqq->seek_mean = (sector_t)total;
++static void
++bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ struct request *rq)
++{
++ sector_t sdist = 0;
++ if (bfqq->last_request_pos) {
++ if (bfqq->last_request_pos < blk_rq_pos(rq))
++ sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
++ else
++ sdist = bfqq->last_request_pos - blk_rq_pos(rq);
++ }
+
+- bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist,
+- (u64)bfqq->seek_mean);
++ bfqq->seek_history <<= 1;
++ bfqq->seek_history |= (sdist > BFQQ_SEEK_THR);
+ }
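seek_history thus becomes a 32-bit shift register holding one "was this request a long seek?" bit per recent request, replacing the old floating seek mean. The BFQQ_SEEKY() test used elsewhere in this patch classifies a queue from the population count of that register; a sketch of this style of check, with an assumed threshold (the patch defines the real one where BFQQ_SEEKY is declared):

    /* Editorial sketch: call a queue seeky when more than a handful of
     * its last 32 requests were long seeks. Threshold is illustrative. */
    static bool recent_io_is_seeky(u32 seek_history)
    {
        return hweight32(seek_history) > 32 / 8;  /* > 4 of last 32 */
    }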
+
+ /*
+@@ -3364,7 +3809,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
+ return;
+
+ /* Idle window just restored, statistics are meaningless. */
+- if (bfq_bfqq_just_split(bfqq))
++ if (time_is_after_eq_jiffies(bfqq->split_time +
++ bfqd->bfq_wr_min_idle_time))
+ return;
+
+ enable_idle = bfq_bfqq_idle_window(bfqq);
+@@ -3404,22 +3850,13 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+
+ bfq_update_io_thinktime(bfqd, bic);
+ bfq_update_io_seektime(bfqd, bfqq, rq);
+- if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) {
+- bfq_clear_bfqq_constantly_seeky(bfqq);
+- if (!blk_queue_nonrot(bfqd->queue)) {
+- BUG_ON(!bfqd->const_seeky_busy_in_flight_queues);
+- bfqd->const_seeky_busy_in_flight_queues--;
+- }
+- }
+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
+ !BFQQ_SEEKY(bfqq))
+ bfq_update_idle_window(bfqd, bfqq, bic);
+- bfq_clear_bfqq_just_split(bfqq);
+
+ bfq_log_bfqq(bfqd, bfqq,
+- "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
+- bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq),
+- (long long unsigned)bfqq->seek_mean);
++ "rq_enqueued: idle_window=%d (seeky %d)",
++ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq));
+
+ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
+
+@@ -3433,14 +3870,15 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ * is small and the queue is not to be expired, then
+ * just exit.
+ *
+- * In this way, if the disk is being idled to wait for
+- * a new request from the in-service queue, we avoid
+- * unplugging the device and committing the disk to serve
+- * just a small request. On the contrary, we wait for
+- * the block layer to decide when to unplug the device:
+- * hopefully, new requests will be merged to this one
+- * quickly, then the device will be unplugged and
+- * larger requests will be dispatched.
++ * In this way, if the device is being idled to wait
++ * for a new request from the in-service queue, we
++ * avoid unplugging the device and committing the
++ * device to serve just a small request. On the
++ * contrary, we wait for the block layer to decide
++ * when to unplug the device: hopefully, new requests
++ * will be merged to this one quickly, then the device
++ * will be unplugged and larger requests will be
++ * dispatched.
+ */
+ if (small_req && !budget_timeout)
+ return;
+@@ -3453,9 +3891,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ */
+ bfq_clear_bfqq_wait_request(bfqq);
+ del_timer(&bfqd->idle_slice_timer);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_idle_time(bfqq_group(bfqq));
+-#endif
+
+ /*
+ * The queue is not empty, because a new request just
+@@ -3499,27 +3935,19 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+ */
+ new_bfqq->allocated[rq_data_dir(rq)]++;
+ bfqq->allocated[rq_data_dir(rq)]--;
+- atomic_inc(&new_bfqq->ref);
++ new_bfqq->ref++;
++ bfq_clear_bfqq_just_created(bfqq);
+ bfq_put_queue(bfqq);
+ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
+ bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
+ bfqq, new_bfqq);
+ rq->elv.priv[1] = new_bfqq;
+ bfqq = new_bfqq;
+- } else
+- bfq_bfqq_increase_failed_cooperations(bfqq);
++ }
+ }
+
+ bfq_add_request(rq);
+
+- /*
+- * Here a newly-created bfq_queue has already started a weight-raising
+- * period: clear raising_time_left to prevent bfq_bfqq_save_state()
+- * from assigning it a full weight-raising period. See the detailed
+- * comments about this field in bfq_init_icq().
+- */
+- if (bfqq->bic)
+- bfqq->bic->wr_time_left = 0;
+ rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+ list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+@@ -3528,8 +3956,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+
+ static void bfq_update_hw_tag(struct bfq_data *bfqd)
+ {
+- bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver,
+- bfqd->rq_in_driver);
++ bfqd->max_rq_in_driver = max_t(int, bfqd->max_rq_in_driver,
++ bfqd->rq_in_driver);
+
+ if (bfqd->hw_tag == 1)
+ return;
+@@ -3560,43 +3988,41 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
+ bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)",
+ blk_rq_sectors(rq), sync);
+
++ assert_spin_locked(bfqd->queue->queue_lock);
+ bfq_update_hw_tag(bfqd);
+
+ BUG_ON(!bfqd->rq_in_driver);
+ BUG_ON(!bfqq->dispatched);
+ bfqd->rq_in_driver--;
+ bfqq->dispatched--;
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_completion(bfqq_group(bfqq),
+ rq_start_time_ns(rq),
+ rq_io_start_time_ns(rq), rq->cmd_flags);
+-#endif
+
+ if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
++ /*
++ * Set budget_timeout (which we overload to store the
++ * time at which the queue remains with no backlog and
++ * no outstanding request; used by the weight-raising
++ * mechanism).
++ */
++ bfqq->budget_timeout = jiffies;
++
+ bfq_weights_tree_remove(bfqd, &bfqq->entity,
+ &bfqd->queue_weights_tree);
+- if (!blk_queue_nonrot(bfqd->queue)) {
+- BUG_ON(!bfqd->busy_in_flight_queues);
+- bfqd->busy_in_flight_queues--;
+- if (bfq_bfqq_constantly_seeky(bfqq)) {
+- BUG_ON(!bfqd->
+- const_seeky_busy_in_flight_queues);
+- bfqd->const_seeky_busy_in_flight_queues--;
+- }
+- }
+ }
+
+- if (sync) {
+- bfqd->sync_flight--;
+- RQ_BIC(rq)->ttime.last_end_request = jiffies;
+- }
++ RQ_BIC(rq)->ttime.last_end_request = jiffies;
+
+ /*
+- * If we are waiting to discover whether the request pattern of the
+- * task associated with the queue is actually isochronous, and
+- * both requisites for this condition to hold are satisfied, then
+- * compute soft_rt_next_start (see the comments to the function
+- * bfq_bfqq_softrt_next_start()).
++ * If we are waiting to discover whether the request pattern
++ * of the task associated with the queue is actually
++ * isochronous, and both requisites for this condition to hold
++ * are now satisfied, then compute soft_rt_next_start (see the
++ * comments on the function bfq_bfqq_softrt_next_start()). We
++ * schedule this delayed check when bfqq expires, if it still
++ * has in-flight requests.
+ */
+ if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
+ RB_EMPTY_ROOT(&bfqq->sort_list))
+@@ -3608,10 +4034,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
+ * or if we want to idle in case it has no pending requests.
+ */
+ if (bfqd->in_service_queue == bfqq) {
+- if (bfq_bfqq_budget_new(bfqq))
+- bfq_set_budget_timeout(bfqd);
+-
+- if (bfq_bfqq_must_idle(bfqq)) {
++ if (bfqq->dispatched == 0 && bfq_bfqq_must_idle(bfqq)) {
+ bfq_arm_slice_timer(bfqd);
+ goto out;
+ } else if (bfq_may_expire_for_budg_timeout(bfqq))
+@@ -3682,14 +4105,14 @@ static void bfq_put_request(struct request *rq)
+ rq->elv.priv[1] = NULL;
+
+ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d",
+- bfqq, atomic_read(&bfqq->ref));
++ bfqq, bfqq->ref);
+ bfq_put_queue(bfqq);
+ }
+ }
+
+ /*
+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
+- * was the last process referring to said bfqq.
++ * was the last process referring to that bfqq.
+ */
+ static struct bfq_queue *
+ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
+@@ -3727,11 +4150,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+ unsigned long flags;
+ bool split = false;
+
+- might_sleep_if(gfpflags_allow_blocking(gfp_mask));
+-
+- bfq_check_ioprio_change(bic, bio);
+-
+ spin_lock_irqsave(q->queue_lock, flags);
++ bfq_check_ioprio_change(bic, bio);
+
+ if (!bic)
+ goto queue_fail;
+@@ -3741,23 +4161,47 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+ new_queue:
+ bfqq = bic_to_bfqq(bic, is_sync);
+ if (!bfqq || bfqq == &bfqd->oom_bfqq) {
+- bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, gfp_mask);
++ if (bfqq)
++ bfq_put_queue(bfqq);
++ bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
++ BUG_ON(!hlist_unhashed(&bfqq->burst_list_node));
++
+ bic_set_bfqq(bic, bfqq, is_sync);
+ if (split && is_sync) {
++ bfq_log_bfqq(bfqd, bfqq,
++ "set_request: was_in_list %d "
++ "was_in_large_burst %d "
++ "large burst in progress %d",
++ bic->was_in_burst_list,
++ bic->saved_in_large_burst,
++ bfqd->large_burst);
++
+ if ((bic->was_in_burst_list && bfqd->large_burst) ||
+- bic->saved_in_large_burst)
++ bic->saved_in_large_burst) {
++ bfq_log_bfqq(bfqd, bfqq,
++ "set_request: marking in "
++ "large burst");
+ bfq_mark_bfqq_in_large_burst(bfqq);
+- else {
+- bfq_clear_bfqq_in_large_burst(bfqq);
+- if (bic->was_in_burst_list)
+- hlist_add_head(&bfqq->burst_list_node,
+- &bfqd->burst_list);
++ } else {
++ bfq_log_bfqq(bfqd, bfqq,
++ "set_request: clearing in "
++ "large burst");
++ bfq_clear_bfqq_in_large_burst(bfqq);
++ if (bic->was_in_burst_list)
++ hlist_add_head(&bfqq->burst_list_node,
++ &bfqd->burst_list);
+ }
++ bfqq->split_time = jiffies;
+ }
+ } else {
+ /* If the queue was seeky for too long, break it apart. */
+ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
+ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
++
++ /* Update bic before losing reference to bfqq */
++ if (bfq_bfqq_in_large_burst(bfqq))
++ bic->saved_in_large_burst = true;
++
+ bfqq = bfq_split_bfqq(bic, bfqq);
+ split = true;
+ if (!bfqq)
+@@ -3766,9 +4210,8 @@ new_queue:
+ }
+
+ bfqq->allocated[rw]++;
+- atomic_inc(&bfqq->ref);
+- bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq,
+- atomic_read(&bfqq->ref));
++ bfqq->ref++;
++ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, bfqq->ref);
+
+ rq->elv.priv[0] = bic;
+ rq->elv.priv[1] = bfqq;
+@@ -3783,7 +4226,6 @@ new_queue:
+ if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
+ bfqq->bic = bic;
+ if (split) {
+- bfq_mark_bfqq_just_split(bfqq);
+ /*
+ * If the queue has just been split from a shared
+ * queue, restore the idle window and the possible
+@@ -3793,6 +4235,9 @@ new_queue:
+ }
+ }
+
++ if (unlikely(bfq_bfqq_just_created(bfqq)))
++ bfq_handle_burst(bfqd, bfqq);
++
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return 0;
+@@ -3872,6 +4317,7 @@ static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
+ cancel_work_sync(&bfqd->unplug_work);
+ }
+
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+ struct bfq_queue **bfqq_ptr)
+ {
+@@ -3880,9 +4326,9 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+
+ bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
+ if (bfqq) {
+- bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group);
++ bfq_bfqq_move(bfqd, bfqq, root_group);
+ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d",
+- bfqq, atomic_read(&bfqq->ref));
++ bfqq, bfqq->ref);
+ bfq_put_queue(bfqq);
+ *bfqq_ptr = NULL;
+ }
+@@ -3904,6 +4350,7 @@ static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
+
+ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
+ }
++#endif
+
+ static void bfq_exit_queue(struct elevator_queue *e)
+ {
+@@ -3923,8 +4370,6 @@ static void bfq_exit_queue(struct elevator_queue *e)
+
+ bfq_shutdown_timer_wq(bfqd);
+
+- synchronize_rcu();
+-
+ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
+
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+@@ -3973,11 +4418,14 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ * will not attempt to free it.
+ */
+ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
+- atomic_inc(&bfqd->oom_bfqq.ref);
++ bfqd->oom_bfqq.ref++;
+ bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
+ bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
+ bfqd->oom_bfqq.entity.new_weight =
+ bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
++
++ /* oom_bfqq does not participate in bursts */
++ bfq_clear_bfqq_just_created(&bfqd->oom_bfqq);
+ /*
+ * Trigger weight initialization, according to ioprio, at the
+ * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
+@@ -3996,9 +4444,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ goto out_free;
+ bfq_init_root_group(bfqd->root_group, bfqd);
+ bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- bfqd->active_numerous_groups = 0;
+-#endif
+
+ init_timer(&bfqd->idle_slice_timer);
+ bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
+@@ -4023,20 +4468,19 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ bfqd->bfq_back_penalty = bfq_back_penalty;
+ bfqd->bfq_slice_idle = bfq_slice_idle;
+ bfqd->bfq_class_idle_last_service = 0;
+- bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq;
+- bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async;
+- bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync;
++ bfqd->bfq_timeout = bfq_timeout;
+
+- bfqd->bfq_coop_thresh = 2;
+- bfqd->bfq_failed_cooperations = 7000;
+ bfqd->bfq_requests_within_timer = 120;
+
+- bfqd->bfq_large_burst_thresh = 11;
+- bfqd->bfq_burst_interval = msecs_to_jiffies(500);
++ bfqd->bfq_large_burst_thresh = 8;
++ bfqd->bfq_burst_interval = msecs_to_jiffies(180);
+
+ bfqd->low_latency = true;
+
+- bfqd->bfq_wr_coeff = 20;
++ /*
++ * Trade-off between responsiveness and fairness.
++ */
++ bfqd->bfq_wr_coeff = 30;
+ bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300);
+ bfqd->bfq_wr_max_time = 0;
+ bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
+@@ -4048,16 +4492,15 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ * video.
+ */
+ bfqd->wr_busy_queues = 0;
+- bfqd->busy_in_flight_queues = 0;
+- bfqd->const_seeky_busy_in_flight_queues = 0;
+
+ /*
+- * Begin by assuming, optimistically, that the device peak rate is
+- * equal to the highest reference rate.
++ * Begin by assuming, optimistically, that the device is a
++ * high-speed one, and that its peak rate is equal to 2/3 of
++ * the highest reference rate.
+ */
+ bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
+ T_fast[blk_queue_nonrot(bfqd->queue)];
+- bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)];
++ bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
+ bfqd->device_speed = BFQ_BFQD_FAST;
+
+ return 0;
+@@ -4161,10 +4604,8 @@ SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
+ SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
+ SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
+ SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
+-SHOW_FUNCTION(bfq_max_budget_async_rq_show,
+- bfqd->bfq_max_budget_async_rq, 0);
+-SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1);
+-SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1);
++SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1);
++SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0);
+ SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
+ SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0);
+ SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1);
+@@ -4199,10 +4640,6 @@ STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
+ STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
+ INT_MAX, 0);
+ STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
+-STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq,
+- 1, INT_MAX, 0);
+-STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0,
+- INT_MAX, 1);
+ STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0);
+ STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1);
+ STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX,
+@@ -4224,10 +4661,8 @@ static ssize_t bfq_weights_store(struct elevator_queue *e,
+
+ static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
+ {
+- u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
+-
+ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES)
+- return bfq_calc_max_budget(bfqd->peak_rate, timeout);
++ return bfq_calc_max_budget(bfqd);
+ else
+ return bfq_default_max_budget;
+ }
+@@ -4252,6 +4687,10 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
+ return ret;
+ }
+
++/*
++ * Leaving this name to preserve name compatibility with cfq
++ * parameters, but this timeout is used for both sync and async.
++ */
+ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
+ const char *page, size_t count)
+ {
+@@ -4264,13 +4703,31 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
+ else if (__data > INT_MAX)
+ __data = INT_MAX;
+
+- bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data);
++ bfqd->bfq_timeout = msecs_to_jiffies(__data);
+ if (bfqd->bfq_user_max_budget == 0)
+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
+
+ return ret;
+ }
+
++static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
++ const char *page, size_t count)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ unsigned long uninitialized_var(__data);
++ int ret = bfq_var_store(&__data, (page), count);
++
++ if (__data > 1)
++ __data = 1;
++	if (!bfqd->strict_guarantees && __data == 1 &&
++	    bfqd->bfq_slice_idle < msecs_to_jiffies(8))
++ bfqd->bfq_slice_idle = msecs_to_jiffies(8);
++
++ bfqd->strict_guarantees = __data;
++
++ return ret;
++}
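The store function above enforces one invariant worth noting: enabling strict_guarantees never leaves the idling window under 8 ms, because idling is the mechanism that actually provides the guarantees. A minimal user-space mirror of the clamp, with times kept in milliseconds for clarity (names are illustrative):

	static unsigned int clamp_slice_idle_ms(int strict_before, unsigned long val,
						unsigned int slice_idle_ms)
	{
		/* raise the idling window to 8 ms when strict mode turns on */
		if (!strict_before && val == 1 && slice_idle_ms < 8)
			return 8;
		return slice_idle_ms;
	}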
++
+ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
+ const char *page, size_t count)
+ {
+@@ -4297,9 +4754,8 @@ static struct elv_fs_entry bfq_attrs[] = {
+ BFQ_ATTR(back_seek_penalty),
+ BFQ_ATTR(slice_idle),
+ BFQ_ATTR(max_budget),
+- BFQ_ATTR(max_budget_async_rq),
+ BFQ_ATTR(timeout_sync),
+- BFQ_ATTR(timeout_async),
++ BFQ_ATTR(strict_guarantees),
+ BFQ_ATTR(low_latency),
+ BFQ_ATTR(wr_coeff),
+ BFQ_ATTR(wr_max_time),
+@@ -4342,9 +4798,28 @@ static struct elevator_type iosched_bfq = {
+ .elevator_owner = THIS_MODULE,
+ };
+
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static struct blkcg_policy blkcg_policy_bfq = {
++ .dfl_cftypes = bfq_blkg_files,
++ .legacy_cftypes = bfq_blkcg_legacy_files,
++
++ .cpd_alloc_fn = bfq_cpd_alloc,
++ .cpd_init_fn = bfq_cpd_init,
++ .cpd_bind_fn = bfq_cpd_init,
++ .cpd_free_fn = bfq_cpd_free,
++
++ .pd_alloc_fn = bfq_pd_alloc,
++ .pd_init_fn = bfq_pd_init,
++ .pd_offline_fn = bfq_pd_offline,
++ .pd_free_fn = bfq_pd_free,
++ .pd_reset_stats_fn = bfq_pd_reset_stats,
++};
++#endif
++
+ static int __init bfq_init(void)
+ {
+ int ret;
++ char msg[50] = "BFQ I/O-scheduler: v8";
+
+ /*
+ * Can be 0 on HZ < 1000 setups.
+@@ -4352,9 +4827,6 @@ static int __init bfq_init(void)
+ if (bfq_slice_idle == 0)
+ bfq_slice_idle = 1;
+
+- if (bfq_timeout_async == 0)
+- bfq_timeout_async = 1;
+-
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ ret = blkcg_policy_register(&blkcg_policy_bfq);
+ if (ret)
+@@ -4370,23 +4842,34 @@ static int __init bfq_init(void)
+ * installed on the reference devices (see the comments before the
+ * definitions of the two arrays).
+ */
+- T_slow[0] = msecs_to_jiffies(2600);
+- T_slow[1] = msecs_to_jiffies(1000);
+- T_fast[0] = msecs_to_jiffies(5500);
+- T_fast[1] = msecs_to_jiffies(2000);
++ T_slow[0] = msecs_to_jiffies(3500);
++ T_slow[1] = msecs_to_jiffies(1500);
++ T_fast[0] = msecs_to_jiffies(8000);
++ T_fast[1] = msecs_to_jiffies(3000);
+
+ /*
+- * Thresholds that determine the switch between speed classes (see
+- * the comments before the definition of the array).
++ * Thresholds that determine the switch between speed classes
++ * (see the comments before the definition of the array
++ * device_speed_thresh). These thresholds are biased towards
++ * transitions to the fast class. This is safer than the
++ * opposite bias. In fact, a wrong transition to the slow
++ * class results in short weight-raising periods, because the
++	 * speed of the device then tends to be higher than the
++ * reference peak rate. On the opposite end, a wrong
++ * transition to the fast class tends to increase
++ * weight-raising periods, because of the opposite reason.
+ */
+- device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2;
+- device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2;
++ device_speed_thresh[0] = (4 * R_slow[0]) / 3;
++ device_speed_thresh[1] = (4 * R_slow[1]) / 3;
+
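To make the bias concrete, assume reference rates R_slow = 1000 and R_fast = 7000 (arbitrary numbers; the real arrays are defined earlier in the patch). The old midpoint threshold would have been (7000 + 1000) / 2 = 4000, whereas the new one is (4 * 1000) / 3 = 1333: a device only has to exceed 4/3 of the slow reference rate to be classified as fast, so a misclassification errs toward longer weight-raising periods rather than shorter ones.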
+ ret = elv_register(&iosched_bfq);
+ if (ret)
+ goto err_pol_unreg;
+
+- pr_info("BFQ I/O-scheduler: v7r11");
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ strcat(msg, " (with cgroups support)");
++#endif
++ pr_info("%s", msg);
+
+ return 0;
+
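A quick size check on the buffer used above: strlen("BFQ I/O-scheduler: v8") is 21 and strlen(" (with cgroups support)") is 23, so the longest message is 44 characters plus the terminating NUL, 45 bytes in total, comfortably within the 50-byte msg[] array.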
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index a64fec1..e54b149 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -7,9 +7,11 @@
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ * Paolo Valente <paolo.valente@unimore.it>
+ *
+- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ * Copyright (C) 2016 Paolo Valente <paolo.valente@unimore.it>
+ */
+
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
++
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ #define for_each_entity(entity) \
+ for (; entity ; entity = entity->parent)
+@@ -22,8 +24,6 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+ int extract,
+ struct bfq_data *bfqd);
+
+-static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+-
+ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ {
+ struct bfq_entity *bfqg_entity;
+@@ -48,6 +48,7 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+ {
+ struct bfq_entity *next_in_service;
++ struct bfq_queue *bfqq;
+
+ if (sd->in_service_entity)
+ /* will update/requeue at the end of service */
+@@ -65,14 +66,29 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+
+ if (next_in_service)
+ bfq_update_budget(next_in_service);
++ else
++ goto exit;
+
++ bfqq = bfq_entity_to_bfqq(next_in_service);
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "update_next_in_service: chosen this queue");
++ else {
++ struct bfq_group *bfqg =
++ container_of(next_in_service,
++ struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "update_next_in_service: chosen this entity");
++ }
++exit:
+ return 1;
+ }
+
+ static void bfq_check_next_in_service(struct bfq_sched_data *sd,
+ struct bfq_entity *entity)
+ {
+- BUG_ON(sd->next_in_service != entity);
++ WARN_ON(sd->next_in_service != entity);
+ }
+ #else
+ #define for_each_entity(entity) \
+@@ -151,20 +167,35 @@ static u64 bfq_delta(unsigned long service, unsigned long weight)
+ static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
+ {
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+-
++	unsigned long long start, finish, delta;
+ BUG_ON(entity->weight == 0);
+
+ entity->finish = entity->start +
+ bfq_delta(service, entity->weight);
+
++ start = ((entity->start>>10)*1000)>>12;
++ finish = ((entity->finish>>10)*1000)>>12;
++ delta = ((bfq_delta(service, entity->weight)>>10)*1000)>>12;
++
+ if (bfqq) {
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "calc_finish: serv %lu, w %d",
+ service, entity->weight);
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "calc_finish: start %llu, finish %llu, delta %llu",
+- entity->start, entity->finish,
+- bfq_delta(service, entity->weight));
++ start, finish, delta);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ } else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "calc_finish group: serv %lu, w %d",
++ service, entity->weight);
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "calc_finish group: start %llu, finish %llu, delta %llu",
++ start, finish, delta);
++#endif
+ }
+ }
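The start/finish/delta values logged above are scaled copies of the fixed-point timestamps: ((x >> 10) * 1000) >> 12 is approximately x * 1000 / 2^22, with the shift split in two, presumably so the 64-bit multiplication by 1000 cannot overflow for large timestamps (a rationale inferred from the shifts, not stated in the patch). A self-contained helper doing the same conversion:

	#include <stdint.h>

	/* scale a fixed-point timestamp for human-readable logging */
	static uint64_t scale_ts(uint64_t x)
	{
		return ((x >> 10) * 1000) >> 12;
	}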
+
+@@ -386,8 +417,6 @@ static void bfq_active_insert(struct bfq_service_tree *st,
+ BUG_ON(!bfqg);
+ BUG_ON(!bfqd);
+ bfqg->active_entities++;
+- if (bfqg->active_entities == 2)
+- bfqd->active_numerous_groups++;
+ }
+ #endif
+ }
+@@ -399,7 +428,7 @@ static void bfq_active_insert(struct bfq_service_tree *st,
+ static unsigned short bfq_ioprio_to_weight(int ioprio)
+ {
+ BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
+- return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio;
++	return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
+ }
+
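The one-line change above alters the spread of the mapping: the old formula subtracted the ioprio from a scaled constant, squeezing the eight CFQ-style priorities into the narrow weight range 73..80, while the new one scales the difference, spreading them over 10..80 so that weights are actually proportional to priority. A small user-space check, assuming the kernel's IOPRIO_BE_NR = 8 and BFQ_WEIGHT_CONVERSION_COEFF = 10:

	#include <stdio.h>

	#define IOPRIO_BE_NR			8
	#define BFQ_WEIGHT_CONVERSION_COEFF	10

	static unsigned short old_map(int ioprio)
	{
		return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio;
	}

	static unsigned short new_map(int ioprio)
	{
		return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
	}

	int main(void)
	{
		int p;

		/* old: 80, 79, ..., 73; new: 80, 70, ..., 10 */
		for (p = 0; p < IOPRIO_BE_NR; p++)
			printf("ioprio %d: old %u new %u\n",
			       p, old_map(p), new_map(p));
		return 0;
	}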
+ /**
+@@ -422,9 +451,9 @@ static void bfq_get_entity(struct bfq_entity *entity)
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+ if (bfqq) {
+- atomic_inc(&bfqq->ref);
++ bfqq->ref++;
+ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
+- bfqq, atomic_read(&bfqq->ref));
++ bfqq, bfqq->ref);
+ }
+ }
+
+@@ -499,10 +528,6 @@ static void bfq_active_extract(struct bfq_service_tree *st,
+ BUG_ON(!bfqd);
+ BUG_ON(!bfqg->active_entities);
+ bfqg->active_entities--;
+- if (bfqg->active_entities == 1) {
+- BUG_ON(!bfqd->active_numerous_groups);
+- bfqd->active_numerous_groups--;
+- }
+ }
+ #endif
+ }
+@@ -552,7 +577,7 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
+ if (bfqq) {
+ sd = entity->sched_data;
+ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
+- bfqq, atomic_read(&bfqq->ref));
++ bfqq, bfqq->ref);
+ bfq_put_queue(bfqq);
+ }
+ }
+@@ -628,12 +653,14 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
+ if (entity->new_weight != entity->orig_weight) {
+ if (entity->new_weight < BFQ_MIN_WEIGHT ||
+ entity->new_weight > BFQ_MAX_WEIGHT) {
+- printk(KERN_CRIT "update_weight_prio: "
+- "new_weight %d\n",
++ pr_crit("update_weight_prio: new_weight %d\n",
+ entity->new_weight);
+- BUG();
++ if (entity->new_weight < BFQ_MIN_WEIGHT)
++ entity->new_weight = BFQ_MIN_WEIGHT;
++ else
++ entity->new_weight = BFQ_MAX_WEIGHT;
+ }
+- entity->orig_weight = entity->new_weight;
++ entity->orig_weight = entity->new_weight;
+ if (bfqq)
+ bfqq->ioprio =
+ bfq_weight_to_ioprio(entity->orig_weight);
+@@ -708,7 +735,7 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
+ st = bfq_entity_service_tree(entity);
+
+ entity->service += served;
+- BUG_ON(entity->service > entity->budget);
++
+ BUG_ON(st->wsum == 0);
+
+ st->vtime += bfq_delta(served, st->wsum);
+@@ -717,31 +744,69 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
+ #endif
+- bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
++ st = bfq_entity_service_tree(&bfqq->entity);
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs, vtime %llu on %p",
++ served, ((st->vtime>>10)*1000)>>12, st);
+ }
+
+ /**
+- * bfq_bfqq_charge_full_budget - set the service to the entity budget.
++ * bfq_bfqq_charge_time - charge an amount of service equivalent to the length
++ * of the time interval during which bfqq has been in
++ * service.
++ * @bfqd: the device
+ * @bfqq: the queue that needs a service update.
++ * @time_ms: the amount of time during which the queue has received service
++ *
++ * If a queue does not consume its budget fast enough, then providing
++ * the queue with service fairness may impair throughput, more or less
++ * severely. For this reason, queues that consume their budget slowly
++ * are provided with time fairness instead of service fairness. This
++ * goal is achieved through the BFQ scheduling engine, even if such an
++ * engine works in the service, and not in the time domain. The trick
++ * is charging these queues with an inflated amount of service, equal
++ * to the amount of service that they would have received during their
++ * service slot if they had been fast, i.e., if their requests had
++ * been dispatched at a rate equal to the estimated peak rate.
+ *
+- * When it's not possible to be fair in the service domain, because
+- * a queue is not consuming its budget fast enough (the meaning of
+- * fast depends on the timeout parameter), we charge it a full
+- * budget. In this way we should obtain a sort of time-domain
+- * fairness among all the seeky/slow queues.
++ * It is worth noting that time fairness can cause important
++ * distortions in terms of bandwidth distribution, on devices with
++ * internal queueing. The reason is that I/O requests dispatched
++ * during the service slot of a queue may be served after that service
++ * slot is finished, and may have a total processing time loosely
++ * correlated with the duration of the service slot. This is
++ * especially true for short service slots.
+ */
+-static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
++static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ unsigned long time_ms)
+ {
+ struct bfq_entity *entity = &bfqq->entity;
++ int tot_serv_to_charge = entity->service;
++ unsigned int timeout_ms = jiffies_to_msecs(bfq_timeout);
++
++ if (time_ms > 0 && time_ms < timeout_ms)
++ tot_serv_to_charge =
++ (bfqd->bfq_max_budget * time_ms) / timeout_ms;
++
++ if (tot_serv_to_charge < entity->service)
++ tot_serv_to_charge = entity->service;
++
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "charge_time: %lu/%u ms, %d/%d/%d sectors",
++ time_ms, timeout_ms, entity->service,
++ tot_serv_to_charge, entity->budget);
+
+- bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
++ /* Increase budget to avoid inconsistencies */
++ if (tot_serv_to_charge > entity->budget)
++ entity->budget = tot_serv_to_charge;
+
+- bfq_bfqq_served(bfqq, entity->budget - entity->service);
++ bfq_bfqq_served(bfqq,
++ max_t(int, 0, tot_serv_to_charge - entity->service));
+ }
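To see the inflation at work, take assumed values max_budget = 16384 sectors, time_ms = 30 out of timeout_ms = 125, and only 512 sectors actually served: the queue is charged 16384 * 30 / 125 = 3932 sectors, nearly eight times its real service, i.e. what it would have consumed had it dispatched at the estimated peak rate for those 30 ms. A self-contained mirror of the computation:

	/* user-space sketch of the charge computed above */
	static int serv_to_charge(int max_budget, int service,
				  unsigned long time_ms, unsigned int timeout_ms)
	{
		int tot = service;

		if (time_ms > 0 && time_ms < timeout_ms)
			tot = (max_budget * time_ms) / timeout_ms;
		if (tot < service)
			tot = service;	/* never charge below the real service */
		return tot;
	}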
+
+ /**
+ * __bfq_activate_entity - activate an entity.
+ * @entity: the entity being activated.
++ * @non_blocking_wait_rq: true if this entity was waiting for a request
+ *
+ * Called whenever an entity is activated, i.e., it is not active and one
+ * of its children receives a new request, or has to be reactivated due to
+@@ -749,11 +814,16 @@ static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
+ * service received if @entity is active) of the queue to calculate its
+ * timestamps.
+ */
+-static void __bfq_activate_entity(struct bfq_entity *entity)
++static void __bfq_activate_entity(struct bfq_entity *entity,
++ bool non_blocking_wait_rq)
+ {
+ struct bfq_sched_data *sd = entity->sched_data;
+ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ bool backshifted = false;
+
++ BUG_ON(!sd);
++ BUG_ON(!st);
+ if (entity == sd->in_service_entity) {
+ BUG_ON(entity->tree);
+ /*
+@@ -771,45 +841,133 @@ static void __bfq_activate_entity(struct bfq_entity *entity)
+ * old start time.
+ */
+ bfq_active_extract(st, entity);
+- } else if (entity->tree == &st->idle) {
+- /*
+- * Must be on the idle tree, bfq_idle_extract() will
+- * check for that.
+- */
+- bfq_idle_extract(st, entity);
+- entity->start = bfq_gt(st->vtime, entity->finish) ?
+- st->vtime : entity->finish;
+ } else {
+- /*
+- * The finish time of the entity may be invalid, and
+- * it is in the past for sure, otherwise the queue
+- * would have been on the idle tree.
+- */
+- entity->start = st->vtime;
+- st->wsum += entity->weight;
+- bfq_get_entity(entity);
++ unsigned long long min_vstart;
++
++		/* See comments on bfq_bfqq_update_budg_for_activation */
++ if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) {
++ backshifted = true;
++ min_vstart = entity->finish;
++ } else
++ min_vstart = st->vtime;
+
+- BUG_ON(entity->on_st);
+- entity->on_st = 1;
++ if (entity->tree == &st->idle) {
++ /*
++ * Must be on the idle tree, bfq_idle_extract() will
++ * check for that.
++ */
++ bfq_idle_extract(st, entity);
++ entity->start = bfq_gt(min_vstart, entity->finish) ?
++ min_vstart : entity->finish;
++ } else {
++ /*
++ * The finish time of the entity may be invalid, and
++ * it is in the past for sure, otherwise the queue
++ * would have been on the idle tree.
++ */
++ entity->start = min_vstart;
++ st->wsum += entity->weight;
++ bfq_get_entity(entity);
++
++ BUG_ON(entity->on_st);
++ entity->on_st = 1;
++ }
+ }
+
+ st = __bfq_entity_update_weight_prio(st, entity);
+ bfq_calc_finish(entity, entity->budget);
++
++ /*
++ * If some queues enjoy backshifting for a while, then their
++ * (virtual) finish timestamps may happen to become lower and
++ * lower than the system virtual time. In particular, if
++ * these queues often happen to be idle for short time
++ * periods, and during such time periods other queues with
++ * higher timestamps happen to be busy, then the backshifted
++ * timestamps of the former queues can become much lower than
++ * the system virtual time. In fact, to serve the queues with
++ * higher timestamps while the ones with lower timestamps are
++	 * idle, the system virtual time may be pushed up to much
++ * higher values than the finish timestamps of the idle
++ * queues. As a consequence, the finish timestamps of all new
++ * or newly activated queues may end up being much larger than
++ * those of lucky queues with backshifted timestamps. The
++ * latter queues may then monopolize the device for a lot of
++ * time. This would simply break service guarantees.
++ *
++ * To reduce this problem, push up a little bit the
++ * backshifted timestamps of the queue associated with this
++ * entity (only a queue can happen to have the backshifted
++ * flag set): just enough to let the finish timestamp of the
++ * queue be equal to the current value of the system virtual
++ * time. This may introduce a little unfairness among queues
++ * with backshifted timestamps, but it does not break
++ * worst-case fairness guarantees.
++ *
++ * As a special case, if bfqq is weight-raised, push up
++ * timestamps much less, to keep very low the probability that
++ * this push up causes the backshifted finish timestamps of
++ * weight-raised queues to become higher than the backshifted
++ * finish timestamps of non weight-raised queues.
++ */
++ if (backshifted && bfq_gt(st->vtime, entity->finish)) {
++ unsigned long delta = st->vtime - entity->finish;
++
++ if (bfqq)
++ delta /= bfqq->wr_coeff;
++
++ entity->start += delta;
++ entity->finish += delta;
++
++ if (bfqq) {
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "__activate_entity: new queue finish %llu",
++ ((entity->finish>>10)*1000)>>12);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ } else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "__activate_entity: new group finish %llu",
++ ((entity->finish>>10)*1000)>>12);
++#endif
++ }
++ }
++
+ bfq_active_insert(st, entity);
++
++ if (bfqq) {
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "__activate_entity: queue %seligible in st %p",
++ entity->start <= st->vtime ? "" : "non ", st);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ } else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "__activate_entity: group %seligible in st %p",
++ entity->start <= st->vtime ? "" : "non ", st);
++#endif
++ }
+ }
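A numeric instance of the push-up above, on assumed values: with system virtual time 10000 and a backshifted finish timestamp of 4000, delta = 6000. A non-raised queue (wr_coeff = 1) has start and finish moved forward by the full 6000, making it exactly eligible again, while a weight-raised queue with wr_coeff = 30 moves by only 6000 / 30 = 200 and so retains almost all of its backshifted privilege.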
+
+ /**
+ * bfq_activate_entity - activate an entity and its ancestors if necessary.
+ * @entity: the entity to activate.
++ * @non_blocking_wait_rq: true if this entity was waiting for a request
+ *
+ * Activate @entity and all the entities on the path from it to the root.
+ */
+-static void bfq_activate_entity(struct bfq_entity *entity)
++static void bfq_activate_entity(struct bfq_entity *entity,
++ bool non_blocking_wait_rq)
+ {
+ struct bfq_sched_data *sd;
+
+ for_each_entity(entity) {
+- __bfq_activate_entity(entity);
++ BUG_ON(!entity);
++ __bfq_activate_entity(entity, non_blocking_wait_rq);
+
+ sd = entity->sched_data;
+ if (!bfq_update_next_in_service(sd))
+@@ -890,23 +1048,24 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+
+ if (!__bfq_deactivate_entity(entity, requeue))
+ /*
+- * The parent entity is still backlogged, and
+- * we don't need to update it as it is still
+- * in service.
++ * next_in_service has not been changed, so
++ * no upwards update is needed
+ */
+ break;
+
+ if (sd->next_in_service)
+ /*
+- * The parent entity is still backlogged and
+- * the budgets on the path towards the root
+- * need to be updated.
++ * The parent entity is still backlogged,
++ * because next_in_service is not NULL, and
++ * next_in_service has been updated (see
++ * comment on the body of the above if):
++ * upwards update of the schedule is needed.
+ */
+ goto update;
+
+ /*
+- * If we reach there the parent is no more backlogged and
+- * we want to propagate the dequeue upwards.
++ * If we get here, then the parent is no more backlogged and
++ * we want to propagate the deactivation upwards.
+ */
+ requeue = 1;
+ }
+@@ -916,9 +1075,23 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ update:
+ entity = parent;
+ for_each_entity(entity) {
+- __bfq_activate_entity(entity);
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ __bfq_activate_entity(entity, false);
+
+ sd = entity->sched_data;
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++				"invoking update_next for this queue");
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity,
++ struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++				"invoking update_next for this entity");
++ }
++#endif
+ if (!bfq_update_next_in_service(sd))
+ break;
+ }
+@@ -997,10 +1170,11 @@ left:
+ * Update the virtual time in @st and return the first eligible entity
+ * it contains.
+ */
+-static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
+- bool force)
++static struct bfq_entity *
++__bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
+ {
+ struct bfq_entity *entity, *new_next_in_service = NULL;
++ struct bfq_queue *bfqq;
+
+ if (RB_EMPTY_ROOT(&st->active))
+ return NULL;
+@@ -1009,6 +1183,24 @@ static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
+ entity = bfq_first_active_entity(st);
+ BUG_ON(bfq_gt(entity->start, st->vtime));
+
++ bfqq = bfq_entity_to_bfqq(entity);
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "__lookup_next: start %llu vtime %llu st %p",
++ ((entity->start>>10)*1000)>>12,
++ ((st->vtime>>10)*1000)>>12, st);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "__lookup_next: start %llu vtime %llu st %p",
++ ((entity->start>>10)*1000)>>12,
++ ((st->vtime>>10)*1000)>>12, st);
++ }
++#endif
++
+ /*
+ * If the chosen entity does not match with the sched_data's
+ * next_in_service and we are forcedly serving the IDLE priority
+@@ -1045,10 +1237,28 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+ BUG_ON(sd->in_service_entity);
+
+ if (bfqd &&
+- jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
++ jiffies - bfqd->bfq_class_idle_last_service >
++ BFQ_CL_IDLE_TIMEOUT) {
+ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
+ true);
+ if (entity) {
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ if (bfqq)
++ bfq_log_bfqq(bfqd, bfqq,
++ "idle chosen from st %p %d",
++ st + BFQ_IOPRIO_CLASSES - 1,
++					BFQ_IOPRIO_CLASSES - 1);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg(bfqd, bfqg,
++ "idle chosen from st %p %d",
++ st + BFQ_IOPRIO_CLASSES - 1,
++					BFQ_IOPRIO_CLASSES - 1);
++ }
++#endif
+ i = BFQ_IOPRIO_CLASSES - 1;
+ bfqd->bfq_class_idle_last_service = jiffies;
+ sd->next_in_service = entity;
+@@ -1057,6 +1267,24 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+ for (; i < BFQ_IOPRIO_CLASSES; i++) {
+ entity = __bfq_lookup_next_entity(st + i, false);
+ if (entity) {
++			if (bfqd) {
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ if (bfqq)
++ bfq_log_bfqq(bfqd, bfqq,
++ "chosen from st %p %d",
++						st + i, i);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg(bfqd, bfqg,
++ "chosen from st %p %d",
++						st + i, i);
++ }
++#endif
++ }
++
+ if (extract) {
+ bfq_check_next_in_service(sd, entity);
+ bfq_active_extract(st + i, entity);
+@@ -1070,6 +1298,13 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+ return entity;
+ }
+
++static bool next_queue_may_preempt(struct bfq_data *bfqd)
++{
++ struct bfq_sched_data *sd = &bfqd->root_group->sched_data;
++
++ return sd->next_in_service != sd->in_service_entity;
++}
++
+ /*
+ * Get next queue for service.
+ */
+@@ -1086,7 +1321,36 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+
+ sd = &bfqd->root_group->sched_data;
+ for (; sd ; sd = entity->my_sched_data) {
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ if (entity) {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg(bfqd, bfqg,
++ "get_next_queue: lookup in this group");
++ } else
++ bfq_log_bfqg(bfqd, bfqd->root_group,
++ "get_next_queue: lookup in root group");
++#endif
++
+ entity = bfq_lookup_next_entity(sd, 1, bfqd);
++
++ bfqq = bfq_entity_to_bfqq(entity);
++ if (bfqq)
++ bfq_log_bfqq(bfqd, bfqq,
++ "get_next_queue: this queue, finish %llu",
++				((entity->finish>>10)*1000)>>12);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg(bfqd, bfqg,
++ "get_next_queue: this entity, finish %llu",
++				((entity->finish>>10)*1000)>>12);
++ }
++#endif
++
+ BUG_ON(!entity);
+ entity->service = 0;
+ }
+@@ -1113,9 +1377,7 @@ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ {
+ struct bfq_entity *entity = &bfqq->entity;
+
+- if (bfqq == bfqd->in_service_queue)
+- __bfq_bfqd_reset_in_service(bfqd);
+-
++ BUG_ON(bfqq == bfqd->in_service_queue);
+ bfq_deactivate_entity(entity, requeue);
+ }
+
+@@ -1123,12 +1385,11 @@ static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ struct bfq_entity *entity = &bfqq->entity;
+
+- bfq_activate_entity(entity);
++ bfq_activate_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq));
++ bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
+ }
+
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
+-#endif
+
+ /*
+ * Called when the bfqq no longer has requests pending, remove it from
+@@ -1139,6 +1400,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ {
+ BUG_ON(!bfq_bfqq_busy(bfqq));
+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
++ BUG_ON(bfqq == bfqd->in_service_queue);
+
+ bfq_log_bfqq(bfqd, bfqq, "del from busy");
+
+@@ -1147,27 +1409,20 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ BUG_ON(bfqd->busy_queues == 0);
+ bfqd->busy_queues--;
+
+- if (!bfqq->dispatched) {
++ if (!bfqq->dispatched)
+ bfq_weights_tree_remove(bfqd, &bfqq->entity,
+ &bfqd->queue_weights_tree);
+- if (!blk_queue_nonrot(bfqd->queue)) {
+- BUG_ON(!bfqd->busy_in_flight_queues);
+- bfqd->busy_in_flight_queues--;
+- if (bfq_bfqq_constantly_seeky(bfqq)) {
+- BUG_ON(!bfqd->
+- const_seeky_busy_in_flight_queues);
+- bfqd->const_seeky_busy_in_flight_queues--;
+- }
+- }
+- }
++
+ if (bfqq->wr_coeff > 1)
+ bfqd->wr_busy_queues--;
+
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_dequeue(bfqq_group(bfqq));
+-#endif
+
++ BUG_ON(bfqq->entity.budget < 0);
++
+ bfq_deactivate_bfqq(bfqd, bfqq, requeue);
++
++ BUG_ON(bfqq->entity.budget < 0);
+ }
+
+ /*
+@@ -1185,16 +1440,11 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_mark_bfqq_busy(bfqq);
+ bfqd->busy_queues++;
+
+- if (!bfqq->dispatched) {
++ if (!bfqq->dispatched)
+ if (bfqq->wr_coeff == 1)
+ bfq_weights_tree_add(bfqd, &bfqq->entity,
+ &bfqd->queue_weights_tree);
+- if (!blk_queue_nonrot(bfqd->queue)) {
+- bfqd->busy_in_flight_queues++;
+- if (bfq_bfqq_constantly_seeky(bfqq))
+- bfqd->const_seeky_busy_in_flight_queues++;
+- }
+- }
++
+ if (bfqq->wr_coeff > 1)
+ bfqd->wr_busy_queues++;
+ }
+diff --git a/block/bfq.h b/block/bfq.h
+index f73c942..b8ad02a 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -1,5 +1,5 @@
+ /*
+- * BFQ-v7r11 for 4.5.0: data structures and common functions prototypes.
++ * BFQ-v8 for 4.7.0: data structures and common functions prototypes.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+@@ -28,7 +28,7 @@
+
+ #define BFQ_DEFAULT_QUEUE_IOPRIO 4
+
+-#define BFQ_DEFAULT_GRP_WEIGHT 10
++#define BFQ_WEIGHT_LEGACY_DFL 100
+ #define BFQ_DEFAULT_GRP_IOPRIO 0
+ #define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
+
+@@ -36,12 +36,6 @@ struct bfq_entity;
+
+ /**
+ * struct bfq_service_tree - per ioprio_class service tree.
+- * @active: tree for active entities (i.e., those backlogged).
+- * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i).
+- * @first_idle: idle entity with minimum F_i.
+- * @last_idle: idle entity with maximum F_i.
+- * @vtime: scheduler virtual time.
+- * @wsum: scheduler weight sum; active and idle entities contribute to it.
+ *
+ * Each service tree represents a B-WF2Q+ scheduler on its own. Each
+ * ioprio_class has its own independent scheduler, and so its own
+@@ -49,27 +43,28 @@ struct bfq_entity;
+ * of the containing bfqd.
+ */
+ struct bfq_service_tree {
++ /* tree for active entities (i.e., those backlogged) */
+ struct rb_root active;
++ /* tree for idle entities (i.e., not backlogged, with V <= F_i)*/
+ struct rb_root idle;
+
+- struct bfq_entity *first_idle;
+- struct bfq_entity *last_idle;
++ struct bfq_entity *first_idle; /* idle entity with minimum F_i */
++ struct bfq_entity *last_idle; /* idle entity with maximum F_i */
+
+- u64 vtime;
++ u64 vtime; /* scheduler virtual time */
++ /* scheduler weight sum; active and idle entities contribute to it */
+ unsigned long wsum;
+ };
+
+ /**
+ * struct bfq_sched_data - multi-class scheduler.
+- * @in_service_entity: entity in service.
+- * @next_in_service: head-of-the-line entity in the scheduler.
+- * @service_tree: array of service trees, one per ioprio_class.
+ *
+ * bfq_sched_data is the basic scheduler queue. It supports three
+- * ioprio_classes, and can be used either as a toplevel queue or as
+- * an intermediate queue on a hierarchical setup.
+- * @next_in_service points to the active entity of the sched_data
+- * service trees that will be scheduled next.
++ * ioprio_classes, and can be used either as a toplevel queue or as an
++ * intermediate queue on a hierarchical setup. @next_in_service
++ * points to the active entity of the sched_data service trees that
++ * will be scheduled next. It is used to reduce the number of steps
++ * needed for each hierarchical-schedule update.
+ *
+ * The supported ioprio_classes are the same as in CFQ, in descending
+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
+@@ -79,48 +74,29 @@ struct bfq_service_tree {
+ * All the fields are protected by the queue lock of the containing bfqd.
+ */
+ struct bfq_sched_data {
+- struct bfq_entity *in_service_entity;
++ struct bfq_entity *in_service_entity; /* entity in service */
++ /* head-of-the-line entity in the scheduler (see comments above) */
+ struct bfq_entity *next_in_service;
++ /* array of service trees, one per ioprio_class */
+ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
+ };
+
+ /**
+ * struct bfq_weight_counter - counter of the number of all active entities
+ * with a given weight.
+- * @weight: weight of the entities that this counter refers to.
+- * @num_active: number of active entities with this weight.
+- * @weights_node: weights tree member (see bfq_data's @queue_weights_tree
+- * and @group_weights_tree).
+ */
+ struct bfq_weight_counter {
+- short int weight;
+- unsigned int num_active;
++ short int weight; /* weight of the entities this counter refers to */
++ unsigned int num_active; /* nr of active entities with this weight */
++ /*
++ * Weights tree member (see bfq_data's @queue_weights_tree and
++ * @group_weights_tree)
++ */
+ struct rb_node weights_node;
+ };
+
+ /**
+ * struct bfq_entity - schedulable entity.
+- * @rb_node: service_tree member.
+- * @weight_counter: pointer to the weight counter associated with this entity.
+- * @on_st: flag, true if the entity is on a tree (either the active or
+- * the idle one of its service_tree).
+- * @finish: B-WF2Q+ finish timestamp (aka F_i).
+- * @start: B-WF2Q+ start timestamp (aka S_i).
+- * @tree: tree the entity is enqueued into; %NULL if not on a tree.
+- * @min_start: minimum start time of the (active) subtree rooted at
+- * this entity; used for O(log N) lookups into active trees.
+- * @service: service received during the last round of service.
+- * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight.
+- * @weight: weight of the queue
+- * @parent: parent entity, for hierarchical scheduling.
+- * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the
+- * associated scheduler queue, %NULL on leaf nodes.
+- * @sched_data: the scheduler queue this entity belongs to.
+- * @ioprio: the ioprio in use.
+- * @new_weight: when a weight change is requested, the new weight value.
+- * @orig_weight: original weight, used to implement weight boosting
+- * @prio_changed: flag, true when the user requested a weight, ioprio or
+- * ioprio_class change.
+ *
+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
+ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each
+@@ -147,27 +123,52 @@ struct bfq_weight_counter {
+ * containing bfqd.
+ */
+ struct bfq_entity {
+- struct rb_node rb_node;
++ struct rb_node rb_node; /* service_tree member */
++ /* pointer to the weight counter associated with this entity */
+ struct bfq_weight_counter *weight_counter;
+
++ /*
++ * flag, true if the entity is on a tree (either the active or
++ * the idle one of its service_tree).
++ */
+ int on_st;
+
+- u64 finish;
+- u64 start;
++ u64 finish; /* B-WF2Q+ finish timestamp (aka F_i) */
++ u64 start; /* B-WF2Q+ start timestamp (aka S_i) */
+
++ /* tree the entity is enqueued into; %NULL if not on a tree */
+ struct rb_root *tree;
+
++ /*
++ * minimum start time of the (active) subtree rooted at this
++ * entity; used for O(log N) lookups into active trees
++ */
+ u64 min_start;
+
+- int service, budget;
+- unsigned short weight, new_weight;
++ /* amount of service received during the last service slot */
++ int service;
++
++ /* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
++ int budget;
++
++ unsigned short weight; /* weight of the queue */
++ unsigned short new_weight; /* next weight if a change is in progress */
++
++ /* original weight, used to implement weight boosting */
+ unsigned short orig_weight;
+
++ /* parent entity, for hierarchical scheduling */
+ struct bfq_entity *parent;
+
++ /*
++ * For non-leaf nodes in the hierarchy, the associated
++ * scheduler queue, %NULL on leaf nodes.
++ */
+ struct bfq_sched_data *my_sched_data;
++ /* the scheduler queue this entity belongs to */
+ struct bfq_sched_data *sched_data;
+
++ /* flag, set to request a weight, ioprio or ioprio_class change */
+ int prio_changed;
+ };
+
+@@ -175,56 +176,6 @@ struct bfq_group;
+
+ /**
+ * struct bfq_queue - leaf schedulable entity.
+- * @ref: reference counter.
+- * @bfqd: parent bfq_data.
+- * @new_ioprio: when an ioprio change is requested, the new ioprio value.
+- * @ioprio_class: the ioprio_class in use.
+- * @new_ioprio_class: when an ioprio_class change is requested, the new
+- * ioprio_class value.
+- * @new_bfqq: shared bfq_queue if queue is cooperating with
+- * one or more other queues.
+- * @pos_node: request-position tree member (see bfq_group's @rq_pos_tree).
+- * @pos_root: request-position tree root (see bfq_group's @rq_pos_tree).
+- * @sort_list: sorted list of pending requests.
+- * @next_rq: if fifo isn't expired, next request to serve.
+- * @queued: nr of requests queued in @sort_list.
+- * @allocated: currently allocated requests.
+- * @meta_pending: pending metadata requests.
+- * @fifo: fifo list of requests in sort_list.
+- * @entity: entity representing this queue in the scheduler.
+- * @max_budget: maximum budget allowed from the feedback mechanism.
+- * @budget_timeout: budget expiration (in jiffies).
+- * @dispatched: number of requests on the dispatch list or inside driver.
+- * @flags: status flags.
+- * @bfqq_list: node for active/idle bfqq list inside our bfqd.
+- * @burst_list_node: node for the device's burst list.
+- * @seek_samples: number of seeks sampled
+- * @seek_total: sum of the distances of the seeks sampled
+- * @seek_mean: mean seek distance
+- * @last_request_pos: position of the last request enqueued
+- * @requests_within_timer: number of consecutive pairs of request completion
+- * and arrival, such that the queue becomes idle
+- * after the completion, but the next request arrives
+- * within an idle time slice; used only if the queue's
+- * IO_bound has been cleared.
+- * @pid: pid of the process owning the queue, used for logging purposes.
+- * @last_wr_start_finish: start time of the current weight-raising period if
+- * the @bfq-queue is being weight-raised, otherwise
+- * finish time of the last weight-raising period
+- * @wr_cur_max_time: current max raising time for this queue
+- * @soft_rt_next_start: minimum time instant such that, only if a new
+- * request is enqueued after this time instant in an
+- * idle @bfq_queue with no outstanding requests, then
+- * the task associated with the queue it is deemed as
+- * soft real-time (see the comments to the function
+- * bfq_bfqq_softrt_next_start())
+- * @last_idle_bklogged: time of the last transition of the @bfq_queue from
+- * idle to backlogged
+- * @service_from_backlogged: cumulative service received from the @bfq_queue
+- * since the last transition from idle to
+- * backlogged
+- * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
+- * queue is shared
+ *
+ * A bfq_queue is a leaf request queue; it can be associated with an
+ * io_context or more, if it is async or shared between cooperating
+@@ -235,117 +186,163 @@ struct bfq_group;
+ * All the fields are protected by the queue lock of the containing bfqd.
+ */
+ struct bfq_queue {
+- atomic_t ref;
++ /* reference counter */
++ int ref;
++ /* parent bfq_data */
+ struct bfq_data *bfqd;
+
+- unsigned short ioprio, new_ioprio;
+- unsigned short ioprio_class, new_ioprio_class;
++ /* current ioprio and ioprio class */
++ unsigned short ioprio, ioprio_class;
++ /* next ioprio and ioprio class if a change is in progress */
++ unsigned short new_ioprio, new_ioprio_class;
+
+- /* fields for cooperating queues handling */
++ /*
++ * Shared bfq_queue if queue is cooperating with one or more
++ * other queues.
++ */
+ struct bfq_queue *new_bfqq;
++ /* request-position tree member (see bfq_group's @rq_pos_tree) */
+ struct rb_node pos_node;
++ /* request-position tree root (see bfq_group's @rq_pos_tree) */
+ struct rb_root *pos_root;
+
++ /* sorted list of pending requests */
+ struct rb_root sort_list;
++ /* if fifo isn't expired, next request to serve */
+ struct request *next_rq;
++ /* number of sync and async requests queued */
+ int queued[2];
++ /* number of sync and async requests currently allocated */
+ int allocated[2];
++ /* number of pending metadata requests */
+ int meta_pending;
++ /* fifo list of requests in sort_list */
+ struct list_head fifo;
+
++ /* entity representing this queue in the scheduler */
+ struct bfq_entity entity;
+
++ /* maximum budget allowed from the feedback mechanism */
+ int max_budget;
++ /* budget expiration (in jiffies) */
+ unsigned long budget_timeout;
+
++ /* number of requests on the dispatch list or inside driver */
+ int dispatched;
+
+- unsigned int flags;
++	unsigned int flags; /* status flags */
+
++ /* node for active/idle bfqq list inside parent bfqd */
+ struct list_head bfqq_list;
+
++	/* bit vector: a 1 for each seeky request in history */
++ u32 seek_history;
++
++ /* node for the device's burst list */
+ struct hlist_node burst_list_node;
+
+- unsigned int seek_samples;
+- u64 seek_total;
+- sector_t seek_mean;
++ /* position of the last request enqueued */
+ sector_t last_request_pos;
+
++ /* Number of consecutive pairs of request completion and
++ * arrival, such that the queue becomes idle after the
++ * completion, but the next request arrives within an idle
++ * time slice; used only if the queue's IO_bound flag has been
++ * cleared.
++ */
+ unsigned int requests_within_timer;
+
++ /* pid of the process owning the queue, used for logging purposes */
+ pid_t pid;
++
++ /*
++ * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
++ * if the queue is shared.
++ */
+ struct bfq_io_cq *bic;
+
+- /* weight-raising fields */
++ /* current maximum weight-raising time for this queue */
+ unsigned long wr_cur_max_time;
++ /*
++ * Minimum time instant such that, only if a new request is
++ * enqueued after this time instant in an idle @bfq_queue with
++ * no outstanding requests, then the task associated with the
++ * queue it is deemed as soft real-time (see the comments on
++ * the function bfq_bfqq_softrt_next_start())
++ */
+ unsigned long soft_rt_next_start;
++ /*
++ * Start time of the current weight-raising period if
++ * the @bfq-queue is being weight-raised, otherwise
++ * finish time of the last weight-raising period.
++ */
+ unsigned long last_wr_start_finish;
++ /* factor by which the weight of this queue is multiplied */
+ unsigned int wr_coeff;
++ /*
++ * Time of the last transition of the @bfq_queue from idle to
++ * backlogged.
++ */
+ unsigned long last_idle_bklogged;
++ /*
++ * Cumulative service received from the @bfq_queue since the
++ * last transition from idle to backlogged.
++ */
+ unsigned long service_from_backlogged;
++
++ unsigned long split_time; /* time of last split */
+ };
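The new seek_history field replaces the seek_samples/seek_total/seek_mean triple with a 32-bit sliding window, one bit per request. The code that updates it lives in bfq-iosched.c and is not part of this hunk; the shift-and-or form below is only a plausible sketch consistent with the field's comment:

	#include <stdint.h>

	/* shift one bit per completed request into the window: 1 = seeky */
	static uint32_t record_seek(uint32_t history, int seeky)
	{
		return (history << 1) | (seeky != 0);
	}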
+
+ /**
+ * struct bfq_ttime - per process thinktime stats.
+- * @ttime_total: total process thinktime
+- * @ttime_samples: number of thinktime samples
+- * @ttime_mean: average process thinktime
+ */
+ struct bfq_ttime {
+- unsigned long last_end_request;
++ unsigned long last_end_request; /* completion time of last request */
++
++ unsigned long ttime_total; /* total process thinktime */
++ unsigned long ttime_samples; /* number of thinktime samples */
++ unsigned long ttime_mean; /* average process thinktime */
+
+- unsigned long ttime_total;
+- unsigned long ttime_samples;
+- unsigned long ttime_mean;
+ };
+
+ /**
+ * struct bfq_io_cq - per (request_queue, io_context) structure.
+- * @icq: associated io_cq structure
+- * @bfqq: array of two process queues, the sync and the async
+- * @ttime: associated @bfq_ttime struct
+- * @ioprio: per (request_queue, blkcg) ioprio.
+- * @blkcg_id: id of the blkcg the related io_cq belongs to.
+- * @wr_time_left: snapshot of the time left before weight raising ends
+- * for the sync queue associated to this process; this
+- * snapshot is taken to remember this value while the weight
+- * raising is suspended because the queue is merged with a
+- * shared queue, and is used to set @raising_cur_max_time
+- * when the queue is split from the shared queue and its
+- * weight is raised again
+- * @saved_idle_window: same purpose as the previous field for the idle
+- * window
+- * @saved_IO_bound: same purpose as the previous two fields for the I/O
+- * bound classification of a queue
+- * @saved_in_large_burst: same purpose as the previous fields for the
+- * value of the field keeping the queue's belonging
+- * to a large burst
+- * @was_in_burst_list: true if the queue belonged to a burst list
+- * before its merge with another cooperating queue
+- * @cooperations: counter of consecutive successful queue merges underwent
+- * by any of the process' @bfq_queues
+- * @failed_cooperations: counter of consecutive failed queue merges of any
+- * of the process' @bfq_queues
+ */
+ struct bfq_io_cq {
++ /* associated io_cq structure */
+ struct io_cq icq; /* must be the first member */
++ /* array of two process queues, the sync and the async */
+ struct bfq_queue *bfqq[2];
++ /* associated @bfq_ttime struct */
+ struct bfq_ttime ttime;
++ /* per (request_queue, blkcg) ioprio */
+ int ioprio;
+-
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+- uint64_t blkcg_id; /* the current blkcg ID */
++ uint64_t blkcg_serial_nr; /* the current blkcg serial */
+ #endif
+
+- unsigned int wr_time_left;
++ /*
++ * Snapshot of the idle window before merging; taken to
++ * remember this value while the queue is merged, so as to be
++ * able to restore it in case of split.
++ */
+ bool saved_idle_window;
++ /*
++	 * Same purpose as the previous field, for the I/O-bound
++ * classification of a queue.
++ */
+ bool saved_IO_bound;
+
++ /*
++ * Same purpose as the previous fields for the value of the
++ * field keeping the queue's belonging to a large burst
++ */
+ bool saved_in_large_burst;
++ /*
++ * True if the queue belonged to a burst list before its merge
++ * with another cooperating queue.
++ */
+ bool was_in_burst_list;
+-
+- unsigned int cooperations;
+- unsigned int failed_cooperations;
+ };
+
+ enum bfq_device_speed {
+@@ -354,224 +351,216 @@ enum bfq_device_speed {
+ };
+
+ /**
+- * struct bfq_data - per device data structure.
+- * @queue: request queue for the managed device.
+- * @root_group: root bfq_group for the device.
+- * @active_numerous_groups: number of bfq_groups containing more than one
+- * active @bfq_entity.
+- * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by
+- * weight. Used to keep track of whether all @bfq_queues
+- * have the same weight. The tree contains one counter
+- * for each distinct weight associated to some active
+- * and not weight-raised @bfq_queue (see the comments to
+- * the functions bfq_weights_tree_[add|remove] for
+- * further details).
+- * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted
+- * by weight. Used to keep track of whether all
+- * @bfq_groups have the same weight. The tree contains
+- * one counter for each distinct weight associated to
+- * some active @bfq_group (see the comments to the
+- * functions bfq_weights_tree_[add|remove] for further
+- * details).
+- * @busy_queues: number of bfq_queues containing requests (including the
+- * queue in service, even if it is idling).
+- * @busy_in_flight_queues: number of @bfq_queues containing pending or
+- * in-flight requests, plus the @bfq_queue in
+- * service, even if idle but waiting for the
+- * possible arrival of its next sync request. This
+- * field is updated only if the device is rotational,
+- * but used only if the device is also NCQ-capable.
+- * The reason why the field is updated also for non-
+- * NCQ-capable rotational devices is related to the
+- * fact that the value of @hw_tag may be set also
+- * later than when busy_in_flight_queues may need to
+- * be incremented for the first time(s). Taking also
+- * this possibility into account, to avoid unbalanced
+- * increments/decrements, would imply more overhead
+- * than just updating busy_in_flight_queues
+- * regardless of the value of @hw_tag.
+- * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues
+- * (that is, seeky queues that expired
+- * for budget timeout at least once)
+- * containing pending or in-flight
+- * requests, including the in-service
+- * @bfq_queue if constantly seeky. This
+- * field is updated only if the device
+- * is rotational, but used only if the
+- * device is also NCQ-capable (see the
+- * comments to @busy_in_flight_queues).
+- * @wr_busy_queues: number of weight-raised busy @bfq_queues.
+- * @queued: number of queued requests.
+- * @rq_in_driver: number of requests dispatched and waiting for completion.
+- * @sync_flight: number of sync requests in the driver.
+- * @max_rq_in_driver: max number of reqs in driver in the last
+- * @hw_tag_samples completed requests.
+- * @hw_tag_samples: nr of samples used to calculate hw_tag.
+- * @hw_tag: flag set to one if the driver is showing a queueing behavior.
+- * @budgets_assigned: number of budgets assigned.
+- * @idle_slice_timer: timer set when idling for the next sequential request
+- * from the queue in service.
+- * @unplug_work: delayed work to restart dispatching on the request queue.
+- * @in_service_queue: bfq_queue in service.
+- * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue.
+- * @last_position: on-disk position of the last served request.
+- * @last_budget_start: beginning of the last budget.
+- * @last_idling_start: beginning of the last idle slice.
+- * @peak_rate: peak transfer rate observed for a budget.
+- * @peak_rate_samples: number of samples used to calculate @peak_rate.
+- * @bfq_max_budget: maximum budget allotted to a bfq_queue before
+- * rescheduling.
+- * @active_list: list of all the bfq_queues active on the device.
+- * @idle_list: list of all the bfq_queues idle on the device.
+- * @bfq_fifo_expire: timeout for async/sync requests; when it expires
+- * requests are served in fifo order.
+- * @bfq_back_penalty: weight of backward seeks wrt forward ones.
+- * @bfq_back_max: maximum allowed backward seek.
+- * @bfq_slice_idle: maximum idling time.
+- * @bfq_user_max_budget: user-configured max budget value
+- * (0 for auto-tuning).
+- * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to
+- * async queues.
+- * @bfq_timeout: timeout for bfq_queues to consume their budget; used to
+- * to prevent seeky queues to impose long latencies to well
+- * behaved ones (this also implies that seeky queues cannot
+- * receive guarantees in the service domain; after a timeout
+- * they are charged for the whole allocated budget, to try
+- * to preserve a behavior reasonably fair among them, but
+- * without service-domain guarantees).
+- * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is
+- * no more granted any weight-raising.
+- * @bfq_failed_cooperations: number of consecutive failed cooperation
+- * chances after which weight-raising is restored
+- * to a queue subject to more than bfq_coop_thresh
+- * queue merges.
+- * @bfq_requests_within_timer: number of consecutive requests that must be
+- * issued within the idle time slice to set
+- * again idling to a queue which was marked as
+- * non-I/O-bound (see the definition of the
+- * IO_bound flag for further details).
+- * @last_ins_in_burst: last time at which a queue entered the current
+- * burst of queues being activated shortly after
+- * each other; for more details about this and the
+- * following parameters related to a burst of
+- * activations, see the comments to the function
+- * @bfq_handle_burst.
+- * @bfq_burst_interval: reference time interval used to decide whether a
+- * queue has been activated shortly after
+- * @last_ins_in_burst.
+- * @burst_size: number of queues in the current burst of queue activations.
+- * @bfq_large_burst_thresh: maximum burst size above which the current
+- * queue-activation burst is deemed as 'large'.
+- * @large_burst: true if a large queue-activation burst is in progress.
+- * @burst_list: head of the burst list (as for the above fields, more details
+- * in the comments to the function bfq_handle_burst).
+- * @low_latency: if set to true, low-latency heuristics are enabled.
+- * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised
+- * queue is multiplied.
+- * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies).
+- * @bfq_wr_rt_max_time: maximum duration for soft real-time processes.
+- * @bfq_wr_min_idle_time: minimum idle period after which weight-raising
+- * may be reactivated for a queue (in jiffies).
+- * @bfq_wr_min_inter_arr_async: minimum period between request arrivals
+- * after which weight-raising may be
+- * reactivated for an already busy queue
+- * (in jiffies).
+- * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue,
+- * sectors per seconds.
+- * @RT_prod: cached value of the product R*T used for computing the maximum
+- * duration of the weight raising automatically.
+- * @device_speed: device-speed class for the low-latency heuristic.
+- * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions.
++ * struct bfq_data - per-device data structure.
+ *
+ * All the fields are protected by the @queue lock.
+ */
+ struct bfq_data {
++ /* request queue for the device */
+ struct request_queue *queue;
+
++ /* root bfq_group for the device */
+ struct bfq_group *root_group;
+
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- int active_numerous_groups;
+-#endif
+-
++ /*
++ * rbtree of weight counters of @bfq_queues, sorted by
++ * weight. Used to keep track of whether all @bfq_queues have
++ * the same weight. The tree contains one counter for each
++ * distinct weight associated to some active and not
++ * weight-raised @bfq_queue (see the comments to the functions
++ * bfq_weights_tree_[add|remove] for further details).
++ */
+ struct rb_root queue_weights_tree;
++ /*
++ * rbtree of non-queue @bfq_entity weight counters, sorted by
++ * weight. Used to keep track of whether all @bfq_groups have
++ * the same weight. The tree contains one counter for each
++ * distinct weight associated to some active @bfq_group (see
++ * the comments to the functions bfq_weights_tree_[add|remove]
++ * for further details).
++ */
+ struct rb_root group_weights_tree;
+
++ /*
++ * Number of bfq_queues containing requests (including the
++ * queue in service, even if it is idling).
++ */
+ int busy_queues;
+- int busy_in_flight_queues;
+- int const_seeky_busy_in_flight_queues;
++ /* number of weight-raised busy @bfq_queues */
+ int wr_busy_queues;
++ /* number of queued requests */
+ int queued;
++ /* number of requests dispatched and waiting for completion */
+ int rq_in_driver;
+- int sync_flight;
+
++ /*
++ * Maximum number of requests in driver in the last
++ * @hw_tag_samples completed requests.
++ */
+ int max_rq_in_driver;
++ /* number of samples used to calculate hw_tag */
+ int hw_tag_samples;
++ /* flag set to one if the driver is showing a queueing behavior */
+ int hw_tag;
+
++ /* number of budgets assigned */
+ int budgets_assigned;
+
++ /*
++ * Timer set when idling (waiting) for the next request from
++ * the queue in service.
++ */
+ struct timer_list idle_slice_timer;
++ /* delayed work to restart dispatching on the request queue */
+ struct work_struct unplug_work;
+
++ /* bfq_queue in service */
+ struct bfq_queue *in_service_queue;
++ /* bfq_io_cq (bic) associated with the @in_service_queue */
+ struct bfq_io_cq *in_service_bic;
+
++ /* on-disk position of the last served request */
+ sector_t last_position;
+
++ /* beginning of the last budget */
+ ktime_t last_budget_start;
++ /* beginning of the last idle slice */
+ ktime_t last_idling_start;
++ /* number of samples used to calculate @peak_rate */
+ int peak_rate_samples;
++ /* peak transfer rate observed for a budget */
+ u64 peak_rate;
++ /* maximum budget allotted to a bfq_queue before rescheduling */
+ int bfq_max_budget;
+
++ /* list of all the bfq_queues active on the device */
+ struct list_head active_list;
++ /* list of all the bfq_queues idle on the device */
+ struct list_head idle_list;
+
++ /*
++ * Timeout for async/sync requests; when it fires, requests
++ * are served in fifo order.
++ */
+ unsigned int bfq_fifo_expire[2];
++ /* weight of backward seeks wrt forward ones */
+ unsigned int bfq_back_penalty;
++ /* maximum allowed backward seek */
+ unsigned int bfq_back_max;
++ /* maximum idling time */
+ unsigned int bfq_slice_idle;
++ /* last time CLASS_IDLE was served */
+ u64 bfq_class_idle_last_service;
+
++ /* user-configured max budget value (0 for auto-tuning) */
+ int bfq_user_max_budget;
+- int bfq_max_budget_async_rq;
+- unsigned int bfq_timeout[2];
+-
+- unsigned int bfq_coop_thresh;
+- unsigned int bfq_failed_cooperations;
++ /*
++ * Timeout for bfq_queues to consume their budget; used to
++ * prevent seeky queues from imposing long latencies to
++ * sequential or quasi-sequential ones (this also implies that
++ * seeky queues cannot receive guarantees in the service
++ * domain; after a timeout they are charged for the time they
++ * have been in service, to preserve fairness among them, but
++ * without service-domain guarantees).
++ */
++ unsigned int bfq_timeout;
++
++ /*
++ * Number of consecutive requests that must be issued within
++ * the idle time slice to re-enable idling for a queue that
++ * was marked as non-I/O-bound (see the definition of the
++ * IO_bound flag for further details).
++ */
+ unsigned int bfq_requests_within_timer;
+
++ /*
++ * Force device idling whenever needed to provide accurate
++ * service guarantees, without caring about throughput
++ * issues. CAVEAT: this may even increase latencies, in case
++ * of useless idling for processes that have stopped doing I/O.
++ */
++ bool strict_guarantees;
++
++ /*
++ * Last time at which a queue entered the current burst of
++ * queues being activated shortly after each other; for more
++ * details about this and the following parameters related to
++ * a burst of activations, see the comments on the function
++ * bfq_handle_burst.
++ */
+ unsigned long last_ins_in_burst;
++ /*
++ * Reference time interval used to decide whether a queue has
++ * been activated shortly after @last_ins_in_burst.
++ */
+ unsigned long bfq_burst_interval;
++ /* number of queues in the current burst of queue activations */
+ int burst_size;
++
++ /* common parent entity for the queues in the burst */
++ struct bfq_entity *burst_parent_entity;
++ /* Maximum burst size above which the current queue-activation
++ * burst is deemed as 'large'.
++ */
+ unsigned long bfq_large_burst_thresh;
++ /* true if a large queue-activation burst is in progress */
+ bool large_burst;
++ /*
++ * Head of the burst list (as for the above fields, more
++ * details in the comments on the function bfq_handle_burst).
++ */
+ struct hlist_head burst_list;
+
++ /* if set to true, low-latency heuristics are enabled */
+ bool low_latency;
+-
+- /* parameters of the low_latency heuristics */
++ /*
++ * Maximum factor by which the weight of a weight-raised queue
++ * is multiplied.
++ */
+ unsigned int bfq_wr_coeff;
++ /* maximum duration of a weight-raising period (jiffies) */
+ unsigned int bfq_wr_max_time;
++
++ /* Maximum weight-raising duration for soft real-time processes */
+ unsigned int bfq_wr_rt_max_time;
++ /*
++ * Minimum idle period after which weight-raising may be
++ * reactivated for a queue (in jiffies).
++ */
+ unsigned int bfq_wr_min_idle_time;
++ /*
++ * Minimum period between request arrivals after which
++ * weight-raising may be reactivated for an already busy async
++ * queue (in jiffies).
++ */
+ unsigned long bfq_wr_min_inter_arr_async;
++
++ /* Max service-rate for a soft real-time queue, in sectors/sec */
+ unsigned int bfq_wr_max_softrt_rate;
++ /*
++ * Cached value of the product R*T, used for computing the
++ * maximum duration of weight raising automatically.
++ */
+ u64 RT_prod;
++ /* device-speed class for the low-latency heuristic */
+ enum bfq_device_speed device_speed;
+
++ /* fallback dummy bfqq for extreme OOM conditions */
+ struct bfq_queue oom_bfqq;
+ };
+
+ enum bfqq_state_flags {
+- BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */
++ BFQ_BFQQ_FLAG_just_created = 0, /* queue just allocated */
++ BFQ_BFQQ_FLAG_busy, /* has requests or is in service */
+ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */
++ BFQ_BFQQ_FLAG_non_blocking_wait_rq, /*
++ * waiting for a request
++ * without idling the device
++ */
+ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
+ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
+ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */
+ BFQ_BFQQ_FLAG_sync, /* synchronous queue */
+- BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
+ BFQ_BFQQ_FLAG_IO_bound, /*
+ * bfqq has timed-out at least once
+ * having consumed at most 2/10 of
+@@ -581,17 +570,12 @@ enum bfqq_state_flags {
+ * bfqq activated in a large burst,
+ * see comments to bfq_handle_burst.
+ */
+- BFQ_BFQQ_FLAG_constantly_seeky, /*
+- * bfqq has proved to be slow and
+- * seeky until budget timeout
+- */
+ BFQ_BFQQ_FLAG_softrt_update, /*
+ * may need softrt-next-start
+ * update
+ */
+ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
+- BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */
+- BFQ_BFQQ_FLAG_just_split, /* queue has just been split */
++ BFQ_BFQQ_FLAG_split_coop /* shared bfqq will be split */
+ };
+
+ #define BFQ_BFQQ_FNS(name) \
+@@ -608,25 +592,53 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
+ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \
+ }
+
++BFQ_BFQQ_FNS(just_created);
+ BFQ_BFQQ_FNS(busy);
+ BFQ_BFQQ_FNS(wait_request);
++BFQ_BFQQ_FNS(non_blocking_wait_rq);
+ BFQ_BFQQ_FNS(must_alloc);
+ BFQ_BFQQ_FNS(fifo_expire);
+ BFQ_BFQQ_FNS(idle_window);
+ BFQ_BFQQ_FNS(sync);
+-BFQ_BFQQ_FNS(budget_new);
+ BFQ_BFQQ_FNS(IO_bound);
+ BFQ_BFQQ_FNS(in_large_burst);
+-BFQ_BFQQ_FNS(constantly_seeky);
+ BFQ_BFQQ_FNS(coop);
+ BFQ_BFQQ_FNS(split_coop);
+-BFQ_BFQQ_FNS(just_split);
+ BFQ_BFQQ_FNS(softrt_update);
+ #undef BFQ_BFQQ_FNS
+
+ /* Logging facilities. */
+-#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
+- blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args)
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
++static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
++
++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
++ char __pbuf[128]; \
++ \
++ assert_spin_locked((bfqd)->queue->queue_lock); \
++ blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
++ blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, \
++ (bfqq)->pid, \
++ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
++ __pbuf, ##args); \
++} while (0)
++
++#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
++ char __pbuf[128]; \
++ \
++ blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \
++ blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args); \
++} while (0)
++
++#else /* CONFIG_BFQ_GROUP_IOSCHED */
++
++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
++ blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \
++ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
++ ##args)
++#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0)
++
++#endif /* CONFIG_BFQ_GROUP_IOSCHED */
+
+ #define bfq_log(bfqd, fmt, args...) \
+ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
+@@ -640,15 +652,12 @@ enum bfqq_expiration {
+ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */
+ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */
+ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */
++ BFQ_BFQQ_PREEMPTED /* preemption in progress */
+ };
+
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+
+ struct bfqg_stats {
+- /* total bytes transferred */
+- struct blkg_rwstat service_bytes;
+- /* total IOs serviced, post merge */
+- struct blkg_rwstat serviced;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ /* number of ios merged */
+ struct blkg_rwstat merged;
+ /* total time spent on device in ns, may not be accurate w/ queueing */
+@@ -657,12 +666,8 @@ struct bfqg_stats {
+ struct blkg_rwstat wait_time;
+ /* number of IOs queued up */
+ struct blkg_rwstat queued;
+- /* total sectors transferred */
+- struct blkg_stat sectors;
+ /* total disk time and nr sectors dispatched by this group */
+ struct blkg_stat time;
+- /* time not charged to this cgroup */
+- struct blkg_stat unaccounted_time;
+ /* sum of number of ios queued across all samples */
+ struct blkg_stat avg_queue_size_sum;
+ /* count of samples taken for average */
+@@ -680,8 +685,10 @@ struct bfqg_stats {
+ uint64_t start_idle_time;
+ uint64_t start_empty_time;
+ uint16_t flags;
++#endif
+ };
+
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ /*
+ * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
+ *
+@@ -712,7 +719,7 @@ struct bfq_group_data {
+ * unused for the root group. Used to know whether there
+ * are groups with more than one active @bfq_entity
+ * (see the comments to the function
+- * bfq_bfqq_must_not_expire()).
++ * bfq_bfqq_may_idle()).
+ * @rq_pos_tree: rbtree sorted by next_request position, used when
+ * determining if two or more queues have interleaving
+ * requests (see bfq_find_close_cooperator()).
+@@ -745,7 +752,6 @@ struct bfq_group {
+ struct rb_root rq_pos_tree;
+
+ struct bfqg_stats stats;
+- struct bfqg_stats dead_stats; /* stats pushed from dead children */
+ };
+
+ #else
+@@ -767,11 +773,25 @@ bfq_entity_service_tree(struct bfq_entity *entity)
+ struct bfq_sched_data *sched_data = entity->sched_data;
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+ unsigned int idx = bfqq ? bfqq->ioprio_class - 1 :
+- BFQ_DEFAULT_GRP_CLASS;
++ BFQ_DEFAULT_GRP_CLASS - 1;
+
+ BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
+ BUG_ON(sched_data == NULL);
+
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "entity_service_tree %p %d",
++ sched_data->service_tree + idx, idx) ;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "entity_service_tree %p %d",
++ sched_data->service_tree + idx, idx) ;
++ }
++#endif
+ return sched_data->service_tree + idx;
+ }
+
+@@ -791,47 +811,6 @@ static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
+ return bic->icq.q->elevator->elevator_data;
+ }
+
+-/**
+- * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer.
+- * @ptr: a pointer to a bfqd.
+- * @flags: storage for the flags to be saved.
+- *
+- * This function allows bfqg->bfqd to be protected by the
+- * queue lock of the bfqd they reference; the pointer is dereferenced
+- * under RCU, so the storage for bfqd is assured to be safe as long
+- * as the RCU read side critical section does not end. After the
+- * bfqd->queue->queue_lock is taken the pointer is rechecked, to be
+- * sure that no other writer accessed it. If we raced with a writer,
+- * the function returns NULL, with the queue unlocked, otherwise it
+- * returns the dereferenced pointer, with the queue locked.
+- */
+-static struct bfq_data *bfq_get_bfqd_locked(void **ptr, unsigned long *flags)
+-{
+- struct bfq_data *bfqd;
+-
+- rcu_read_lock();
+- bfqd = rcu_dereference(*(struct bfq_data **)ptr);
+-
+- if (bfqd != NULL) {
+- spin_lock_irqsave(bfqd->queue->queue_lock, *flags);
+- if (ptr == NULL)
+- printk(KERN_CRIT "get_bfqd_locked pointer NULL\n");
+- else if (*ptr == bfqd)
+- goto out;
+- spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
+- }
+-
+- bfqd = NULL;
+-out:
+- rcu_read_unlock();
+- return bfqd;
+-}
+-
+-static void bfq_put_bfqd_unlock(struct bfq_data *bfqd, unsigned long *flags)
+-{
+- spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
+-}
+-
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+
+ static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+@@ -857,11 +836,13 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
+ static void bfq_put_queue(struct bfq_queue *bfqq);
+ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
+ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+- struct bio *bio, int is_sync,
+- struct bfq_io_cq *bic, gfp_t gfp_mask);
++ struct bio *bio, bool is_sync,
++ struct bfq_io_cq *bic);
+ static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+ struct bfq_group *bfqg);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
++#endif
+ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+
+ #endif /* _BFQ_H */
+--
+1.9.1
+
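The BFQ_BFQQ_FNS() invocations in the hunk above rely on a generator macro: each invocation stamps out a small family of flag accessors, with the flag list in enum bfqq_state_flags as the single source of truth. Only the test accessor is quoted in the patch; the self-contained userspace sketch below follows that visible pattern, and the mark/clear bodies are assumed for illustration, not copied from the patch.

/*
 * Userspace sketch of the BFQ_BFQQ_FNS() generator. The test accessor
 * mirrors the one quoted in the hunk above; the mark/clear bodies are
 * assumptions written in the same style.
 */
#include <stdio.h>

struct bfq_queue {
	unsigned int flags;
};

enum bfqq_state_flags {
	BFQ_BFQQ_FLAG_busy = 0,
	BFQ_BFQQ_FLAG_sync,
};

#define BFQ_BFQQ_FNS(name)						\
static void bfq_mark_bfqq_##name(struct bfq_queue *bfqq)		\
{									\
	bfqq->flags |= (1 << BFQ_BFQQ_FLAG_##name);			\
}									\
static void bfq_clear_bfqq_##name(struct bfq_queue *bfqq)		\
{									\
	bfqq->flags &= ~(1 << BFQ_BFQQ_FLAG_##name);			\
}									\
static int bfq_bfqq_##name(const struct bfq_queue *bfqq)		\
{									\
	return (bfqq->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0;	\
}

BFQ_BFQQ_FNS(busy)	/* generates bfq_mark/clear_bfqq_busy(), bfq_bfqq_busy() */
BFQ_BFQQ_FNS(sync)

int main(void)
{
	struct bfq_queue q = { 0 };

	bfq_mark_bfqq_busy(&q);
	bfq_mark_bfqq_sync(&q);
	printf("busy=%d sync=%d\n", bfq_bfqq_busy(&q), bfq_bfqq_sync(&q));
	bfq_clear_bfqq_busy(&q);
	printf("busy=%d sync=%d\n", bfq_bfqq_busy(&q), bfq_bfqq_sync(&q));
	return 0;
}

So, for example, BFQ_BFQQ_FNS(just_created) in the patch is what makes the bfq_bfqq_just_created() test used elsewhere in the scheduler exist at all.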
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-08-11 23:43 Mike Pagano
0 siblings, 0 replies; 20+ messages in thread
From: Mike Pagano @ 2016-08-11 23:43 UTC (permalink / raw
To: gentoo-commits
commit: d8f5eba0b45a4111ecceecda2e24cd7a8ecfaa62
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Thu Aug 11 23:43:17 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Thu Aug 11 23:43:17 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=d8f5eba0
Enable control of the unaligned access control policy from sysctl. Thanks to Tobias Klausmann (klausman <AT> gentoo.org).
0000_README | 4 ++
4400_alpha-sysctl-uac.patch | 142 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 146 insertions(+)
diff --git a/0000_README b/0000_README
index 1b5179e..ed55a98 100644
--- a/0000_README
+++ b/0000_README
@@ -59,6 +59,10 @@ Patch: 4200_fbcondecor-3.19.patch
From: http://www.mepiscommunity.org/fbcondecor
Desc: Bootsplash ported by Marco. (Bug #539616)
+Patch: 4400_alpha-sysctl-uac.patch
+From: Tobias Klausmann (klausman@gentoo.org) and http://bugs.gentoo.org/show_bug.cgi?id=217323
+Desc: Enable control of the unaligned access control policy from sysctl
+
Patch: 4567_distro-Gentoo-Kconfig.patch
From: Tom Wijsman <TomWij@gentoo.org>
Desc: Add Gentoo Linux support config settings and defaults.
diff --git a/4400_alpha-sysctl-uac.patch b/4400_alpha-sysctl-uac.patch
new file mode 100644
index 0000000..d42b4ed
--- /dev/null
+++ b/4400_alpha-sysctl-uac.patch
@@ -0,0 +1,142 @@
+diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
+index 7f312d8..1eb686b 100644
+--- a/arch/alpha/Kconfig
++++ b/arch/alpha/Kconfig
+@@ -697,6 +697,33 @@ config HZ
+ default 1200 if HZ_1200
+ default 1024
+
++config ALPHA_UAC_SYSCTL
++ bool "Configure UAC policy via sysctl"
++ depends on SYSCTL
++ default y
++ ---help---
++ Configuring the UAC (unaligned access control) policy on a Linux
++ system usually involves setting a compile time define. If you say
++ Y here, you will be able to modify the UAC policy at runtime using
++ the /proc interface.
++
++ The UAC policy defines the action Linux should take when an
++ unaligned memory access occurs. The action can include printing a
++ warning message (NOPRINT), sending a signal to the offending
++ program to help developers debug their applications (SIGBUS), or
++ disabling the transparent fixing (NOFIX).
++
++ The sysctls will be initialized to the compile-time defined UAC
++ policy. You can change these manually, or with the sysctl(8)
++ userspace utility.
++
++ To disable the warning messages at runtime, you would use
++
++ echo 1 > /proc/sys/kernel/uac/noprint
++
++ This is pretty harmless. Say Y if you're not sure.
++
++
+ source "drivers/pci/Kconfig"
+ source "drivers/eisa/Kconfig"
+
+diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
+index 74aceea..cb35d80 100644
+--- a/arch/alpha/kernel/traps.c
++++ b/arch/alpha/kernel/traps.c
+@@ -103,6 +103,49 @@ static char * ireg_name[] = {"v0", "t0", "t1", "t2", "t3", "t4", "t5", "t6",
+ "t10", "t11", "ra", "pv", "at", "gp", "sp", "zero"};
+ #endif
+
++#ifdef CONFIG_ALPHA_UAC_SYSCTL
++
++#include <linux/sysctl.h>
++
++static int enabled_noprint = 0;
++static int enabled_sigbus = 0;
++static int enabled_nofix = 0;
++
++struct ctl_table uac_table[] = {
++ {
++ .procname = "noprint",
++ .data = &enabled_noprint,
++ .maxlen = sizeof (int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .procname = "sigbus",
++ .data = &enabled_sigbus,
++ .maxlen = sizeof (int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
++ .procname = "nofix",
++ .data = &enabled_nofix,
++ .maxlen = sizeof (int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ { }
++};
++
++static int __init init_uac_sysctl(void)
++{
++ /* Initialize sysctls with the #defined UAC policy */
++ enabled_noprint = (test_thread_flag (TS_UAC_NOPRINT)) ? 1 : 0;
++ enabled_sigbus = (test_thread_flag (TS_UAC_SIGBUS)) ? 1 : 0;
++ enabled_nofix = (test_thread_flag (TS_UAC_NOFIX)) ? 1 : 0;
++ return 0;
++}
++#endif
++
+ static void
+ dik_show_code(unsigned int *pc)
+ {
+@@ -785,7 +828,12 @@ do_entUnaUser(void __user * va, unsigned long opcode,
+ /* Check the UAC bits to decide what the user wants us to do
+ with the unaliged access. */
+
++#ifndef CONFIG_ALPHA_UAC_SYSCTL
+ if (!(current_thread_info()->status & TS_UAC_NOPRINT)) {
++#else /* CONFIG_ALPHA_UAC_SYSCTL */
++ if (!(current_thread_info()->status & TS_UAC_NOPRINT) &&
++ !(enabled_noprint)) {
++#endif /* CONFIG_ALPHA_UAC_SYSCTL */
+ if (__ratelimit(&ratelimit)) {
+ printk("%s(%d): unaligned trap at %016lx: %p %lx %ld\n",
+ current->comm, task_pid_nr(current),
+@@ -1090,3 +1138,6 @@ trap_init(void)
+ wrent(entSys, 5);
+ wrent(entDbg, 6);
+ }
++#ifdef CONFIG_ALPHA_UAC_SYSCTL
++ __initcall(init_uac_sysctl);
++#endif
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 87b2fc3..55021a8 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -152,6 +152,11 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
+ #ifdef CONFIG_INOTIFY_USER
+ #include <linux/inotify.h>
+ #endif
++
++#ifdef CONFIG_ALPHA_UAC_SYSCTL
++extern struct ctl_table uac_table[];
++#endif
++
+ #ifdef CONFIG_SPARC
+ #endif
+
+@@ -1844,6 +1849,13 @@ static struct ctl_table debug_table[] = {
+ .extra2 = &one,
+ },
+ #endif
++#ifdef CONFIG_ALPHA_UAC_SYSCTL
++ {
++ .procname = "uac",
++ .mode = 0555,
++ .child = uac_table,
++ },
++#endif /* CONFIG_ALPHA_UAC_SYSCTL */
+ { }
+ };
+
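For anyone who wants to poke at the new knobs once this patch is applied, the policy bits appear as ordinary integer sysctl files. The sketch below is a throwaway userspace program, not kernel API; it assumes an Alpha kernel built with CONFIG_ALPHA_UAC_SYSCTL=y, and the path is the one quoted in the Kconfig help text above (the ctl_table is attached to debug_table, so the prefix may differ depending on where that table is mounted).

/*
 * Minimal sketch: flip the "noprint" UAC policy bit at runtime.
 * Path taken from the Kconfig help above; needs root (files are
 * mode 0644). read_flag/write_flag are throwaway helpers.
 */
#include <stdio.h>

static int read_flag(const char *path)
{
	FILE *f = fopen(path, "r");
	int val = -1;

	if (f) {
		if (fscanf(f, "%d", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

static int write_flag(const char *path, int val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", val);
	return fclose(f);	/* 0 on success */
}

int main(void)
{
	const char *noprint = "/proc/sys/kernel/uac/noprint";

	printf("noprint was %d\n", read_flag(noprint));
	if (write_flag(noprint, 1) != 0)
		perror("write_flag");
	printf("noprint now %d\n", read_flag(noprint));
	return 0;
}

The echo command shown in the help text, or sysctl(8), achieves the same effect from the shell.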
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-08-16 23:24 Mike Pagano
0 siblings, 0 replies; 20+ messages in thread
From: Mike Pagano @ 2016-08-16 23:24 UTC (permalink / raw
To: gentoo-commits
commit: 413f5fd067017ebfb9f25329adf7eaed35a8cc21
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Tue Aug 16 23:23:56 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Tue Aug 16 23:23:56 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=413f5fd0
Linux patch 4.7.1
0000_README | 4 +
1000_linux-4.7.1.patch | 1141 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1145 insertions(+)
diff --git a/0000_README b/0000_README
index ed55a98..0aa3187 100644
--- a/0000_README
+++ b/0000_README
@@ -43,6 +43,10 @@ EXPERIMENTAL
Individual Patch Descriptions:
--------------------------------------------------------------------------
+Patch: 1000_linux-4.7.1.patch
+From: http://www.kernel.org
+Desc: Linux 4.7.1
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1000_linux-4.7.1.patch b/1000_linux-4.7.1.patch
new file mode 100644
index 0000000..264997d
--- /dev/null
+++ b/1000_linux-4.7.1.patch
@@ -0,0 +1,1141 @@
+diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt
+index 0a94224ad296..9e3c3b33514c 100644
+--- a/Documentation/cpu-freq/pcc-cpufreq.txt
++++ b/Documentation/cpu-freq/pcc-cpufreq.txt
+@@ -159,8 +159,8 @@ to be strictly associated with a P-state.
+
+ 2.2 cpuinfo_transition_latency:
+ -------------------------------
+-The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification
+-does not include a field to expose this value currently.
++The cpuinfo_transition_latency field is 0. The PCC specification does
++not include a field to expose this value currently.
+
+ 2.3 cpuinfo_cur_freq:
+ ---------------------
+diff --git a/Makefile b/Makefile
+index 66da9a38b13b..84335c0b2eda 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 7
+-SUBLEVEL = 0
++SUBLEVEL = 1
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
+index 087acb569b63..5f221acd21ae 100644
+--- a/arch/arm/kernel/sys_oabi-compat.c
++++ b/arch/arm/kernel/sys_oabi-compat.c
+@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
+ mm_segment_t fs;
+ long ret, err, i;
+
+- if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
++ if (maxevents <= 0 ||
++ maxevents > (INT_MAX/sizeof(*kbuf)) ||
++ maxevents > (INT_MAX/sizeof(*events)))
+ return -EINVAL;
++ if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
++ return -EFAULT;
+ kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,
+
+ if (nsops < 1 || nsops > SEMOPM)
+ return -EINVAL;
++ if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
++ return -EFAULT;
+ sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
+ if (!sops)
+ return -ENOMEM;
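The two sys_oabi-compat hunks above follow a common hardening pattern: before computing count * sizeof(element) for an allocation, bound the count by INT_MAX / sizeof(element) so the multiplication cannot wrap. A standalone model of that guard, with an illustrative struct and made-up sample counts rather than the kernel's types:

/*
 * Model of the bounds check added above: reject element counts that
 * would make count * sizeof(element) overflow before calling the
 * allocator.
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

struct event {			/* stand-in for the kernel's element type */
	unsigned int mask;
	unsigned long long data;
};

static struct event *alloc_events(long maxevents)
{
	if (maxevents <= 0 ||
	    maxevents > (long)(INT_MAX / sizeof(struct event)))
		return NULL;	/* nonsense count, or size would overflow */
	return malloc(sizeof(struct event) * (size_t)maxevents);
}

int main(void)
{
	struct event *ok = alloc_events(64);
	struct event *bad = alloc_events(LONG_MAX / 2);

	printf("64 events:  %s\n", ok ? "allocated" : "rejected");
	printf("huge count: %s\n", bad ? "allocated" : "rejected");
	free(ok);
	free(bad);
	return 0;
}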
+diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
+index 9c0b387d6427..51d3988933f8 100644
+--- a/arch/mips/kernel/scall64-n32.S
++++ b/arch/mips/kernel/scall64-n32.S
+@@ -348,7 +348,7 @@ EXPORT(sysn32_call_table)
+ PTR sys_ni_syscall /* available, was setaltroot */
+ PTR sys_add_key
+ PTR sys_request_key
+- PTR sys_keyctl /* 6245 */
++ PTR compat_sys_keyctl /* 6245 */
+ PTR sys_set_thread_area
+ PTR sys_inotify_init
+ PTR sys_inotify_add_watch
+diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
+index f4f28b1580de..6efa7136748f 100644
+--- a/arch/mips/kernel/scall64-o32.S
++++ b/arch/mips/kernel/scall64-o32.S
+@@ -504,7 +504,7 @@ EXPORT(sys32_call_table)
+ PTR sys_ni_syscall /* available, was setaltroot */
+ PTR sys_add_key /* 4280 */
+ PTR sys_request_key
+- PTR sys_keyctl
++ PTR compat_sys_keyctl
+ PTR sys_set_thread_area
+ PTR sys_inotify_init
+ PTR sys_inotify_add_watch /* 4285 */
+diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
+index 4cddd17153fb..f848572169ea 100644
+--- a/arch/x86/entry/syscalls/syscall_32.tbl
++++ b/arch/x86/entry/syscalls/syscall_32.tbl
+@@ -294,7 +294,7 @@
+ # 285 sys_setaltroot
+ 286 i386 add_key sys_add_key
+ 287 i386 request_key sys_request_key
+-288 i386 keyctl sys_keyctl
++288 i386 keyctl sys_keyctl compat_sys_keyctl
+ 289 i386 ioprio_set sys_ioprio_set
+ 290 i386 ioprio_get sys_ioprio_get
+ 291 i386 inotify_init sys_inotify_init
+diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
+index 9d3a96c4da78..01c2d14ec05f 100644
+--- a/arch/x86/include/asm/microcode.h
++++ b/arch/x86/include/asm/microcode.h
+@@ -133,13 +133,11 @@ static inline unsigned int x86_cpuid_family(void)
+ #ifdef CONFIG_MICROCODE
+ extern void __init load_ucode_bsp(void);
+ extern void load_ucode_ap(void);
+-extern int __init save_microcode_in_initrd(void);
+ void reload_early_microcode(void);
+ extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
+ #else
+ static inline void __init load_ucode_bsp(void) { }
+ static inline void load_ucode_ap(void) { }
+-static inline int __init save_microcode_in_initrd(void) { return 0; }
+ static inline void reload_early_microcode(void) { }
+ static inline bool
+ get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
+diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
+index ac360bfbbdb6..12823b6ebd6d 100644
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -175,7 +175,7 @@ void load_ucode_ap(void)
+ }
+ }
+
+-int __init save_microcode_in_initrd(void)
++static int __init save_microcode_in_initrd(void)
+ {
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+@@ -691,4 +691,5 @@ int __init microcode_init(void)
+ return error;
+
+ }
++fs_initcall(save_microcode_in_initrd);
+ late_initcall(microcode_init);
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 372aad2b3291..dffd162db0a4 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -696,13 +696,6 @@ void free_initmem(void)
+ void __init free_initrd_mem(unsigned long start, unsigned long end)
+ {
+ /*
+- * Remember, initrd memory may contain microcode or other useful things.
+- * Before we lose initrd mem, we need to find a place to hold them
+- * now that normal virtual memory is enabled.
+- */
+- save_microcode_in_initrd();
+-
+- /*
+ * end could be not aligned, and We can not align that,
+ * decompresser could be confused by aligned initrd_end
+ * We already reserve the end partial page before in
+diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
+index 3177c2bc26f6..8eee0e9c93f0 100644
+--- a/arch/x86/power/hibernate_asm_64.S
++++ b/arch/x86/power/hibernate_asm_64.S
+@@ -24,7 +24,6 @@
+ #include <asm/frame.h>
+
+ ENTRY(swsusp_arch_suspend)
+- FRAME_BEGIN
+ movq $saved_context, %rax
+ movq %rsp, pt_regs_sp(%rax)
+ movq %rbp, pt_regs_bp(%rax)
+@@ -48,6 +47,7 @@ ENTRY(swsusp_arch_suspend)
+ movq %cr3, %rax
+ movq %rax, restore_cr3(%rip)
+
++ FRAME_BEGIN
+ call swsusp_save
+ FRAME_END
+ ret
+@@ -104,7 +104,6 @@ ENTRY(core_restore_code)
+ /* code below belongs to the image kernel */
+ .align PAGE_SIZE
+ ENTRY(restore_registers)
+- FRAME_BEGIN
+ /* go back to the original page tables */
+ movq %r9, %cr3
+
+@@ -145,6 +144,5 @@ ENTRY(restore_registers)
+ /* tell the hibernation core that we've just restored the memory */
+ movq %rax, in_suspend(%rip)
+
+- FRAME_END
+ ret
+ ENDPROC(restore_registers)
+diff --git a/block/genhd.c b/block/genhd.c
+index 9f42526b4d62..3eebd256b765 100644
+--- a/block/genhd.c
++++ b/block/genhd.c
+@@ -856,6 +856,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v)
+ if (iter) {
+ class_dev_iter_exit(iter);
+ kfree(iter);
++ seqf->private = NULL;
+ }
+ }
+
+diff --git a/crypto/gcm.c b/crypto/gcm.c
+index bec329b3de8d..d9ea5f9c0574 100644
+--- a/crypto/gcm.c
++++ b/crypto/gcm.c
+@@ -639,7 +639,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
+
+ ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
+ CRYPTO_ALG_TYPE_HASH,
+- CRYPTO_ALG_TYPE_AHASH_MASK);
++ CRYPTO_ALG_TYPE_AHASH_MASK |
++ crypto_requires_sync(algt->type,
++ algt->mask));
+ if (IS_ERR(ghash_alg))
+ return PTR_ERR(ghash_alg);
+
+diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
+index ea5815c5e128..bc769c448d4a 100644
+--- a/crypto/scatterwalk.c
++++ b/crypto/scatterwalk.c
+@@ -72,7 +72,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
+
+ void scatterwalk_done(struct scatter_walk *walk, int out, int more)
+ {
+- if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more)
++ if (!more || walk->offset >= walk->sg->offset + walk->sg->length ||
++ !(walk->offset & (PAGE_SIZE - 1)))
+ scatterwalk_pagedone(walk, out, more);
+ }
+ EXPORT_SYMBOL_GPL(scatterwalk_done);
+diff --git a/drivers/char/random.c b/drivers/char/random.c
+index 0158d3bff7e5..87ab9f6b4112 100644
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -723,15 +723,18 @@ retry:
+ }
+ }
+
+-static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
++static int credit_entropy_bits_safe(struct entropy_store *r, int nbits)
+ {
+ const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1));
+
++ if (nbits < 0)
++ return -EINVAL;
++
+ /* Cap the value to avoid overflows */
+ nbits = min(nbits, nbits_max);
+- nbits = max(nbits, -nbits_max);
+
+ credit_entropy_bits(r, nbits);
++ return 0;
+ }
+
+ /*********************************************************************
+@@ -1543,8 +1546,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+ return -EPERM;
+ if (get_user(ent_count, p))
+ return -EFAULT;
+- credit_entropy_bits_safe(&input_pool, ent_count);
+- return 0;
++ return credit_entropy_bits_safe(&input_pool, ent_count);
+ case RNDADDENTROPY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+@@ -1558,8 +1560,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+ size);
+ if (retval < 0)
+ return retval;
+- credit_entropy_bits_safe(&input_pool, ent_count);
+- return 0;
++ return credit_entropy_bits_safe(&input_pool, ent_count);
+ case RNDZAPENTCNT:
+ case RNDCLEARPOOL:
+ /*
+diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
+index a7ecb9a84c15..3f0ce2ae35ee 100644
+--- a/drivers/cpufreq/pcc-cpufreq.c
++++ b/drivers/cpufreq/pcc-cpufreq.c
+@@ -555,8 +555,6 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
+ policy->min = policy->cpuinfo.min_freq =
+ ioread32(&pcch_hdr->minimum_frequency) * 1000;
+
+- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+-
+ pr_debug("init: policy->max is %d, policy->min is %d\n",
+ policy->max, policy->min);
+ out:
+diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
+index a925fb0db706..f846fd51b85b 100644
+--- a/drivers/infiniband/hw/hfi1/Kconfig
++++ b/drivers/infiniband/hw/hfi1/Kconfig
+@@ -3,7 +3,6 @@ config INFINIBAND_HFI1
+ depends on X86_64 && INFINIBAND_RDMAVT
+ select MMU_NOTIFIER
+ select CRC32
+- default m
+ ---help---
+ This is a low-level driver for Intel OPA Gen1 adapter.
+ config HFI1_DEBUG_SDMA_ORDER
+diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
+index a2afa3be17a4..4d7981946f79 100644
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -1422,7 +1422,16 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
+ return -EINVAL;
+ }
+
+- if (slave_ops->ndo_set_mac_address == NULL) {
++ if (slave_dev->type == ARPHRD_INFINIBAND &&
++ BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
++ netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n",
++ slave_dev->type);
++ res = -EOPNOTSUPP;
++ goto err_undo_flags;
++ }
++
++ if (!slave_ops->ndo_set_mac_address ||
++ slave_dev->type == ARPHRD_INFINIBAND) {
+ netdev_warn(bond_dev, "The slave device specified does not support setting the MAC address\n");
+ if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
+ bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
+index b122f6013b6c..03601dfc0642 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
+@@ -809,13 +809,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
+ * in a bitmap and increasing the chain consumer only
+ * for the first successive completed entries.
+ */
+- bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE);
++ __set_bit(pos, p_spq->p_comp_bitmap);
+
+ while (test_bit(p_spq->comp_bitmap_idx,
+ p_spq->p_comp_bitmap)) {
+- bitmap_clear(p_spq->p_comp_bitmap,
+- p_spq->comp_bitmap_idx,
+- SPQ_RING_SIZE);
++ __clear_bit(p_spq->comp_bitmap_idx,
++ p_spq->p_comp_bitmap);
+ p_spq->comp_bitmap_idx++;
+ qed_chain_return_produced(&p_spq->chain);
+ }
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 8bcd78f94966..a70b6c460178 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -942,7 +942,6 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
+ }
+
+ macsec_skb_cb(skb)->req = req;
+- macsec_skb_cb(skb)->rx_sa = rx_sa;
+ skb->dev = dev;
+ aead_request_set_callback(req, 0, macsec_decrypt_done, skb);
+
+@@ -1169,6 +1168,8 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
+ }
+ }
+
++ macsec_skb_cb(skb)->rx_sa = rx_sa;
++
+ /* Disabled && !changed text => skip validation */
+ if (hdr->tci_an & MACSEC_TCI_C ||
+ secy->validate_frames != MACSEC_VALIDATE_DISABLED)
+diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
+index ce362bd51de7..45b57c294d13 100644
+--- a/drivers/tty/serial/mvebu-uart.c
++++ b/drivers/tty/serial/mvebu-uart.c
+@@ -300,6 +300,8 @@ static int mvebu_uart_startup(struct uart_port *port)
+ static void mvebu_uart_shutdown(struct uart_port *port)
+ {
+ writel(0, port->membase + UART_CTRL);
++
++ free_irq(port->irq, port);
+ }
+
+ static void mvebu_uart_set_termios(struct uart_port *port,
+diff --git a/fs/dcache.c b/fs/dcache.c
+index d6847d7b123d..1ed81bb80500 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -622,7 +622,6 @@ static struct dentry *dentry_kill(struct dentry *dentry)
+
+ failed:
+ spin_unlock(&dentry->d_lock);
+- cpu_relax();
+ return dentry; /* try again with same dentry */
+ }
+
+@@ -796,6 +795,8 @@ void dput(struct dentry *dentry)
+ return;
+
+ repeat:
++ might_sleep();
++
+ rcu_read_lock();
+ if (likely(fast_dput(dentry))) {
+ rcu_read_unlock();
+@@ -829,8 +830,10 @@ repeat:
+
+ kill_it:
+ dentry = dentry_kill(dentry);
+- if (dentry)
++ if (dentry) {
++ cond_resched();
+ goto repeat;
++ }
+ }
+ EXPORT_SYMBOL(dput);
+
+diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
+index 3020fd70c392..1ea505434a6e 100644
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -208,6 +208,9 @@ static int ext4_init_block_bitmap(struct super_block *sb,
+ memset(bh->b_data, 0, sb->s_blocksize);
+
+ bit_max = ext4_num_base_meta_clusters(sb, block_group);
++ if ((bit_max >> 3) >= bh->b_size)
++ return -EFSCORRUPTED;
++
+ for (bit = 0; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 2a2eef9c14e4..d7ccb7f51dfc 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -381,9 +381,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
+ ext4_fsblk_t block = ext4_ext_pblock(ext);
+ int len = ext4_ext_get_actual_len(ext);
+ ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+- ext4_lblk_t last = lblock + len - 1;
+
+- if (len == 0 || lblock > last)
++ /*
++ * We allow neither:
++ * - zero length
++ * - overflow/wrap-around
++ */
++ if (lblock + len <= lblock)
+ return 0;
+ return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
+ }
+@@ -474,6 +478,10 @@ static int __ext4_ext_check(const char *function, unsigned int line,
+ error_msg = "invalid extent entries";
+ goto corrupted;
+ }
++ if (unlikely(depth > 32)) {
++ error_msg = "too large eh_depth";
++ goto corrupted;
++ }
+ /* Verify checksum on non-root extent tree nodes */
+ if (ext_depth(inode) != depth &&
+ !ext4_extent_block_csum_verify(inode, eh)) {
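The rewritten ext4_valid_extent() check above is worth a second look: for unsigned block numbers, the single comparison lblock + len <= lblock is true exactly when len == 0 or when the addition wraps, so it subsumes both rejected cases that the old code tested separately. A small demonstration with simplified types and made-up values:

/*
 * Model of the range check from the ext4_valid_extent() hunk above:
 * with unsigned 32-bit arithmetic, lblock + len <= lblock holds
 * precisely for zero-length and wrapping extents.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t ext4_lblk_t;

static int extent_range_valid(ext4_lblk_t lblock, ext4_lblk_t len)
{
	/* Reject zero length and wrap-around in one comparison. */
	return (ext4_lblk_t)(lblock + len) > lblock;
}

int main(void)
{
	printf("%d\n", extent_range_valid(100, 8));		/* 1: fine */
	printf("%d\n", extent_range_valid(100, 0));		/* 0: zero length */
	printf("%d\n", extent_range_valid(0xfffffff0u, 0x20));	/* 0: wraps */
	return 0;
}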
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index f7140ca66e3b..b747ec09c1ac 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -205,9 +205,9 @@ void ext4_evict_inode(struct inode *inode)
+ * Note that directories do not have this problem because they
+ * don't use page cache.
+ */
+- if (ext4_should_journal_data(inode) &&
+- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+- inode->i_ino != EXT4_JOURNAL_INO) {
++ if (inode->i_ino != EXT4_JOURNAL_INO &&
++ ext4_should_journal_data(inode) &&
++ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+ tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
+
+@@ -2748,13 +2748,36 @@ retry:
+ done = true;
+ }
+ }
+- ext4_journal_stop(handle);
++ /*
++ * Caution: If the handle is synchronous,
++ * ext4_journal_stop() can wait for transaction commit
++ * to finish which may depend on writeback of pages to
++ * complete or on page lock to be released. In that
++ * case, we have to wait until after we have
++ * submitted all the IO, released page locks we hold,
++ * and dropped io_end reference (for extent conversion
++ * to be able to complete) before stopping the handle.
++ */
++ if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
++ ext4_journal_stop(handle);
++ handle = NULL;
++ }
+ /* Submit prepared bio */
+ ext4_io_submit(&mpd.io_submit);
+ /* Unlock pages we didn't use */
+ mpage_release_unused_pages(&mpd, give_up_on_write);
+- /* Drop our io_end reference we got from init */
+- ext4_put_io_end(mpd.io_submit.io_end);
++ /*
++ * Drop our io_end reference we got from init. We have
++ * to be careful and use deferred io_end finishing if
++ * we are still holding the transaction as we can
++ * release the last reference to io_end which may end
++ * up doing unwritten extent conversion.
++ */
++ if (handle) {
++ ext4_put_io_end_defer(mpd.io_submit.io_end);
++ ext4_journal_stop(handle);
++ } else
++ ext4_put_io_end(mpd.io_submit.io_end);
+
+ if (ret == -ENOSPC && sbi->s_journal) {
+ /*
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index c1ab3ec30423..7f42eda52523 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2939,7 +2939,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+ ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
+ "fs metadata", block, block+len);
+ /* File system mounted not to panic on error
+- * Fix the bitmap and repeat the block allocation
++ * Fix the bitmap and return EFSCORRUPTED
+ * We leak some of the blocks here.
+ */
+ ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+@@ -2948,7 +2948,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+ ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+ if (!err)
+- err = -EAGAIN;
++ err = -EFSCORRUPTED;
+ goto out_err;
+ }
+
+@@ -4513,18 +4513,7 @@ repeat:
+ }
+ if (likely(ac->ac_status == AC_STATUS_FOUND)) {
+ *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
+- if (*errp == -EAGAIN) {
+- /*
+- * drop the reference that we took
+- * in ext4_mb_use_best_found
+- */
+- ext4_mb_release_context(ac);
+- ac->ac_b_ex.fe_group = 0;
+- ac->ac_b_ex.fe_start = 0;
+- ac->ac_b_ex.fe_len = 0;
+- ac->ac_status = AC_STATUS_CONTINUE;
+- goto repeat;
+- } else if (*errp) {
++ if (*errp) {
+ ext4_discard_allocated_blocks(ac);
+ goto errout;
+ } else {
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 3822a5aedc61..639bd756a8d8 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2278,6 +2278,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
+ while (es->s_last_orphan) {
+ struct inode *inode;
+
++ /*
++ * We may have encountered an error during cleanup; if
++ * so, skip the rest.
++ */
++ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
++ jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
++ es->s_last_orphan = 0;
++ break;
++ }
++
+ inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
+ if (IS_ERR(inode)) {
+ es->s_last_orphan = 0;
+@@ -3416,6 +3426,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ goto failed_mount;
+ }
+
++ if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
++ ext4_msg(sb, KERN_ERR,
++ "Number of reserved GDT blocks insanely large: %d",
++ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
++ goto failed_mount;
++ }
++
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+ err = bdev_dax_supported(sb, blocksize);
+ if (err)
+diff --git a/fs/fuse/file.c b/fs/fuse/file.c
+index 9154f8679024..6cac3dc33521 100644
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -417,6 +417,15 @@ static int fuse_flush(struct file *file, fl_owner_t id)
+ fuse_sync_writes(inode);
+ inode_unlock(inode);
+
++ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
++ err = -ENOSPC;
++ if (test_bit(AS_EIO, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
++ err = -EIO;
++ if (err)
++ return err;
++
+ req = fuse_get_req_nofail_nopages(fc, file);
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+@@ -462,6 +471,21 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
+ goto out;
+
+ fuse_sync_writes(inode);
++
++ /*
++ * Due to implementation of fuse writeback
++ * filemap_write_and_wait_range() does not catch errors.
++ * We have to do this directly after fuse_sync_writes()
++ */
++ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
++ err = -ENOSPC;
++ if (test_bit(AS_EIO, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
++ err = -EIO;
++ if (err)
++ goto out;
++
+ err = sync_inode_metadata(inode, 1);
+ if (err)
+ goto out;
+diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
+index 9961d8432ce3..9b7cb37b4ba8 100644
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -942,7 +942,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
+ arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
+ FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
+ FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
+- FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
++ FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
+ FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
+ FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
+ FUSE_PARALLEL_DIROPS;
+diff --git a/fs/inode.c b/fs/inode.c
+index 4ccbc21b30ce..9ea421948742 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -345,7 +345,7 @@ EXPORT_SYMBOL(inc_nlink);
+ void address_space_init_once(struct address_space *mapping)
+ {
+ memset(mapping, 0, sizeof(*mapping));
+- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
++ INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
+ spin_lock_init(&mapping->tree_lock);
+ init_rwsem(&mapping->i_mmap_rwsem);
+ INIT_LIST_HEAD(&mapping->private_list);
+@@ -1740,8 +1740,8 @@ static int __remove_privs(struct dentry *dentry, int kill)
+ */
+ int file_remove_privs(struct file *file)
+ {
+- struct dentry *dentry = file->f_path.dentry;
+- struct inode *inode = d_inode(dentry);
++ struct dentry *dentry = file_dentry(file);
++ struct inode *inode = file_inode(file);
+ int kill;
+ int error = 0;
+
+@@ -1749,7 +1749,7 @@ int file_remove_privs(struct file *file)
+ if (IS_NOSEC(inode))
+ return 0;
+
+- kill = file_needs_remove_privs(file);
++ kill = dentry_needs_remove_privs(dentry);
+ if (kill < 0)
+ return kill;
+ if (kill)
+diff --git a/fs/ioctl.c b/fs/ioctl.c
+index 116a333e9c77..0f56deb24ce6 100644
+--- a/fs/ioctl.c
++++ b/fs/ioctl.c
+@@ -590,6 +590,7 @@ static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
+ goto out;
+ }
+
++ same->dest_count = count;
+ ret = vfs_dedupe_file_range(file, same);
+ if (ret)
+ goto out;
+diff --git a/ipc/msg.c b/ipc/msg.c
+index 1471db9a7e61..c6521c205cb4 100644
+--- a/ipc/msg.c
++++ b/ipc/msg.c
+@@ -680,7 +680,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
+ rcu_read_lock();
+ ipc_lock_object(&msq->q_perm);
+
+- ipc_rcu_putref(msq, ipc_rcu_free);
++ ipc_rcu_putref(msq, msg_rcu_free);
+ /* raced with RMID? */
+ if (!ipc_valid_object(&msq->q_perm)) {
+ err = -EIDRM;
+diff --git a/ipc/sem.c b/ipc/sem.c
+index b3757ea0694b..5d2f875e8e2e 100644
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -449,7 +449,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
+ static inline void sem_lock_and_putref(struct sem_array *sma)
+ {
+ sem_lock(sma, NULL, -1);
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ }
+
+ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
+@@ -1392,7 +1392,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
+ rcu_read_unlock();
+ sem_io = ipc_alloc(sizeof(ushort)*nsems);
+ if (sem_io == NULL) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ return -ENOMEM;
+ }
+
+@@ -1426,20 +1426,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
+ if (nsems > SEMMSL_FAST) {
+ sem_io = ipc_alloc(sizeof(ushort)*nsems);
+ if (sem_io == NULL) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ return -ENOMEM;
+ }
+ }
+
+ if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ err = -EFAULT;
+ goto out_free;
+ }
+
+ for (i = 0; i < nsems; i++) {
+ if (sem_io[i] > SEMVMX) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ err = -ERANGE;
+ goto out_free;
+ }
+@@ -1731,7 +1731,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
+ /* step 2: allocate new undo structure */
+ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+ if (!new) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ return ERR_PTR(-ENOMEM);
+ }
+
+diff --git a/lib/radix-tree.c b/lib/radix-tree.c
+index 8b7d8459bb9d..bc7852f95443 100644
+--- a/lib/radix-tree.c
++++ b/lib/radix-tree.c
+@@ -274,10 +274,11 @@ radix_tree_node_alloc(struct radix_tree_root *root)
+
+ /*
+ * Even if the caller has preloaded, try to allocate from the
+- * cache first for the new node to get accounted.
++ * cache first for the new node to get accounted to the memory
++ * cgroup.
+ */
+ ret = kmem_cache_alloc(radix_tree_node_cachep,
+- gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN);
++ gfp_mask | __GFP_NOWARN);
+ if (ret)
+ goto out;
+
+@@ -300,8 +301,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
+ kmemleak_update_trace(ret);
+ goto out;
+ }
+- ret = kmem_cache_alloc(radix_tree_node_cachep,
+- gfp_mask | __GFP_ACCOUNT);
++ ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+ out:
+ BUG_ON(radix_tree_is_internal_node(ret));
+ return ret;
+@@ -348,6 +348,12 @@ static int __radix_tree_preload(gfp_t gfp_mask)
+ struct radix_tree_node *node;
+ int ret = -ENOMEM;
+
++ /*
++ * Nodes preloaded by one cgroup can be used by another cgroup, so
++ * they should never be accounted to any particular memory cgroup.
++ */
++ gfp_mask &= ~__GFP_ACCOUNT;
++
+ preempt_disable();
+ rtp = this_cpu_ptr(&radix_tree_preloads);
+ while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index 5339c89dff63..ca847d96a980 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -4083,14 +4083,32 @@ static struct cftype mem_cgroup_legacy_files[] = {
+
+ static DEFINE_IDR(mem_cgroup_idr);
+
+-static void mem_cgroup_id_get(struct mem_cgroup *memcg)
++static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
+ {
+- atomic_inc(&memcg->id.ref);
++ atomic_add(n, &memcg->id.ref);
+ }
+
+-static void mem_cgroup_id_put(struct mem_cgroup *memcg)
++static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+ {
+- if (atomic_dec_and_test(&memcg->id.ref)) {
++ while (!atomic_inc_not_zero(&memcg->id.ref)) {
++ /*
++ * The root cgroup cannot be destroyed, so its refcount must
++ * always be >= 1.
++ */
++ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
++ VM_BUG_ON(1);
++ break;
++ }
++ memcg = parent_mem_cgroup(memcg);
++ if (!memcg)
++ memcg = root_mem_cgroup;
++ }
++ return memcg;
++}
++
++static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
++{
++ if (atomic_sub_and_test(n, &memcg->id.ref)) {
+ idr_remove(&mem_cgroup_idr, memcg->id.id);
+ memcg->id.id = 0;
+
+@@ -4099,6 +4117,16 @@ static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+ }
+ }
+
++static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
++{
++ mem_cgroup_id_get_many(memcg, 1);
++}
++
++static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
++{
++ mem_cgroup_id_put_many(memcg, 1);
++}
++
+ /**
+ * mem_cgroup_from_id - look up a memcg from a memcg id
+ * @id: the memcg id to look up
+@@ -4736,6 +4764,8 @@ static void __mem_cgroup_clear_mc(void)
+ if (!mem_cgroup_is_root(mc.from))
+ page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
+
++ mem_cgroup_id_put_many(mc.from, mc.moved_swap);
++
+ /*
+ * we charged both to->memory and to->memsw, so we
+ * should uncharge to->memory.
+@@ -4743,9 +4773,9 @@ static void __mem_cgroup_clear_mc(void)
+ if (!mem_cgroup_is_root(mc.to))
+ page_counter_uncharge(&mc.to->memory, mc.moved_swap);
+
+- css_put_many(&mc.from->css, mc.moved_swap);
++ mem_cgroup_id_get_many(mc.to, mc.moved_swap);
++ css_put_many(&mc.to->css, mc.moved_swap);
+
+- /* we've already done css_get(mc.to) */
+ mc.moved_swap = 0;
+ }
+ memcg_oom_recover(from);
+@@ -5805,7 +5835,7 @@ subsys_initcall(mem_cgroup_init);
+ */
+ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ {
+- struct mem_cgroup *memcg;
++ struct mem_cgroup *memcg, *swap_memcg;
+ unsigned short oldid;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+@@ -5820,16 +5850,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ if (!memcg)
+ return;
+
+- mem_cgroup_id_get(memcg);
+- oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
++ /*
++ * In case the memcg owning these pages has been offlined and doesn't
++ * have an ID allocated to it anymore, charge the closest online
++ * ancestor for the swap instead and transfer the memory+swap charge.
++ */
++ swap_memcg = mem_cgroup_id_get_online(memcg);
++ oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
+ VM_BUG_ON_PAGE(oldid, page);
+- mem_cgroup_swap_statistics(memcg, true);
++ mem_cgroup_swap_statistics(swap_memcg, true);
+
+ page->mem_cgroup = NULL;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
+
++ if (memcg != swap_memcg) {
++ if (!mem_cgroup_is_root(swap_memcg))
++ page_counter_charge(&swap_memcg->memsw, 1);
++ page_counter_uncharge(&memcg->memsw, 1);
++ }
++
+ /*
+ * Interrupts should be disabled here because the caller holds the
+ * mapping->tree_lock lock which is taken with interrupts-off. It is
+@@ -5868,11 +5909,14 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
+ if (!memcg)
+ return 0;
+
++ memcg = mem_cgroup_id_get_online(memcg);
++
+ if (!mem_cgroup_is_root(memcg) &&
+- !page_counter_try_charge(&memcg->swap, 1, &counter))
++ !page_counter_try_charge(&memcg->swap, 1, &counter)) {
++ mem_cgroup_id_put(memcg);
+ return -ENOMEM;
++ }
+
+- mem_cgroup_id_get(memcg);
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
+diff --git a/mm/mempool.c b/mm/mempool.c
+index 8f65464da5de..47a659dedd44 100644
+--- a/mm/mempool.c
++++ b/mm/mempool.c
+@@ -306,7 +306,7 @@ EXPORT_SYMBOL(mempool_resize);
+ * returns NULL. Note that due to preallocation, this function
+ * *never* fails when called from process contexts. (it might
+ * fail if called from an IRQ context.)
+- * Note: neither __GFP_NOMEMALLOC nor __GFP_ZERO are supported.
++ * Note: using __GFP_ZERO is not supported.
+ */
+ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+ {
+@@ -315,27 +315,16 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+ wait_queue_t wait;
+ gfp_t gfp_temp;
+
+- /* If oom killed, memory reserves are essential to prevent livelock */
+- VM_WARN_ON_ONCE(gfp_mask & __GFP_NOMEMALLOC);
+- /* No element size to zero on allocation */
+ VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
+-
+ might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
+
++ gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
+ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
+ gfp_mask |= __GFP_NOWARN; /* failures are OK */
+
+ gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);
+
+ repeat_alloc:
+- if (likely(pool->curr_nr)) {
+- /*
+- * Don't allocate from emergency reserves if there are
+- * elements available. This check is racy, but it will
+- * be rechecked each loop.
+- */
+- gfp_temp |= __GFP_NOMEMALLOC;
+- }
+
+ element = pool->alloc(gfp_temp, pool->pool_data);
+ if (likely(element != NULL))
+@@ -359,12 +348,11 @@ repeat_alloc:
+ * We use gfp mask w/o direct reclaim or IO for the first round. If
+ * alloc failed with that and @pool was empty, retry immediately.
+ */
+- if ((gfp_temp & ~__GFP_NOMEMALLOC) != gfp_mask) {
++ if (gfp_temp != gfp_mask) {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ gfp_temp = gfp_mask;
+ goto repeat_alloc;
+ }
+- gfp_temp = gfp_mask;
+
+ /* We must not sleep if !__GFP_DIRECT_RECLAIM */
+ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
+diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
+index 43d2cd862bc2..28d5ec269e48 100644
+--- a/net/bridge/br_input.c
++++ b/net/bridge/br_input.c
+@@ -288,6 +288,14 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
+ case 0x01: /* IEEE MAC (Pause) */
+ goto drop;
+
++ case 0x0E: /* 802.1AB LLDP */
++ fwd_mask |= p->br->group_fwd_mask;
++ if (fwd_mask & (1u << dest[5]))
++ goto forward;
++ *pskb = skb;
++ __br_handle_local_finish(skb);
++ return RX_HANDLER_PASS;
++
+ default:
+ /* Allow selective forwarding for most other protocols */
+ fwd_mask |= p->br->group_fwd_mask;
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index e00e972c4e6a..700b72ca5912 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -236,7 +236,8 @@ void tcp_select_initial_window(int __space, __u32 mss,
+ /* Set window scaling on max possible window
+ * See RFC1323 for an explanation of the limit to 14
+ */
+- space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
++ space = max_t(u32, space, sysctl_tcp_rmem[2]);
++ space = max_t(u32, space, sysctl_rmem_max);
+ space = min_t(u32, space, *window_clamp);
+ while (space > 65535 && (*rcv_wscale) < 14) {
+ space >>= 1;
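The tcp_select_initial_window() hunk above changes which values feed the window-scale loop: the old line discarded the caller-supplied space in favor of the two sysctls, while the fixed pair of max_t() calls only raises space to at least those sysctls. The userspace sketch below mimics the quoted shift loop to show the downstream effect on rcv_wscale; the sysctl values and the 16 MiB request are made-up sample numbers, and max_u32/min_u32 are local stand-ins for max_t/min_t.

/*
 * Contrast the old and new space computations feeding the
 * window-scale loop quoted above.
 */
#include <stdint.h>
#include <stdio.h>

#define max_u32(a, b) ((uint32_t)(a) > (uint32_t)(b) ? (uint32_t)(a) : (uint32_t)(b))
#define min_u32(a, b) ((uint32_t)(a) < (uint32_t)(b) ? (uint32_t)(a) : (uint32_t)(b))

static int rcv_wscale_for(uint32_t space, uint32_t window_clamp)
{
	int wscale = 0;

	space = min_u32(space, window_clamp);
	while (space > 65535 && wscale < 14) {	/* RFC 1323 limit of 14 */
		space >>= 1;
		wscale++;
	}
	return wscale;
}

int main(void)
{
	uint32_t tcp_rmem_max = 6291456;	/* sample sysctl_tcp_rmem[2] */
	uint32_t rmem_max = 212992;		/* sample sysctl_rmem_max */
	uint32_t space = 16 << 20;		/* caller asked for 16 MiB */
	uint32_t clamp = space;

	/* old: caller's space ignored entirely */
	uint32_t old_space = max_u32(tcp_rmem_max, rmem_max);
	/* new: space only raised to at least the sysctls */
	uint32_t new_space = max_u32(max_u32(space, tcp_rmem_max), rmem_max);

	printf("old wscale: %d\n", rcv_wscale_for(old_space, clamp));
	printf("new wscale: %d\n", rcv_wscale_for(new_space, clamp));
	return 0;
}

With these sample numbers the fixed computation selects a larger scale factor, so a receive buffer bigger than both sysctls can actually be advertised.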
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 4aed8fc23d32..e61f7cd65d08 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1581,9 +1581,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ udp_lib_checksum_complete(skb))
+ goto csum_error;
+
+- if (sk_filter(sk, skb))
+- goto drop;
+- if (unlikely(skb->len < sizeof(struct udphdr)))
++ if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
+ goto drop;
+
+ udp_csum_pull_header(skb);
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index 47f837a58e0a..047c75a798b1 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -3562,6 +3562,10 @@ restart:
+ if (state != INET6_IFADDR_STATE_DEAD) {
+ __ipv6_ifa_notify(RTM_DELADDR, ifa);
+ inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
++ } else {
++ if (idev->cnf.forwarding)
++ addrconf_leave_anycast(ifa);
++ addrconf_leave_solict(ifa->idev, &ifa->addr);
+ }
+
+ write_lock_bh(&idev->lock);
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index acc09705618b..42a2edf7c9ef 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -618,9 +618,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ udp_lib_checksum_complete(skb))
+ goto csum_error;
+
+- if (sk_filter(sk, skb))
+- goto drop;
+- if (unlikely(skb->len < sizeof(struct udphdr)))
++ if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
+ goto drop;
+
+ udp_csum_pull_header(skb);
+diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
+index 923abd6b3064..8d2f7c9b491d 100644
+--- a/net/irda/af_irda.c
++++ b/net/irda/af_irda.c
+@@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
+ }
+
+ /* Check if we have opened a local TSAP */
+- if (!self->tsap)
+- irda_open_tsap(self, LSAP_ANY, addr->sir_name);
++ if (!self->tsap) {
++ err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
++ if (err)
++ goto out;
++ }
+
+ /* Move to connecting socket, start sending Connect Requests */
+ sock->state = SS_CONNECTING;
+diff --git a/net/sctp/input.c b/net/sctp/input.c
+index 47cf4604d19c..f093322560e6 100644
+--- a/net/sctp/input.c
++++ b/net/sctp/input.c
+@@ -328,6 +328,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+ */
+
+ sk = rcvr->sk;
++ local_bh_disable();
+ bh_lock_sock(sk);
+
+ if (sock_owned_by_user(sk)) {
+@@ -339,6 +340,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+ sctp_inq_push(inqueue, chunk);
+
+ bh_unlock_sock(sk);
++ local_bh_enable();
+
+ /* If the chunk was backloged again, don't drop refs */
+ if (backloged)
+diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
+index 9d87bba0ff1d..b335ffcef0b9 100644
+--- a/net/sctp/inqueue.c
++++ b/net/sctp/inqueue.c
+@@ -89,12 +89,10 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
+ * Eventually, we should clean up inqueue to not rely
+ * on the BH related data structures.
+ */
+- local_bh_disable();
+ list_add_tail(&chunk->list, &q->in_chunk_list);
+ if (chunk->asoc)
+ chunk->asoc->stats.ipackets++;
+ q->immediate.func(&q->immediate);
+- local_bh_enable();
+ }
+
+ /* Peek at the next chunk on the inqeue. */
+diff --git a/net/sctp/socket.c b/net/sctp/socket.c
+index 67154b848aa9..7f5689a93de9 100644
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -4301,6 +4301,7 @@ int sctp_transport_walk_start(struct rhashtable_iter *iter)
+
+ err = rhashtable_walk_start(iter);
+ if (err && err != -EAGAIN) {
++ rhashtable_walk_stop(iter);
+ rhashtable_walk_exit(iter);
+ return err;
+ }
+diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
+index ad4fa49ad1db..9068369f8a1b 100644
+--- a/security/apparmor/apparmorfs.c
++++ b/security/apparmor/apparmorfs.c
+@@ -331,6 +331,7 @@ static int aa_fs_seq_hash_show(struct seq_file *seq, void *v)
+ seq_printf(seq, "%.2x", profile->hash[i]);
+ seq_puts(seq, "\n");
+ }
++ aa_put_profile(profile);
+
+ return 0;
+ }
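The mem_cgroup_id_get_online() helper added in the mm/memcontrol.c hunks earlier in this patch implements a pattern that generalizes beyond memcg: climb toward the root until an atomic inc-not-zero succeeds, thereby pinning the nearest ancestor whose reference count is still live. The userspace model below uses an invented node type and names; the kernel version additionally WARNs if it ever fails at the root, whose refcount is kept >= 1 by invariant.

/*
 * Model of the "pin nearest online ancestor" pattern from
 * mem_cgroup_id_get_online() above.
 */
#include <stdatomic.h>
#include <stdio.h>

struct node {
	const char *name;
	atomic_int ref;		/* 0 == offlined, no new refs allowed */
	struct node *parent;	/* NULL at the root */
};

static int inc_not_zero(atomic_int *ref)
{
	int old = atomic_load(ref);

	while (old != 0)
		if (atomic_compare_exchange_weak(ref, &old, old + 1))
			return 1;
	return 0;
}

static struct node *get_online(struct node *n)
{
	while (!inc_not_zero(&n->ref)) {
		if (!n->parent)		/* root: the kernel WARNs here */
			return NULL;
		n = n->parent;
	}
	return n;
}

int main(void)
{
	struct node root = { "root", 1, NULL };
	struct node mid  = { "mid",  0, &root };	/* already offlined */
	struct node leaf = { "leaf", 0, &mid };		/* already offlined */
	struct node *got = get_online(&leaf);

	printf("charged to: %s\n", got ? got->name : "none");	/* root */
	return 0;
}

This is what lets the swap-out path charge the closest online ancestor when the owning cgroup has already been offlined, instead of recording an ID that no longer resolves.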
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-08-20 16:33 Mike Pagano
0 siblings, 0 replies; 20+ messages in thread
From: Mike Pagano @ 2016-08-20 16:33 UTC (permalink / raw
To: gentoo-commits
commit: 418c531440e923758b1f19cbe6adb30954ecccbb
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Aug 20 16:33:11 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Aug 20 16:33:11 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=418c5314
Linux patch 4.7.2
0000_README | 4 +
1001_linux-4.7.2.patch | 7668 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 7672 insertions(+)
diff --git a/0000_README b/0000_README
index 0aa3187..23c35b0 100644
--- a/0000_README
+++ b/0000_README
@@ -47,6 +47,10 @@ Patch: 1000_linux-4.7.1.patch
From: http://www.kernel.org
Desc: Linux 4.7.1
+Patch: 1001_linux-4.7.2.patch
+From: http://www.kernel.org
+Desc: Linux 4.7.2
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1001_linux-4.7.2.patch b/1001_linux-4.7.2.patch
new file mode 100644
index 0000000..e9febe7
--- /dev/null
+++ b/1001_linux-4.7.2.patch
@@ -0,0 +1,7668 @@
+diff --git a/Documentation/module-signing.txt b/Documentation/module-signing.txt
+index 696d5caf4fd8..f0e3361db20c 100644
+--- a/Documentation/module-signing.txt
++++ b/Documentation/module-signing.txt
+@@ -271,3 +271,9 @@ Since the private key is used to sign modules, viruses and malware could use
+ the private key to sign modules and compromise the operating system. The
+ private key must be either destroyed or moved to a secure location and not kept
+ in the root node of the kernel source tree.
++
++If you use the same private key to sign modules for multiple kernel
++configurations, you must ensure that the module version information is
++sufficient to prevent loading a module into a different kernel. Either
++set CONFIG_MODVERSIONS=y or ensure that each configuration has a different
++kernel release string by changing EXTRAVERSION or CONFIG_LOCALVERSION.
+diff --git a/Makefile b/Makefile
+index 84335c0b2eda..bb98f1ce854e 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 7
+-SUBLEVEL = 1
++SUBLEVEL = 2
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
+index 858f98ef7f1b..0f92d97432a2 100644
+--- a/arch/arc/include/asm/pgtable.h
++++ b/arch/arc/include/asm/pgtable.h
+@@ -110,7 +110,7 @@
+ #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
+
+ /* Set of bits not changed in pte_modify */
+-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
++#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
+
+ /* More Abbrevaited helpers */
+ #define PAGE_U_NONE __pgprot(___DEF)
+diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
+index 73d7e4c75b7d..ab74b5d9186c 100644
+--- a/arch/arc/mm/dma.c
++++ b/arch/arc/mm/dma.c
+@@ -92,7 +92,8 @@ static void *arc_dma_alloc(struct device *dev, size_t size,
+ static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, struct dma_attrs *attrs)
+ {
+- struct page *page = virt_to_page(dma_handle);
++ phys_addr_t paddr = plat_dma_to_phys(dev, dma_handle);
++ struct page *page = virt_to_page(paddr);
+ int is_non_coh = 1;
+
+ is_non_coh = dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs) ||
+diff --git a/arch/arm/boot/dts/arm-realview-pbx-a9.dts b/arch/arm/boot/dts/arm-realview-pbx-a9.dts
+index db808f92dd79..90d00b407f85 100644
+--- a/arch/arm/boot/dts/arm-realview-pbx-a9.dts
++++ b/arch/arm/boot/dts/arm-realview-pbx-a9.dts
+@@ -70,13 +70,12 @@
+ * associativity as these may be erroneously set
+ * up by boot loader(s).
+ */
+- cache-size = <1048576>; // 1MB
+- cache-sets = <4096>;
++ cache-size = <131072>; // 128KB
++ cache-sets = <512>;
+ cache-line-size = <32>;
+ arm,parity-disable;
+- arm,tag-latency = <1>;
+- arm,data-latency = <1 1>;
+- arm,dirty-latency = <1>;
++ arm,tag-latency = <1 1 1>;
++ arm,data-latency = <1 1 1>;
+ };
+
+ scu: scu@1f000000 {
+diff --git a/arch/arm/boot/dts/sun4i-a10-a1000.dts b/arch/arm/boot/dts/sun4i-a10-a1000.dts
+index c92a1ae33a1e..fa70b8fbf221 100644
+--- a/arch/arm/boot/dts/sun4i-a10-a1000.dts
++++ b/arch/arm/boot/dts/sun4i-a10-a1000.dts
+@@ -84,6 +84,7 @@
+ regulator-name = "emac-3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
++ startup-delay-us = <20000>;
+ enable-active-high;
+ gpio = <&pio 7 15 GPIO_ACTIVE_HIGH>;
+ };
+diff --git a/arch/arm/boot/dts/sun4i-a10-hackberry.dts b/arch/arm/boot/dts/sun4i-a10-hackberry.dts
+index 2b17c5199151..6de83a6187d0 100644
+--- a/arch/arm/boot/dts/sun4i-a10-hackberry.dts
++++ b/arch/arm/boot/dts/sun4i-a10-hackberry.dts
+@@ -66,6 +66,7 @@
+ regulator-name = "emac-3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
++ startup-delay-us = <20000>;
+ enable-active-high;
+ gpio = <&pio 7 19 GPIO_ACTIVE_HIGH>;
+ };
+diff --git a/arch/arm/boot/dts/sun4i-a10-jesurun-q5.dts b/arch/arm/boot/dts/sun4i-a10-jesurun-q5.dts
+index 7afc7a64eef1..e28f080b1fd5 100644
+--- a/arch/arm/boot/dts/sun4i-a10-jesurun-q5.dts
++++ b/arch/arm/boot/dts/sun4i-a10-jesurun-q5.dts
+@@ -80,6 +80,7 @@
+ regulator-name = "emac-3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
++ startup-delay-us = <20000>;
+ enable-active-high;
+ gpio = <&pio 7 19 GPIO_ACTIVE_HIGH>; /* PH19 */
+ };
+diff --git a/arch/arm/boot/dts/sun5i-a10s-wobo-i5.dts b/arch/arm/boot/dts/sun5i-a10s-wobo-i5.dts
+index 9fea918f949e..39731a78f087 100644
+--- a/arch/arm/boot/dts/sun5i-a10s-wobo-i5.dts
++++ b/arch/arm/boot/dts/sun5i-a10s-wobo-i5.dts
+@@ -79,6 +79,7 @@
+ regulator-name = "emac-3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
++ startup-delay-us = <20000>;
+ enable-active-high;
+ gpio = <&pio 0 2 GPIO_ACTIVE_HIGH>;
+ };
+diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts
+index 941f36263c8f..f4d8125c1bfc 100644
+--- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts
++++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts
+@@ -1386,7 +1386,7 @@
+ * Pin 41: BR_UART1_TXD
+ * Pin 44: BR_UART1_RXD
+ */
+- serial@70006000 {
++ serial@0,70006000 {
+ compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
+ status = "okay";
+ };
+@@ -1398,7 +1398,7 @@
+ * Pin 71: UART2_CTS_L
+ * Pin 74: UART2_RTS_L
+ */
+- serial@70006040 {
++ serial@0,70006040 {
+ compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
+ status = "okay";
+ };
+diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig
+index b6e54ee9bdbd..ca39c04fec6b 100644
+--- a/arch/arm/configs/aspeed_g4_defconfig
++++ b/arch/arm/configs/aspeed_g4_defconfig
+@@ -58,7 +58,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
+ # CONFIG_IOMMU_SUPPORT is not set
+ CONFIG_FIRMWARE_MEMMAP=y
+ CONFIG_FANOTIFY=y
+-CONFIG_PRINTK_TIME=1
++CONFIG_PRINTK_TIME=y
+ CONFIG_DYNAMIC_DEBUG=y
+ CONFIG_STRIP_ASM_SYMS=y
+ CONFIG_PAGE_POISONING=y
+diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig
+index 892605167357..4f366b0370e9 100644
+--- a/arch/arm/configs/aspeed_g5_defconfig
++++ b/arch/arm/configs/aspeed_g5_defconfig
+@@ -59,7 +59,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
+ # CONFIG_IOMMU_SUPPORT is not set
+ CONFIG_FIRMWARE_MEMMAP=y
+ CONFIG_FANOTIFY=y
+-CONFIG_PRINTK_TIME=1
++CONFIG_PRINTK_TIME=y
+ CONFIG_DYNAMIC_DEBUG=y
+ CONFIG_STRIP_ASM_SYMS=y
+ CONFIG_PAGE_POISONING=y
+diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
+index ff7ed5697d3e..d2485c749ad5 100644
+--- a/arch/arm/mm/dma-mapping.c
++++ b/arch/arm/mm/dma-mapping.c
+@@ -49,6 +49,7 @@ struct arm_dma_alloc_args {
+ pgprot_t prot;
+ const void *caller;
+ bool want_vaddr;
++ int coherent_flag;
+ };
+
+ struct arm_dma_free_args {
+@@ -59,6 +60,9 @@ struct arm_dma_free_args {
+ bool want_vaddr;
+ };
+
++#define NORMAL 0
++#define COHERENT 1
++
+ struct arm_dma_allocator {
+ void *(*alloc)(struct arm_dma_alloc_args *args,
+ struct page **ret_page);
+@@ -272,7 +276,7 @@ static u64 get_coherent_dma_mask(struct device *dev)
+ return mask;
+ }
+
+-static void __dma_clear_buffer(struct page *page, size_t size)
++static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag)
+ {
+ /*
+ * Ensure that the allocated pages are zeroed, and that any data
+@@ -284,17 +288,21 @@ static void __dma_clear_buffer(struct page *page, size_t size)
+ while (size > 0) {
+ void *ptr = kmap_atomic(page);
+ memset(ptr, 0, PAGE_SIZE);
+- dmac_flush_range(ptr, ptr + PAGE_SIZE);
++ if (coherent_flag != COHERENT)
++ dmac_flush_range(ptr, ptr + PAGE_SIZE);
+ kunmap_atomic(ptr);
+ page++;
+ size -= PAGE_SIZE;
+ }
+- outer_flush_range(base, end);
++ if (coherent_flag != COHERENT)
++ outer_flush_range(base, end);
+ } else {
+ void *ptr = page_address(page);
+ memset(ptr, 0, size);
+- dmac_flush_range(ptr, ptr + size);
+- outer_flush_range(__pa(ptr), __pa(ptr) + size);
++ if (coherent_flag != COHERENT) {
++ dmac_flush_range(ptr, ptr + size);
++ outer_flush_range(__pa(ptr), __pa(ptr) + size);
++ }
+ }
+ }
+
+@@ -302,7 +310,8 @@ static void __dma_clear_buffer(struct page *page, size_t size)
+ * Allocate a DMA buffer for 'dev' of size 'size' using the
+ * specified gfp mask. Note that 'size' must be page aligned.
+ */
+-static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
++static struct page *__dma_alloc_buffer(struct device *dev, size_t size,
++ gfp_t gfp, int coherent_flag)
+ {
+ unsigned long order = get_order(size);
+ struct page *page, *p, *e;
+@@ -318,7 +327,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf
+ for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
+ __free_page(p);
+
+- __dma_clear_buffer(page, size);
++ __dma_clear_buffer(page, size, coherent_flag);
+
+ return page;
+ }
+@@ -340,7 +349,8 @@ static void __dma_free_buffer(struct page *page, size_t size)
+
+ static void *__alloc_from_contiguous(struct device *dev, size_t size,
+ pgprot_t prot, struct page **ret_page,
+- const void *caller, bool want_vaddr);
++ const void *caller, bool want_vaddr,
++ int coherent_flag);
+
+ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
+ pgprot_t prot, struct page **ret_page,
+@@ -405,10 +415,13 @@ static int __init atomic_pool_init(void)
+ atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
+ if (!atomic_pool)
+ goto out;
+-
++ /*
++ * The atomic pool is only used for non-coherent allocations
++ * so we must pass NORMAL for coherent_flag.
++ */
+ if (dev_get_cma_area(NULL))
+ ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
+- &page, atomic_pool_init, true);
++ &page, atomic_pool_init, true, NORMAL);
+ else
+ ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
+ &page, atomic_pool_init, true);
+@@ -522,7 +535,11 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
+ {
+ struct page *page;
+ void *ptr = NULL;
+- page = __dma_alloc_buffer(dev, size, gfp);
++ /*
++ * __alloc_remap_buffer is only called when the device is
++ * non-coherent
++ */
++ page = __dma_alloc_buffer(dev, size, gfp, NORMAL);
+ if (!page)
+ return NULL;
+ if (!want_vaddr)
+@@ -577,7 +594,8 @@ static int __free_from_pool(void *start, size_t size)
+
+ static void *__alloc_from_contiguous(struct device *dev, size_t size,
+ pgprot_t prot, struct page **ret_page,
+- const void *caller, bool want_vaddr)
++ const void *caller, bool want_vaddr,
++ int coherent_flag)
+ {
+ unsigned long order = get_order(size);
+ size_t count = size >> PAGE_SHIFT;
+@@ -588,7 +606,7 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
+ if (!page)
+ return NULL;
+
+- __dma_clear_buffer(page, size);
++ __dma_clear_buffer(page, size, coherent_flag);
+
+ if (!want_vaddr)
+ goto out;
+@@ -638,7 +656,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
+ #define __get_dma_pgprot(attrs, prot) __pgprot(0)
+ #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL
+ #define __alloc_from_pool(size, ret_page) NULL
+-#define __alloc_from_contiguous(dev, size, prot, ret, c, wv) NULL
++#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag) NULL
+ #define __free_from_pool(cpu_addr, size) do { } while (0)
+ #define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0)
+ #define __dma_free_remap(cpu_addr, size) do { } while (0)
+@@ -649,7 +667,8 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
+ struct page **ret_page)
+ {
+ struct page *page;
+- page = __dma_alloc_buffer(dev, size, gfp);
++ /* __alloc_simple_buffer is only called when the device is coherent */
++ page = __dma_alloc_buffer(dev, size, gfp, COHERENT);
+ if (!page)
+ return NULL;
+
+@@ -679,7 +698,7 @@ static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
+ {
+ return __alloc_from_contiguous(args->dev, args->size, args->prot,
+ ret_page, args->caller,
+- args->want_vaddr);
++ args->want_vaddr, args->coherent_flag);
+ }
+
+ static void cma_allocator_free(struct arm_dma_free_args *args)
+@@ -746,6 +765,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+ .prot = prot,
+ .caller = caller,
+ .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs),
++ .coherent_flag = is_coherent ? COHERENT : NORMAL,
+ };
+
+ #ifdef CONFIG_DMA_API_DEBUG
+@@ -1253,7 +1273,8 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping,
+ static const int iommu_order_array[] = { 9, 8, 4, 0 };
+
+ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
+- gfp_t gfp, struct dma_attrs *attrs)
++ gfp_t gfp, struct dma_attrs *attrs,
++ int coherent_flag)
+ {
+ struct page **pages;
+ int count = size >> PAGE_SHIFT;
+@@ -1277,7 +1298,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
+ if (!page)
+ goto error;
+
+- __dma_clear_buffer(page, size);
++ __dma_clear_buffer(page, size, coherent_flag);
+
+ for (i = 0; i < count; i++)
+ pages[i] = page + i;
+@@ -1327,7 +1348,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
+ pages[i + j] = pages[i] + j;
+ }
+
+- __dma_clear_buffer(pages[i], PAGE_SIZE << order);
++ __dma_clear_buffer(pages[i], PAGE_SIZE << order, coherent_flag);
+ i += 1 << order;
+ count -= 1 << order;
+ }
+@@ -1505,7 +1526,8 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
+ */
+ gfp &= ~(__GFP_COMP);
+
+- pages = __iommu_alloc_buffer(dev, size, gfp, attrs);
++ /* For now always consider we are in a non-coherent case */
++ pages = __iommu_alloc_buffer(dev, size, gfp, attrs, NORMAL);
+ if (!pages)
+ return NULL;
+
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index 5a0a691d4220..20384925bb0f 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -872,7 +872,7 @@ config RELOCATABLE
+
+ config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image"
+- select ARM64_MODULE_PLTS
++ select ARM64_MODULE_PLTS if MODULES
+ select RELOCATABLE
+ help
+ Randomizes the virtual address at which the kernel image is
+diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
+index 8b4a7c9154e9..080203e3aa2f 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
+@@ -670,7 +670,7 @@
+ #address-cells = <0>;
+
+ reg = <0x0 0xffb71000 0x0 0x1000>,
+- <0x0 0xffb72000 0x0 0x1000>,
++ <0x0 0xffb72000 0x0 0x2000>,
+ <0x0 0xffb74000 0x0 0x2000>,
+ <0x0 0xffb76000 0x0 0x2000>;
+ interrupts = <GIC_PPI 9
+diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
+index 4fbf3c54275c..0800d23e2fdd 100644
+--- a/arch/arm64/kernel/debug-monitors.c
++++ b/arch/arm64/kernel/debug-monitors.c
+@@ -151,7 +151,6 @@ static int debug_monitors_init(void)
+ /* Clear the OS lock. */
+ on_each_cpu(clear_os_lock, NULL, 1);
+ isb();
+- local_dbg_enable();
+
+ /* Register hotplug handler. */
+ __register_cpu_notifier(&os_lock_nb);
+diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
+index 21ab5df9fa76..65d81f965e74 100644
+--- a/arch/arm64/kernel/hibernate.c
++++ b/arch/arm64/kernel/hibernate.c
+@@ -35,6 +35,7 @@
+ #include <asm/sections.h>
+ #include <asm/smp.h>
+ #include <asm/suspend.h>
++#include <asm/sysreg.h>
+ #include <asm/virt.h>
+
+ /*
+@@ -217,12 +218,22 @@ static int create_safe_exec_page(void *src_start, size_t length,
+ set_pte(pte, __pte(virt_to_phys((void *)dst) |
+ pgprot_val(PAGE_KERNEL_EXEC)));
+
+- /* Load our new page tables */
+- asm volatile("msr ttbr0_el1, %0;"
+- "isb;"
+- "tlbi vmalle1is;"
+- "dsb ish;"
+- "isb" : : "r"(virt_to_phys(pgd)));
++ /*
++ * Load our new page tables. A strict BBM approach requires that we
++ * ensure that TLBs are free of any entries that may overlap with the
++ * global mappings we are about to install.
++ *
++ * For a real hibernate/resume cycle TTBR0 currently points to a zero
++ * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
++ * runtime services), while for a userspace-driven test_resume cycle it
++ * points to userspace page tables (and we must point it at a zero page
++ * ourselves). Elsewhere we only (un)install the idmap with preemption
++ * disabled, so T0SZ should be as required regardless.
++ */
++ cpu_set_reserved_ttbr0();
++ local_flush_tlb_all();
++ write_sysreg(virt_to_phys(pgd), ttbr0_el1);
++ isb();
+
+ *phys_dst_addr = virt_to_phys((void *)dst);
+
+@@ -394,6 +405,38 @@ int swsusp_arch_resume(void)
+ void *, phys_addr_t, phys_addr_t);
+
+ /*
++ * Restoring the memory image will overwrite the ttbr1 page tables.
++ * Create a second copy of just the linear map, and use this when
++ * restoring.
++ */
++ tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
++ if (!tmp_pg_dir) {
++ pr_err("Failed to allocate memory for temporary page tables.");
++ rc = -ENOMEM;
++ goto out;
++ }
++ rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
++ if (rc)
++ goto out;
++
++ /*
++ * Since we only copied the linear map, we need to find restore_pblist's
++ * linear map address.
++ */
++ lm_restore_pblist = LMADDR(restore_pblist);
++
++ /*
++ * We need a zero page that is zero before & after resume in order to
++ * to break before make on the ttbr1 page tables.
++ */
++ zero_page = (void *)get_safe_page(GFP_ATOMIC);
++ if (!zero_page) {
++ pr_err("Failed to allocate zero page.");
++ rc = -ENOMEM;
++ goto out;
++ }
++
++ /*
+ * Locate the exit code in the bottom-but-one page, so that *NULL
+ * still has disastrous affects.
+ */
+@@ -419,27 +462,6 @@ int swsusp_arch_resume(void)
+ __flush_dcache_area(hibernate_exit, exit_size);
+
+ /*
+- * Restoring the memory image will overwrite the ttbr1 page tables.
+- * Create a second copy of just the linear map, and use this when
+- * restoring.
+- */
+- tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+- if (!tmp_pg_dir) {
+- pr_err("Failed to allocate memory for temporary page tables.");
+- rc = -ENOMEM;
+- goto out;
+- }
+- rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
+- if (rc)
+- goto out;
+-
+- /*
+- * Since we only copied the linear map, we need to find restore_pblist's
+- * linear map address.
+- */
+- lm_restore_pblist = LMADDR(restore_pblist);
+-
+- /*
+ * KASLR will cause the el2 vectors to be in a different location in
+ * the resumed kernel. Load hibernate's temporary copy into el2.
+ *
+@@ -453,12 +475,6 @@ int swsusp_arch_resume(void)
+ __hyp_set_vectors(el2_vectors);
+ }
+
+- /*
+- * We need a zero page that is zero before & after resume in order to
+- * to break before make on the ttbr1 page tables.
+- */
+- zero_page = (void *)get_safe_page(GFP_ATOMIC);
+-
+ hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
+ resume_hdr.reenter_kernel, lm_restore_pblist,
+ resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
+diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
+index 62ff3c0622e2..490db85dec23 100644
+--- a/arch/arm64/kernel/smp.c
++++ b/arch/arm64/kernel/smp.c
+@@ -267,7 +267,6 @@ asmlinkage void secondary_start_kernel(void)
+ set_cpu_online(cpu, true);
+ complete(&cpu_running);
+
+- local_dbg_enable();
+ local_irq_enable();
+ local_async_enable();
+
+@@ -437,9 +436,9 @@ void __init smp_cpus_done(unsigned int max_cpus)
+
+ void __init smp_prepare_boot_cpu(void)
+ {
++ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+ cpuinfo_store_boot_cpu();
+ save_boot_cpu_run_el();
+- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+ }
+
+ static u64 __init of_get_cpu_mpidr(struct device_node *dn)
+@@ -694,6 +693,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
+ smp_store_cpu_info(smp_processor_id());
+
+ /*
++ * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set
++ * secondary CPUs present.
++ */
++ if (max_cpus == 0)
++ return;
++
++ /*
+ * Initialise the present map (which describes the set of CPUs
+ * actually populated at the present time) and release the
+ * secondaries from the bootloader.
+diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
+index 435e820e898d..e564d4595998 100644
+--- a/arch/arm64/kernel/vmlinux.lds.S
++++ b/arch/arm64/kernel/vmlinux.lds.S
+@@ -181,9 +181,9 @@ SECTIONS
+ *(.hash)
+ }
+
+- __rela_offset = ADDR(.rela) - KIMAGE_VADDR;
++ __rela_offset = ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR);
+ __rela_size = SIZEOF(.rela);
+- __dynsym_offset = ADDR(.dynsym) - KIMAGE_VADDR;
++ __dynsym_offset = ABSOLUTE(ADDR(.dynsym) - KIMAGE_VADDR);
+
+ . = ALIGN(SEGMENT_ALIGN);
+ __init_end = .;
+diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
+index 0f7c40eb3f53..934137647837 100644
+--- a/arch/arm64/kvm/hyp/sysreg-sr.c
++++ b/arch/arm64/kvm/hyp/sysreg-sr.c
+@@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
+ /*
+ * Non-VHE: Both host and guest must save everything.
+ *
+- * VHE: Host must save tpidr*_el[01], actlr_el1, sp0, pc, pstate, and
+- * guest must save everything.
++ * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc,
++ * pstate, and guest must save everything.
+ */
+
+ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
+@@ -37,6 +37,7 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
+ ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
+ ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
+ ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
++ ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
+ ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
+ ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
+ ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
+@@ -61,7 +62,6 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+ ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
+ ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
+ ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
+- ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
+
+ ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
+ ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
+@@ -90,6 +90,7 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
+ write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
+ write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
+ write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
++ write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
+ write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
+ write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
+@@ -114,7 +115,6 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+ write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
+ write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
+ write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
+- write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
+
+ write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
+ write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
+diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
+index e9e0e6db73f6..898c0e6aedd4 100644
+--- a/arch/arm64/kvm/inject_fault.c
++++ b/arch/arm64/kvm/inject_fault.c
+@@ -132,16 +132,14 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
+ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
+ {
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+- bool is_aarch32;
++ bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
+ u32 esr = 0;
+
+- is_aarch32 = vcpu_mode_is_32bit(vcpu);
+-
+- *vcpu_spsr(vcpu) = cpsr;
+ *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+-
+ *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
++
+ *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
++ *vcpu_spsr(vcpu) = cpsr;
+
+ vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+
+@@ -172,11 +170,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+
+- *vcpu_spsr(vcpu) = cpsr;
+ *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+-
+ *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
++
+ *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
++ *vcpu_spsr(vcpu) = cpsr;
+
+ /*
+ * Build an unknown exception, depending on the instruction
+diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
+index 0f85a46c3e18..3e90a2cad995 100644
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -748,9 +748,9 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
+ /*
+ * Check whether the physical FDT address is set and meets the minimum
+ * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
+- * at least 8 bytes so that we can always access the size field of the
+- * FDT header after mapping the first chunk, double check here if that
+- * is indeed the case.
++ * at least 8 bytes so that we can always access the magic and size
++ * fields of the FDT header after mapping the first chunk, double check
++ * here if that is indeed the case.
+ */
+ BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
+ if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
+@@ -778,7 +778,7 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
+ create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
+ dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
+
+- if (fdt_check_header(dt_virt) != 0)
++ if (fdt_magic(dt_virt) != FDT_MAGIC)
+ return NULL;
+
+ *size = fdt_totalsize(dt_virt);
+diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
+index c4317879b938..5bb61de23201 100644
+--- a/arch/arm64/mm/proc.S
++++ b/arch/arm64/mm/proc.S
+@@ -180,6 +180,8 @@ ENTRY(__cpu_setup)
+ msr cpacr_el1, x0 // Enable FP/ASIMD
+ mov x0, #1 << 12 // Reset mdscr_el1 and disable
+ msr mdscr_el1, x0 // access to the DCC from EL0
++ isb // Unmask debug exceptions now,
++ enable_dbg // since this is per-cpu
+ reset_pmuserenr_el0 x0 // Disable PMU access from EL0
+ /*
+ * Memory region attributes for LPAE:
+diff --git a/arch/metag/include/asm/cmpxchg_lnkget.h b/arch/metag/include/asm/cmpxchg_lnkget.h
+index 0154e2807ebb..2369ad394876 100644
+--- a/arch/metag/include/asm/cmpxchg_lnkget.h
++++ b/arch/metag/include/asm/cmpxchg_lnkget.h
+@@ -73,7 +73,7 @@ static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+ " DCACHE [%2], %0\n"
+ #endif
+ "2:\n"
+- : "=&d" (temp), "=&da" (retval)
++ : "=&d" (temp), "=&d" (retval)
+ : "da" (m), "bd" (old), "da" (new)
+ : "cc"
+ );
+diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
+index e4c21bbf9422..804d2a2a19fe 100644
+--- a/arch/mips/kernel/cevt-r4k.c
++++ b/arch/mips/kernel/cevt-r4k.c
+@@ -276,12 +276,7 @@ int r4k_clockevent_init(void)
+ CLOCK_EVT_FEAT_C3STOP |
+ CLOCK_EVT_FEAT_PERCPU;
+
+- clockevent_set_clock(cd, mips_hpt_frequency);
+-
+- /* Calculate the min / max delta */
+- cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
+ min_delta = calculate_min_delta();
+- cd->min_delta_ns = clockevent_delta2ns(min_delta, cd);
+
+ cd->rating = 300;
+ cd->irq = irq;
+@@ -289,7 +284,7 @@ int r4k_clockevent_init(void)
+ cd->set_next_event = mips_next_event;
+ cd->event_handler = mips_event_handler;
+
+- clockevents_register_device(cd);
++ clockevents_config_and_register(cd, mips_hpt_frequency, min_delta, 0x7fffffff);
+
+ if (cp0_timer_irq_installed)
+ return 0;
+diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
+index 1f910563fdf6..d76275da54cb 100644
+--- a/arch/mips/kernel/csrc-r4k.c
++++ b/arch/mips/kernel/csrc-r4k.c
+@@ -23,7 +23,7 @@ static struct clocksource clocksource_mips = {
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ };
+
+-static u64 notrace r4k_read_sched_clock(void)
++static u64 __maybe_unused notrace r4k_read_sched_clock(void)
+ {
+ return read_c0_count();
+ }
+@@ -82,7 +82,9 @@ int __init init_r4k_clocksource(void)
+
+ clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
+
++#ifndef CONFIG_CPU_FREQ
+ sched_clock_register(r4k_read_sched_clock, 32, mips_hpt_frequency);
++#endif
+
+ return 0;
+ }
+diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
+index 645c8a1982a7..2b42a74ed771 100644
+--- a/arch/mips/kvm/emulate.c
++++ b/arch/mips/kvm/emulate.c
+@@ -1615,8 +1615,14 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
+
+ preempt_disable();
+ if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
+- if (kvm_mips_host_tlb_lookup(vcpu, va) < 0)
+- kvm_mips_handle_kseg0_tlb_fault(va, vcpu);
++ if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 &&
++ kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) {
++ kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n",
++ __func__, va, vcpu, read_c0_entryhi());
++ er = EMULATE_FAIL;
++ preempt_enable();
++ goto done;
++ }
+ } else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) ||
+ KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
+ int index;
+@@ -1654,14 +1660,19 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
+ run, vcpu);
+ preempt_enable();
+ goto dont_update_pc;
+- } else {
+- /*
+- * We fault an entry from the guest tlb to the
+- * shadow host TLB
+- */
+- kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb,
+- NULL,
+- NULL);
++ }
++ /*
++ * We fault an entry from the guest tlb to the
++ * shadow host TLB
++ */
++ if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb,
++ NULL, NULL)) {
++ kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
++ __func__, va, index, vcpu,
++ read_c0_entryhi());
++ er = EMULATE_FAIL;
++ preempt_enable();
++ goto done;
+ }
+ }
+ } else {
+@@ -2625,8 +2636,13 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
+ * OK we have a Guest TLB entry, now inject it into the
+ * shadow host TLB
+ */
+- kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, NULL,
+- NULL);
++ if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb,
++ NULL, NULL)) {
++ kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
++ __func__, va, index, vcpu,
++ read_c0_entryhi());
++ er = EMULATE_FAIL;
++ }
+ }
+ }
+
+diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
+index ed021ae7867a..ad2270ff83d1 100644
+--- a/arch/mips/kvm/tlb.c
++++ b/arch/mips/kvm/tlb.c
+@@ -284,7 +284,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
+ }
+
+ gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT);
+- if (gfn >= kvm->arch.guest_pmap_npages) {
++ if ((gfn | 1) >= kvm->arch.guest_pmap_npages) {
+ kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__,
+ gfn, badvaddr);
+ kvm_mips_dump_host_tlbs();
+@@ -373,26 +373,40 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
+ unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
+ struct kvm *kvm = vcpu->kvm;
+ kvm_pfn_t pfn0, pfn1;
++ gfn_t gfn0, gfn1;
++ long tlb_lo[2];
+ int ret;
+
+- if ((tlb->tlb_hi & VPN2_MASK) == 0) {
+- pfn0 = 0;
+- pfn1 = 0;
+- } else {
+- if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo0)
+- >> PAGE_SHIFT) < 0)
+- return -1;
+-
+- if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo1)
+- >> PAGE_SHIFT) < 0)
+- return -1;
+-
+- pfn0 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo0)
+- >> PAGE_SHIFT];
+- pfn1 = kvm->arch.guest_pmap[mips3_tlbpfn_to_paddr(tlb->tlb_lo1)
+- >> PAGE_SHIFT];
++ tlb_lo[0] = tlb->tlb_lo0;
++ tlb_lo[1] = tlb->tlb_lo1;
++
++ /*
++ * The commpage address must not be mapped to anything else if the guest
++ * TLB contains entries nearby, or commpage accesses will break.
++ */
++ if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) &
++ VPN2_MASK & (PAGE_MASK << 1)))
++ tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0;
++
++ gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT;
++ gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT;
++ if (gfn0 >= kvm->arch.guest_pmap_npages ||
++ gfn1 >= kvm->arch.guest_pmap_npages) {
++ kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n",
++ __func__, gfn0, gfn1, tlb->tlb_hi);
++ kvm_mips_dump_guest_tlbs(vcpu);
++ return -1;
+ }
+
++ if (kvm_mips_map_page(kvm, gfn0) < 0)
++ return -1;
++
++ if (kvm_mips_map_page(kvm, gfn1) < 0)
++ return -1;
++
++ pfn0 = kvm->arch.guest_pmap[gfn0];
++ pfn1 = kvm->arch.guest_pmap[gfn1];
++
+ if (hpa0)
+ *hpa0 = pfn0 << PAGE_SHIFT;
+
+@@ -401,9 +415,9 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
+
+ /* Get attributes from the Guest TLB */
+ entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
+- (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V);
++ (tlb_lo[0] & MIPS3_PG_D) | (tlb_lo[0] & MIPS3_PG_V);
+ entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
+- (tlb->tlb_lo1 & MIPS3_PG_D) | (tlb->tlb_lo1 & MIPS3_PG_V);
++ (tlb_lo[1] & MIPS3_PG_D) | (tlb_lo[1] & MIPS3_PG_V);
+
+ kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
+ tlb->tlb_lo0, tlb->tlb_lo1);
+@@ -776,10 +790,16 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
+ local_irq_restore(flags);
+ return KVM_INVALID_INST;
+ }
+- kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
+- &vcpu->arch.
+- guest_tlb[index],
+- NULL, NULL);
++ if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
++ &vcpu->arch.guest_tlb[index],
++ NULL, NULL)) {
++ kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n",
++ __func__, opc, index, vcpu,
++ read_c0_entryhi());
++ kvm_mips_dump_guest_tlbs(vcpu);
++ local_irq_restore(flags);
++ return KVM_INVALID_INST;
++ }
+ inst = *(opc);
+ }
+ local_irq_restore(flags);
+diff --git a/arch/mips/loongson64/loongson-3/hpet.c b/arch/mips/loongson64/loongson-3/hpet.c
+index 249039af66c4..4788bea62a6a 100644
+--- a/arch/mips/loongson64/loongson-3/hpet.c
++++ b/arch/mips/loongson64/loongson-3/hpet.c
+@@ -13,8 +13,8 @@
+ #define SMBUS_PCI_REG64 0x64
+ #define SMBUS_PCI_REGB4 0xb4
+
+-#define HPET_MIN_CYCLES 64
+-#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
++#define HPET_MIN_CYCLES 16
++#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES * 12)
+
+ static DEFINE_SPINLOCK(hpet_lock);
+ DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device);
+@@ -157,14 +157,14 @@ static int hpet_tick_resume(struct clock_event_device *evt)
+ static int hpet_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+ {
+- unsigned int cnt;
+- int res;
++ u32 cnt;
++ s32 res;
+
+ cnt = hpet_read(HPET_COUNTER);
+- cnt += delta;
++ cnt += (u32) delta;
+ hpet_write(HPET_T0_CMP, cnt);
+
+- res = (int)(cnt - hpet_read(HPET_COUNTER));
++ res = (s32)(cnt - hpet_read(HPET_COUNTER));
+
+ return res < HPET_MIN_CYCLES ? -ETIME : 0;
+ }
+@@ -230,7 +230,7 @@ void __init setup_hpet_timer(void)
+
+ cd = &per_cpu(hpet_clockevent_device, cpu);
+ cd->name = "hpet";
+- cd->rating = 320;
++ cd->rating = 100;
+ cd->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+ cd->set_state_shutdown = hpet_set_state_shutdown;
+ cd->set_state_periodic = hpet_set_state_periodic;
+diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
+index 9c2220a45189..45e3b8799ed0 100644
+--- a/arch/mips/mm/uasm-mips.c
++++ b/arch/mips/mm/uasm-mips.c
+@@ -65,7 +65,7 @@ static struct insn insn_table[] = {
+ #ifndef CONFIG_CPU_MIPSR6
+ { insn_cache, M(cache_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
+ #else
+- { insn_cache, M6(cache_op, 0, 0, 0, cache6_op), RS | RT | SIMM9 },
++ { insn_cache, M6(spec3_op, 0, 0, 0, cache6_op), RS | RT | SIMM9 },
+ #endif
+ { insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
+ { insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
+diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
+index b7019b559ddb..298afcf3bf2a 100644
+--- a/arch/powerpc/kernel/tm.S
++++ b/arch/powerpc/kernel/tm.S
+@@ -338,8 +338,6 @@ _GLOBAL(__tm_recheckpoint)
+ */
+ subi r7, r7, STACK_FRAME_OVERHEAD
+
+- SET_SCRATCH0(r1)
+-
+ mfmsr r6
+ /* R4 = original MSR to indicate whether thread used FP/Vector etc. */
+
+@@ -468,6 +466,7 @@ restore_gprs:
+ * until we turn MSR RI back on.
+ */
+
++ SET_SCRATCH0(r1)
+ ld r5, -8(r1)
+ ld r1, -16(r1)
+
+diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+index e571ad277398..38e108eaeafe 100644
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -655,112 +655,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ BEGIN_FTR_SECTION
+- b skip_tm
+-END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+-
+- /* Turn on TM/FP/VSX/VMX so we can restore them. */
+- mfmsr r5
+- li r6, MSR_TM >> 32
+- sldi r6, r6, 32
+- or r5, r5, r6
+- ori r5, r5, MSR_FP
+- oris r5, r5, (MSR_VEC | MSR_VSX)@h
+- mtmsrd r5
+-
+- /*
+- * The user may change these outside of a transaction, so they must
+- * always be context switched.
+- */
+- ld r5, VCPU_TFHAR(r4)
+- ld r6, VCPU_TFIAR(r4)
+- ld r7, VCPU_TEXASR(r4)
+- mtspr SPRN_TFHAR, r5
+- mtspr SPRN_TFIAR, r6
+- mtspr SPRN_TEXASR, r7
+-
+- ld r5, VCPU_MSR(r4)
+- rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+- beq skip_tm /* TM not active in guest */
+-
+- /* Make sure the failure summary is set, otherwise we'll program check
+- * when we trechkpt. It's possible that this might have been not set
+- * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+- * host.
+- */
+- oris r7, r7, (TEXASR_FS)@h
+- mtspr SPRN_TEXASR, r7
+-
+- /*
+- * We need to load up the checkpointed state for the guest.
+- * We need to do this early as it will blow away any GPRs, VSRs and
+- * some SPRs.
+- */
+-
+- mr r31, r4
+- addi r3, r31, VCPU_FPRS_TM
+- bl load_fp_state
+- addi r3, r31, VCPU_VRS_TM
+- bl load_vr_state
+- mr r4, r31
+- lwz r7, VCPU_VRSAVE_TM(r4)
+- mtspr SPRN_VRSAVE, r7
+-
+- ld r5, VCPU_LR_TM(r4)
+- lwz r6, VCPU_CR_TM(r4)
+- ld r7, VCPU_CTR_TM(r4)
+- ld r8, VCPU_AMR_TM(r4)
+- ld r9, VCPU_TAR_TM(r4)
+- mtlr r5
+- mtcr r6
+- mtctr r7
+- mtspr SPRN_AMR, r8
+- mtspr SPRN_TAR, r9
+-
+- /*
+- * Load up PPR and DSCR values but don't put them in the actual SPRs
+- * till the last moment to avoid running with userspace PPR and DSCR for
+- * too long.
+- */
+- ld r29, VCPU_DSCR_TM(r4)
+- ld r30, VCPU_PPR_TM(r4)
+-
+- std r2, PACATMSCRATCH(r13) /* Save TOC */
+-
+- /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+- li r5, 0
+- mtmsrd r5, 1
+-
+- /* Load GPRs r0-r28 */
+- reg = 0
+- .rept 29
+- ld reg, VCPU_GPRS_TM(reg)(r31)
+- reg = reg + 1
+- .endr
+-
+- mtspr SPRN_DSCR, r29
+- mtspr SPRN_PPR, r30
+-
+- /* Load final GPRs */
+- ld 29, VCPU_GPRS_TM(29)(r31)
+- ld 30, VCPU_GPRS_TM(30)(r31)
+- ld 31, VCPU_GPRS_TM(31)(r31)
+-
+- /* TM checkpointed state is now setup. All GPRs are now volatile. */
+- TRECHKPT
+-
+- /* Now let's get back the state we need. */
+- HMT_MEDIUM
+- GET_PACA(r13)
+- ld r29, HSTATE_DSCR(r13)
+- mtspr SPRN_DSCR, r29
+- ld r4, HSTATE_KVM_VCPU(r13)
+- ld r1, HSTATE_HOST_R1(r13)
+- ld r2, PACATMSCRATCH(r13)
+-
+- /* Set the MSR RI since we have our registers back. */
+- li r5, MSR_RI
+- mtmsrd r5, 1
+-skip_tm:
++ bl kvmppc_restore_tm
++END_FTR_SECTION_IFSET(CPU_FTR_TM)
+ #endif
+
+ /* Load guest PMU registers */
+@@ -841,12 +737,6 @@ BEGIN_FTR_SECTION
+ /* Skip next section on POWER7 */
+ b 8f
+ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+- /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
+- mfmsr r8
+- li r0, 1
+- rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+- mtmsrd r8
+-
+ /* Load up POWER8-specific registers */
+ ld r5, VCPU_IAMR(r4)
+ lwz r6, VCPU_PSPB(r4)
+@@ -1436,106 +1326,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ BEGIN_FTR_SECTION
+- b 2f
+-END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+- /* Turn on TM. */
+- mfmsr r8
+- li r0, 1
+- rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+- mtmsrd r8
+-
+- ld r5, VCPU_MSR(r9)
+- rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+- beq 1f /* TM not active in guest. */
+-
+- li r3, TM_CAUSE_KVM_RESCHED
+-
+- /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+- li r5, 0
+- mtmsrd r5, 1
+-
+- /* All GPRs are volatile at this point. */
+- TRECLAIM(R3)
+-
+- /* Temporarily store r13 and r9 so we have some regs to play with */
+- SET_SCRATCH0(r13)
+- GET_PACA(r13)
+- std r9, PACATMSCRATCH(r13)
+- ld r9, HSTATE_KVM_VCPU(r13)
+-
+- /* Get a few more GPRs free. */
+- std r29, VCPU_GPRS_TM(29)(r9)
+- std r30, VCPU_GPRS_TM(30)(r9)
+- std r31, VCPU_GPRS_TM(31)(r9)
+-
+- /* Save away PPR and DSCR soon so don't run with user values. */
+- mfspr r31, SPRN_PPR
+- HMT_MEDIUM
+- mfspr r30, SPRN_DSCR
+- ld r29, HSTATE_DSCR(r13)
+- mtspr SPRN_DSCR, r29
+-
+- /* Save all but r9, r13 & r29-r31 */
+- reg = 0
+- .rept 29
+- .if (reg != 9) && (reg != 13)
+- std reg, VCPU_GPRS_TM(reg)(r9)
+- .endif
+- reg = reg + 1
+- .endr
+- /* ... now save r13 */
+- GET_SCRATCH0(r4)
+- std r4, VCPU_GPRS_TM(13)(r9)
+- /* ... and save r9 */
+- ld r4, PACATMSCRATCH(r13)
+- std r4, VCPU_GPRS_TM(9)(r9)
+-
+- /* Reload stack pointer and TOC. */
+- ld r1, HSTATE_HOST_R1(r13)
+- ld r2, PACATOC(r13)
+-
+- /* Set MSR RI now we have r1 and r13 back. */
+- li r5, MSR_RI
+- mtmsrd r5, 1
+-
+- /* Save away checkpinted SPRs. */
+- std r31, VCPU_PPR_TM(r9)
+- std r30, VCPU_DSCR_TM(r9)
+- mflr r5
+- mfcr r6
+- mfctr r7
+- mfspr r8, SPRN_AMR
+- mfspr r10, SPRN_TAR
+- std r5, VCPU_LR_TM(r9)
+- stw r6, VCPU_CR_TM(r9)
+- std r7, VCPU_CTR_TM(r9)
+- std r8, VCPU_AMR_TM(r9)
+- std r10, VCPU_TAR_TM(r9)
+-
+- /* Restore r12 as trap number. */
+- lwz r12, VCPU_TRAP(r9)
+-
+- /* Save FP/VSX. */
+- addi r3, r9, VCPU_FPRS_TM
+- bl store_fp_state
+- addi r3, r9, VCPU_VRS_TM
+- bl store_vr_state
+- mfspr r6, SPRN_VRSAVE
+- stw r6, VCPU_VRSAVE_TM(r9)
+-1:
+- /*
+- * We need to save these SPRs after the treclaim so that the software
+- * error code is recorded correctly in the TEXASR. Also the user may
+- * change these outside of a transaction, so they must always be
+- * context switched.
+- */
+- mfspr r5, SPRN_TFHAR
+- mfspr r6, SPRN_TFIAR
+- mfspr r7, SPRN_TEXASR
+- std r5, VCPU_TFHAR(r9)
+- std r6, VCPU_TFIAR(r9)
+- std r7, VCPU_TEXASR(r9)
+-2:
++ bl kvmppc_save_tm
++END_FTR_SECTION_IFSET(CPU_FTR_TM)
+ #endif
+
+ /* Increment yield count if they have a VPA */
+@@ -2245,6 +2037,13 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
+ /* save FP state */
+ bl kvmppc_save_fp
+
++#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
++BEGIN_FTR_SECTION
++ ld r9, HSTATE_KVM_VCPU(r13)
++ bl kvmppc_save_tm
++END_FTR_SECTION_IFSET(CPU_FTR_TM)
++#endif
++
+ /*
+ * Set DEC to the smaller of DEC and HDEC, so that we wake
+ * no later than the end of our timeslice (HDEC interrupts
+@@ -2321,6 +2120,12 @@ kvm_end_cede:
+ bl kvmhv_accumulate_time
+ #endif
+
++#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
++BEGIN_FTR_SECTION
++ bl kvmppc_restore_tm
++END_FTR_SECTION_IFSET(CPU_FTR_TM)
++#endif
++
+ /* load up FP state */
+ bl kvmppc_load_fp
+
+@@ -2631,6 +2436,239 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ mr r4,r31
+ blr
+
++#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
++/*
++ * Save transactional state and TM-related registers.
++ * Called with r9 pointing to the vcpu struct.
++ * This can modify all checkpointed registers, but
++ * restores r1, r2 and r9 (vcpu pointer) before exit.
++ */
++kvmppc_save_tm:
++ mflr r0
++ std r0, PPC_LR_STKOFF(r1)
++
++ /* Turn on TM. */
++ mfmsr r8
++ li r0, 1
++ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
++ mtmsrd r8
++
++ ld r5, VCPU_MSR(r9)
++ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
++ beq 1f /* TM not active in guest. */
++
++ std r1, HSTATE_HOST_R1(r13)
++ li r3, TM_CAUSE_KVM_RESCHED
++
++ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
++ li r5, 0
++ mtmsrd r5, 1
++
++ /* All GPRs are volatile at this point. */
++ TRECLAIM(R3)
++
++ /* Temporarily store r13 and r9 so we have some regs to play with */
++ SET_SCRATCH0(r13)
++ GET_PACA(r13)
++ std r9, PACATMSCRATCH(r13)
++ ld r9, HSTATE_KVM_VCPU(r13)
++
++ /* Get a few more GPRs free. */
++ std r29, VCPU_GPRS_TM(29)(r9)
++ std r30, VCPU_GPRS_TM(30)(r9)
++ std r31, VCPU_GPRS_TM(31)(r9)
++
++ /* Save away PPR and DSCR soon so don't run with user values. */
++ mfspr r31, SPRN_PPR
++ HMT_MEDIUM
++ mfspr r30, SPRN_DSCR
++ ld r29, HSTATE_DSCR(r13)
++ mtspr SPRN_DSCR, r29
++
++ /* Save all but r9, r13 & r29-r31 */
++ reg = 0
++ .rept 29
++ .if (reg != 9) && (reg != 13)
++ std reg, VCPU_GPRS_TM(reg)(r9)
++ .endif
++ reg = reg + 1
++ .endr
++ /* ... now save r13 */
++ GET_SCRATCH0(r4)
++ std r4, VCPU_GPRS_TM(13)(r9)
++ /* ... and save r9 */
++ ld r4, PACATMSCRATCH(r13)
++ std r4, VCPU_GPRS_TM(9)(r9)
++
++ /* Reload stack pointer and TOC. */
++ ld r1, HSTATE_HOST_R1(r13)
++ ld r2, PACATOC(r13)
++
++ /* Set MSR RI now we have r1 and r13 back. */
++ li r5, MSR_RI
++ mtmsrd r5, 1
++
++ /* Save away checkpinted SPRs. */
++ std r31, VCPU_PPR_TM(r9)
++ std r30, VCPU_DSCR_TM(r9)
++ mflr r5
++ mfcr r6
++ mfctr r7
++ mfspr r8, SPRN_AMR
++ mfspr r10, SPRN_TAR
++ std r5, VCPU_LR_TM(r9)
++ stw r6, VCPU_CR_TM(r9)
++ std r7, VCPU_CTR_TM(r9)
++ std r8, VCPU_AMR_TM(r9)
++ std r10, VCPU_TAR_TM(r9)
++
++ /* Restore r12 as trap number. */
++ lwz r12, VCPU_TRAP(r9)
++
++ /* Save FP/VSX. */
++ addi r3, r9, VCPU_FPRS_TM
++ bl store_fp_state
++ addi r3, r9, VCPU_VRS_TM
++ bl store_vr_state
++ mfspr r6, SPRN_VRSAVE
++ stw r6, VCPU_VRSAVE_TM(r9)
++1:
++ /*
++ * We need to save these SPRs after the treclaim so that the software
++ * error code is recorded correctly in the TEXASR. Also the user may
++ * change these outside of a transaction, so they must always be
++ * context switched.
++ */
++ mfspr r5, SPRN_TFHAR
++ mfspr r6, SPRN_TFIAR
++ mfspr r7, SPRN_TEXASR
++ std r5, VCPU_TFHAR(r9)
++ std r6, VCPU_TFIAR(r9)
++ std r7, VCPU_TEXASR(r9)
++
++ ld r0, PPC_LR_STKOFF(r1)
++ mtlr r0
++ blr
++
++/*
++ * Restore transactional state and TM-related registers.
++ * Called with r4 pointing to the vcpu struct.
++ * This potentially modifies all checkpointed registers.
++ * It restores r1, r2, r4 from the PACA.
++ */
++kvmppc_restore_tm:
++ mflr r0
++ std r0, PPC_LR_STKOFF(r1)
++
++ /* Turn on TM/FP/VSX/VMX so we can restore them. */
++ mfmsr r5
++ li r6, MSR_TM >> 32
++ sldi r6, r6, 32
++ or r5, r5, r6
++ ori r5, r5, MSR_FP
++ oris r5, r5, (MSR_VEC | MSR_VSX)@h
++ mtmsrd r5
++
++ /*
++ * The user may change these outside of a transaction, so they must
++ * always be context switched.
++ */
++ ld r5, VCPU_TFHAR(r4)
++ ld r6, VCPU_TFIAR(r4)
++ ld r7, VCPU_TEXASR(r4)
++ mtspr SPRN_TFHAR, r5
++ mtspr SPRN_TFIAR, r6
++ mtspr SPRN_TEXASR, r7
++
++ ld r5, VCPU_MSR(r4)
++ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
++ beqlr /* TM not active in guest */
++ std r1, HSTATE_HOST_R1(r13)
++
++ /* Make sure the failure summary is set, otherwise we'll program check
++ * when we trechkpt. It's possible that this might have been not set
++ * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
++ * host.
++ */
++ oris r7, r7, (TEXASR_FS)@h
++ mtspr SPRN_TEXASR, r7
++
++ /*
++ * We need to load up the checkpointed state for the guest.
++ * We need to do this early as it will blow away any GPRs, VSRs and
++ * some SPRs.
++ */
++
++ mr r31, r4
++ addi r3, r31, VCPU_FPRS_TM
++ bl load_fp_state
++ addi r3, r31, VCPU_VRS_TM
++ bl load_vr_state
++ mr r4, r31
++ lwz r7, VCPU_VRSAVE_TM(r4)
++ mtspr SPRN_VRSAVE, r7
++
++ ld r5, VCPU_LR_TM(r4)
++ lwz r6, VCPU_CR_TM(r4)
++ ld r7, VCPU_CTR_TM(r4)
++ ld r8, VCPU_AMR_TM(r4)
++ ld r9, VCPU_TAR_TM(r4)
++ mtlr r5
++ mtcr r6
++ mtctr r7
++ mtspr SPRN_AMR, r8
++ mtspr SPRN_TAR, r9
++
++ /*
++ * Load up PPR and DSCR values but don't put them in the actual SPRs
++ * till the last moment to avoid running with userspace PPR and DSCR for
++ * too long.
++ */
++ ld r29, VCPU_DSCR_TM(r4)
++ ld r30, VCPU_PPR_TM(r4)
++
++ std r2, PACATMSCRATCH(r13) /* Save TOC */
++
++ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
++ li r5, 0
++ mtmsrd r5, 1
++
++ /* Load GPRs r0-r28 */
++ reg = 0
++ .rept 29
++ ld reg, VCPU_GPRS_TM(reg)(r31)
++ reg = reg + 1
++ .endr
++
++ mtspr SPRN_DSCR, r29
++ mtspr SPRN_PPR, r30
++
++ /* Load final GPRs */
++ ld 29, VCPU_GPRS_TM(29)(r31)
++ ld 30, VCPU_GPRS_TM(30)(r31)
++ ld 31, VCPU_GPRS_TM(31)(r31)
++
++ /* TM checkpointed state is now setup. All GPRs are now volatile. */
++ TRECHKPT
++
++ /* Now let's get back the state we need. */
++ HMT_MEDIUM
++ GET_PACA(r13)
++ ld r29, HSTATE_DSCR(r13)
++ mtspr SPRN_DSCR, r29
++ ld r4, HSTATE_KVM_VCPU(r13)
++ ld r1, HSTATE_HOST_R1(r13)
++ ld r2, PACATMSCRATCH(r13)
++
++ /* Set the MSR RI since we have our registers back. */
++ li r5, MSR_RI
++ mtmsrd r5, 1
++
++ ld r0, PPC_LR_STKOFF(r1)
++ mtlr r0
++ blr
++#endif
++
+ /*
+ * We come here if we get any exception or interrupt while we are
+ * executing host real mode code while in guest MMU context.
+diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
+index 18d2beb89340..42b968a85863 100644
+--- a/arch/s390/include/asm/pgtable.h
++++ b/arch/s390/include/asm/pgtable.h
+@@ -893,7 +893,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
+ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char key, bool nq);
+-unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
++unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
+
+ /*
+ * Certain architectures need to do special things when PTEs
+diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
+index a2e6ef32e054..0a2031618f7f 100644
+--- a/arch/s390/include/asm/tlbflush.h
++++ b/arch/s390/include/asm/tlbflush.h
+@@ -81,7 +81,8 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
+ }
+
+ /*
+- * Flush TLB entries for a specific ASCE on all CPUs.
++ * Flush TLB entries for a specific ASCE on all CPUs. Should never be used
++ * when more than one asce (e.g. gmap) ran on this mm.
+ */
+ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+ {
+diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
+index cace818d86eb..313c3b8cf7dd 100644
+--- a/arch/s390/mm/gmap.c
++++ b/arch/s390/mm/gmap.c
+@@ -85,7 +85,7 @@ EXPORT_SYMBOL_GPL(gmap_alloc);
+ static void gmap_flush_tlb(struct gmap *gmap)
+ {
+ if (MACHINE_HAS_IDTE)
+- __tlb_flush_asce(gmap->mm, gmap->asce);
++ __tlb_flush_idte(gmap->asce);
+ else
+ __tlb_flush_global();
+ }
+@@ -124,7 +124,7 @@ void gmap_free(struct gmap *gmap)
+
+ /* Flush tlb. */
+ if (MACHINE_HAS_IDTE)
+- __tlb_flush_asce(gmap->mm, gmap->asce);
++ __tlb_flush_idte(gmap->asce);
+ else
+ __tlb_flush_global();
+
+diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
+index 9f0ce0e6eeb4..ebb4f87112f4 100644
+--- a/arch/s390/mm/pgtable.c
++++ b/arch/s390/mm/pgtable.c
+@@ -543,7 +543,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ }
+ EXPORT_SYMBOL(set_guest_storage_key);
+
+-unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
++unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+ {
+ unsigned char key;
+ spinlock_t *ptl;
+diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
+index 8acaf4e384c0..a86d7cc2c2d8 100644
+--- a/arch/um/os-Linux/signal.c
++++ b/arch/um/os-Linux/signal.c
+@@ -15,6 +15,7 @@
+ #include <kern_util.h>
+ #include <os.h>
+ #include <sysdep/mcontext.h>
++#include <um_malloc.h>
+
+ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
+ [SIGTRAP] = relay_signal,
+@@ -32,7 +33,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
+ struct uml_pt_regs *r;
+ int save_errno = errno;
+
+- r = malloc(sizeof(struct uml_pt_regs));
++ r = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC);
+ if (!r)
+ panic("out of memory");
+
+@@ -91,7 +92,7 @@ static void timer_real_alarm_handler(mcontext_t *mc)
+ {
+ struct uml_pt_regs *regs;
+
+- regs = malloc(sizeof(struct uml_pt_regs));
++ regs = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC);
+ if (!regs)
+ panic("out of memory");
+
+diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
+index e35632ef23c7..62dfc644c908 100644
+--- a/arch/unicore32/include/asm/mmu_context.h
++++ b/arch/unicore32/include/asm/mmu_context.h
+@@ -98,7 +98,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
+ }
+
+ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+- bool write, bool foreign)
++ bool write, bool execute, bool foreign)
+ {
+ /* by default, allow everything */
+ return true;
+diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
+index 874e8bd64d1d..bd136ac140be 100644
+--- a/arch/x86/events/intel/uncore_snbep.c
++++ b/arch/x86/events/intel/uncore_snbep.c
+@@ -2546,7 +2546,7 @@ void hswep_uncore_cpu_init(void)
+
+ static struct intel_uncore_type hswep_uncore_ha = {
+ .name = "ha",
+- .num_counters = 5,
++ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+@@ -2565,7 +2565,7 @@ static struct uncore_event_desc hswep_uncore_imc_events[] = {
+
+ static struct intel_uncore_type hswep_uncore_imc = {
+ .name = "imc",
+- .num_counters = 5,
++ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+@@ -2611,7 +2611,7 @@ static struct intel_uncore_type hswep_uncore_irp = {
+
+ static struct intel_uncore_type hswep_uncore_qpi = {
+ .name = "qpi",
+- .num_counters = 5,
++ .num_counters = 4,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+@@ -2693,7 +2693,7 @@ static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
+
+ static struct intel_uncore_type hswep_uncore_r3qpi = {
+ .name = "r3qpi",
+- .num_counters = 4,
++ .num_counters = 3,
+ .num_boxes = 3,
+ .perf_ctr_bits = 44,
+ .constraints = hswep_uncore_r3qpi_constraints,
+@@ -2892,7 +2892,7 @@ static struct intel_uncore_type bdx_uncore_ha = {
+
+ static struct intel_uncore_type bdx_uncore_imc = {
+ .name = "imc",
+- .num_counters = 5,
++ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
+index c146f3c262c3..0149ac59c273 100644
+--- a/arch/x86/kvm/mtrr.c
++++ b/arch/x86/kvm/mtrr.c
+@@ -539,6 +539,7 @@ static void mtrr_lookup_var_start(struct mtrr_iter *iter)
+
+ iter->fixed = false;
+ iter->start_max = iter->start;
++ iter->range = NULL;
+ iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);
+
+ __mtrr_lookup_var_next(iter);
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 64a79f271276..8326d6891395 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -8224,6 +8224,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
+ if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
+ exit_reason != EXIT_REASON_EPT_VIOLATION &&
++ exit_reason != EXIT_REASON_PML_FULL &&
+ exit_reason != EXIT_REASON_TASK_SWITCH)) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
+@@ -8854,6 +8855,22 @@ static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
+ put_cpu();
+ }
+
++/*
++ * Ensure that the current vmcs of the logical processor is the
++ * vmcs01 of the vcpu before calling free_nested().
++ */
++static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
++{
++ struct vcpu_vmx *vmx = to_vmx(vcpu);
++ int r;
++
++ r = vcpu_load(vcpu);
++ BUG_ON(r);
++ vmx_load_vmcs01(vcpu);
++ free_nested(vmx);
++ vcpu_put(vcpu);
++}
++
+ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+@@ -8862,8 +8879,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
+ vmx_destroy_pml_buffer(vmx);
+ free_vpid(vmx->vpid);
+ leave_guest_mode(vcpu);
+- vmx_load_vmcs01(vcpu);
+- free_nested(vmx);
++ vmx_free_vcpu_nested(vcpu);
+ free_loaded_vmcs(vmx->loaded_vmcs);
+ kfree(vmx->guest_msrs);
+ kvm_vcpu_uninit(vcpu);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 7da5dd2057a9..fea2c5717ec1 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -91,6 +91,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
+
+ static void update_cr8_intercept(struct kvm_vcpu *vcpu);
+ static void process_nmi(struct kvm_vcpu *vcpu);
++static void process_smi(struct kvm_vcpu *vcpu);
+ static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+
+ struct kvm_x86_ops *kvm_x86_ops __read_mostly;
+@@ -5296,13 +5297,8 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
+ /* This is a good place to trace that we are exiting SMM. */
+ trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
+
+- if (unlikely(vcpu->arch.smi_pending)) {
+- kvm_make_request(KVM_REQ_SMI, vcpu);
+- vcpu->arch.smi_pending = 0;
+- } else {
+- /* Process a latched INIT, if any. */
+- kvm_make_request(KVM_REQ_EVENT, vcpu);
+- }
++ /* Process a latched INIT or SMI, if any. */
++ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
+
+ kvm_mmu_reset_context(vcpu);
+@@ -6102,7 +6098,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
+ }
+
+ /* try to inject new event if pending */
+- if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
++ if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
++ vcpu->arch.smi_pending = false;
++ process_smi(vcpu);
++ } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+ --vcpu->arch.nmi_pending;
+ vcpu->arch.nmi_injected = true;
+ kvm_x86_ops->set_nmi(vcpu);
+@@ -6312,11 +6311,6 @@ static void process_smi(struct kvm_vcpu *vcpu)
+ char buf[512];
+ u32 cr0;
+
+- if (is_smm(vcpu)) {
+- vcpu->arch.smi_pending = true;
+- return;
+- }
+-
+ trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
+ vcpu->arch.hflags |= HF_SMM_MASK;
+ memset(buf, 0, 512);
+@@ -6379,6 +6373,12 @@ static void process_smi(struct kvm_vcpu *vcpu)
+ kvm_mmu_reset_context(vcpu);
+ }
+
++static void process_smi_request(struct kvm_vcpu *vcpu)
++{
++ vcpu->arch.smi_pending = true;
++ kvm_make_request(KVM_REQ_EVENT, vcpu);
++}
++
+ void kvm_make_scan_ioapic_request(struct kvm *kvm)
+ {
+ kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+@@ -6500,7 +6500,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+ if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
+ record_steal_time(vcpu);
+ if (kvm_check_request(KVM_REQ_SMI, vcpu))
+- process_smi(vcpu);
++ process_smi_request(vcpu);
+ if (kvm_check_request(KVM_REQ_NMI, vcpu))
+ process_nmi(vcpu);
+ if (kvm_check_request(KVM_REQ_PMU, vcpu))
+@@ -6573,8 +6573,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+
+ if (inject_pending_event(vcpu, req_int_win) != 0)
+ req_immediate_exit = true;
+- /* enable NMI/IRQ window open exits if needed */
+ else {
++ /* Enable NMI/IRQ window open exits if needed.
++ *
++ * SMIs have two cases: 1) they can be nested, in which
++ * case there is nothing to do here because RSM will
++ * cause a vmexit anyway; or 2) the SMI can be pending
++ * because inject_pending_event has completed the
++ * injection of an IRQ or NMI from the previous vmexit,
++ * in which case we request an immediate exit to inject the SMI.
++ */
++ if (vcpu->arch.smi_pending && !is_smm(vcpu))
++ req_immediate_exit = true;
+ if (vcpu->arch.nmi_pending)
+ kvm_x86_ops->enable_nmi_window(vcpu);
+ if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
+@@ -6625,8 +6635,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
+
+ kvm_load_guest_xcr0(vcpu);
+
+- if (req_immediate_exit)
++ if (req_immediate_exit) {
++ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ smp_send_reschedule(vcpu->cpu);
++ }
+
+ trace_kvm_entry(vcpu->vcpu_id);
+ wait_lapic_expire(vcpu);
+@@ -7427,6 +7439,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ {
+ vcpu->arch.hflags = 0;
+
++ vcpu->arch.smi_pending = 0;
+ atomic_set(&vcpu->arch.nmi_queued, 0);
+ vcpu->arch.nmi_pending = 0;
+ vcpu->arch.nmi_injected = false;
+diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
+index 8b93e634af84..ae97f24a4371 100644
+--- a/arch/x86/pci/intel_mid_pci.c
++++ b/arch/x86/pci/intel_mid_pci.c
+@@ -37,6 +37,7 @@
+
+ /* Quirks for the listed devices */
+ #define PCI_DEVICE_ID_INTEL_MRFL_MMC 0x1190
++#define PCI_DEVICE_ID_INTEL_MRFL_HSU 0x1191
+
+ /* Fixed BAR fields */
+ #define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */
+@@ -225,13 +226,20 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
+ /* Special treatment for IRQ0 */
+ if (dev->irq == 0) {
+ /*
++ * Skip HS UART common registers device since it has
++ * IRQ0 assigned but not used by the kernel.
++ */
++ if (dev->device == PCI_DEVICE_ID_INTEL_MRFL_HSU)
++ return -EBUSY;
++ /*
+ * TNG has IRQ0 assigned to eMMC controller. But there
+ * are also other devices with bogus PCI configuration
+ * that have IRQ0 assigned. This check ensures that
+- * eMMC gets it.
++ * eMMC gets it. The rest of the devices can still be
++ * enabled without an interrupt line being allocated.
+ */
+ if (dev->device != PCI_DEVICE_ID_INTEL_MRFL_MMC)
+- return -EBUSY;
++ return 0;
+ }
+ break;
+ default:
+diff --git a/block/bio.c b/block/bio.c
+index 0e4aa42bc30d..462386908835 100644
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -579,6 +579,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
+ bio->bi_rw = bio_src->bi_rw;
+ bio->bi_iter = bio_src->bi_iter;
+ bio->bi_io_vec = bio_src->bi_io_vec;
++
++ bio_clone_blkcg_association(bio, bio_src);
+ }
+ EXPORT_SYMBOL(__bio_clone_fast);
+
+@@ -684,6 +686,8 @@ integrity_clone:
+ }
+ }
+
++ bio_clone_blkcg_association(bio, bio_src);
++
+ return bio;
+ }
+ EXPORT_SYMBOL(bio_clone_bioset);
+@@ -2005,6 +2009,17 @@ void bio_disassociate_task(struct bio *bio)
+ }
+ }
+
++/**
++ * bio_clone_blkcg_association - clone blkcg association from src to dst bio
++ * @dst: destination bio
++ * @src: source bio
++ */
++void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
++{
++ if (src->bi_css)
++ WARN_ON(bio_associate_blkcg(dst, src->bi_css));
++}
++
+ #endif /* CONFIG_BLK_CGROUP */
+
+ static void __init biovec_init_slabs(void)
+diff --git a/block/genhd.c b/block/genhd.c
+index 3eebd256b765..086f1a357734 100644
+--- a/block/genhd.c
++++ b/block/genhd.c
+@@ -613,7 +613,7 @@ void add_disk(struct gendisk *disk)
+
+ /* Register BDI before referencing it from bdev */
+ bdi = &disk->queue->backing_dev_info;
+- bdi_register_dev(bdi, disk_devt(disk));
++ bdi_register_owner(bdi, disk_to_dev(disk));
+
+ blk_register_region(disk_devt(disk), disk->minors, NULL,
+ exact_match, exact_lock, disk);
+diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
+index 290d6f5be44b..f4218df00883 100644
+--- a/drivers/acpi/ec.c
++++ b/drivers/acpi/ec.c
+@@ -101,6 +101,7 @@ enum ec_command {
+ #define ACPI_EC_UDELAY_POLL 550 /* Wait 1ms for EC transaction polling */
+ #define ACPI_EC_CLEAR_MAX 100 /* Maximum number of events to query
+ * when trying to clear the EC */
++#define ACPI_EC_MAX_QUERIES 16 /* Maximum number of parallel queries */
+
+ enum {
+ EC_FLAGS_QUERY_PENDING, /* Query is pending */
+@@ -121,6 +122,10 @@ static unsigned int ec_delay __read_mostly = ACPI_EC_DELAY;
+ module_param(ec_delay, uint, 0644);
+ MODULE_PARM_DESC(ec_delay, "Timeout(ms) waited until an EC command completes");
+
++static unsigned int ec_max_queries __read_mostly = ACPI_EC_MAX_QUERIES;
++module_param(ec_max_queries, uint, 0644);
++MODULE_PARM_DESC(ec_max_queries, "Maximum parallel _Qxx evaluations");
++
+ static bool ec_busy_polling __read_mostly;
+ module_param(ec_busy_polling, bool, 0644);
+ MODULE_PARM_DESC(ec_busy_polling, "Use busy polling to advance EC transaction");
+@@ -174,6 +179,7 @@ static void acpi_ec_event_processor(struct work_struct *work);
+
+ struct acpi_ec *boot_ec, *first_ec;
+ EXPORT_SYMBOL(first_ec);
++static struct workqueue_struct *ec_query_wq;
+
+ static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
+ static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */
+@@ -1098,7 +1104,7 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data)
+ * work queue execution.
+ */
+ ec_dbg_evt("Query(0x%02x) scheduled", value);
+- if (!schedule_work(&q->work)) {
++ if (!queue_work(ec_query_wq, &q->work)) {
+ ec_dbg_evt("Query(0x%02x) overlapped", value);
+ result = -EBUSY;
+ }
+@@ -1660,15 +1666,41 @@ static struct acpi_driver acpi_ec_driver = {
+ },
+ };
+
++static inline int acpi_ec_query_init(void)
++{
++ if (!ec_query_wq) {
++ ec_query_wq = alloc_workqueue("kec_query", 0,
++ ec_max_queries);
++ if (!ec_query_wq)
++ return -ENODEV;
++ }
++ return 0;
++}
++
++static inline void acpi_ec_query_exit(void)
++{
++ if (ec_query_wq) {
++ destroy_workqueue(ec_query_wq);
++ ec_query_wq = NULL;
++ }
++}
++
+ int __init acpi_ec_init(void)
+ {
+- int result = 0;
++ int result;
+
++ /* register workqueue for _Qxx evaluations */
++ result = acpi_ec_query_init();
++ if (result)
++ goto err_exit;
+ /* Now register the driver for the EC */
+ result = acpi_bus_register_driver(&acpi_ec_driver);
+- if (result < 0)
+- return -ENODEV;
++ if (result)
++ goto err_exit;
+
++err_exit:
++ if (result)
++ acpi_ec_query_exit();
+ return result;
+ }
+
+@@ -1678,5 +1710,6 @@ static void __exit acpi_ec_exit(void)
+ {
+
+ acpi_bus_unregister_driver(&acpi_ec_driver);
++ acpi_ec_query_exit();
+ }
+ #endif /* 0 */
+diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
+index cae5385cf499..bd46569e0e52 100644
+--- a/drivers/bcma/host_pci.c
++++ b/drivers/bcma/host_pci.c
+@@ -295,6 +295,7 @@ static const struct pci_device_id bcma_pci_bridge_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4359) },
+ { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4360) },
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_DELL, 0x0016) },
++ { PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_FOXCONN, 0xe092) },
+ { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a0) },
+ { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a9) },
+ { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43aa) },
+diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
+index 84708a5f8c52..a1dcf12d3dad 100644
+--- a/drivers/block/floppy.c
++++ b/drivers/block/floppy.c
+@@ -3663,11 +3663,6 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
+
+ opened_bdev[drive] = bdev;
+
+- if (!(mode & (FMODE_READ|FMODE_WRITE))) {
+- res = -EINVAL;
+- goto out;
+- }
+-
+ res = -ENXIO;
+
+ if (!floppy_track_buffer) {
+@@ -3711,13 +3706,15 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
+ if (UFDCS->rawcmd == 1)
+ UFDCS->rawcmd = 2;
+
+- UDRS->last_checked = 0;
+- clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+- check_disk_change(bdev);
+- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+- goto out;
+- if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+- goto out;
++ if (mode & (FMODE_READ|FMODE_WRITE)) {
++ UDRS->last_checked = 0;
++ clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
++ check_disk_change(bdev);
++ if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
++ goto out;
++ if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
++ goto out;
++ }
+
+ res = -EROFS;
+
+diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
+index 25894687c168..fadba88745dc 100644
+--- a/drivers/bluetooth/ath3k.c
++++ b/drivers/bluetooth/ath3k.c
+@@ -123,6 +123,7 @@ static const struct usb_device_id ath3k_table[] = {
+ { USB_DEVICE(0x13d3, 0x3472) },
+ { USB_DEVICE(0x13d3, 0x3474) },
+ { USB_DEVICE(0x13d3, 0x3487) },
++ { USB_DEVICE(0x13d3, 0x3490) },
+
+ /* Atheros AR5BBU12 with sflash firmware */
+ { USB_DEVICE(0x0489, 0xE02C) },
+@@ -190,6 +191,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
+ { USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x13d3, 0x3487), .driver_info = BTUSB_ATH3012 },
++ { USB_DEVICE(0x13d3, 0x3490), .driver_info = BTUSB_ATH3012 },
+
+ /* Atheros AR5BBU22 with sflash firmware */
+ { USB_DEVICE(0x0489, 0xE036), .driver_info = BTUSB_ATH3012 },
+diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
+index a3be65e6231a..9f40c3426f0c 100644
+--- a/drivers/bluetooth/btusb.c
++++ b/drivers/bluetooth/btusb.c
+@@ -237,6 +237,7 @@ static const struct usb_device_id blacklist_table[] = {
+ { USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x13d3, 0x3474), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x13d3, 0x3487), .driver_info = BTUSB_ATH3012 },
++ { USB_DEVICE(0x13d3, 0x3490), .driver_info = BTUSB_ATH3012 },
+
+ /* Atheros AR5BBU12 with sflash firmware */
+ { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE },
+diff --git a/drivers/char/random.c b/drivers/char/random.c
+index 87ab9f6b4112..d72c6d14a1c9 100644
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -949,6 +949,7 @@ void add_interrupt_randomness(int irq, int irq_flags)
+ /* award one bit for the contents of the fast pool */
+ credit_entropy_bits(r, credit + 1);
+ }
++EXPORT_SYMBOL_GPL(add_interrupt_randomness);
+
+ #ifdef CONFIG_BLOCK
+ void add_disk_randomness(struct gendisk *disk)
+@@ -1461,12 +1462,16 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+ static ssize_t
+ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+ {
++ static int maxwarn = 10;
+ int ret;
+
+- if (unlikely(nonblocking_pool.initialized == 0))
+- printk_once(KERN_NOTICE "random: %s urandom read "
+- "with %d bits of entropy available\n",
+- current->comm, nonblocking_pool.entropy_total);
++ if (unlikely(nonblocking_pool.initialized == 0) &&
++ maxwarn > 0) {
++ maxwarn--;
++ printk(KERN_NOTICE "random: %s: uninitialized urandom read "
++ "(%zd bytes read, %d bits of entropy available)\n",
++ current->comm, nbytes, nonblocking_pool.entropy_total);
++ }
+
+ nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3));
+ ret = extract_entropy_user(&nonblocking_pool, buf, nbytes);
+@@ -1774,13 +1779,15 @@ int random_int_secret_init(void)
+ return 0;
+ }
+
++static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash)
++ __aligned(sizeof(unsigned long));
++
+ /*
+ * Get a random word for internal kernel use only. Similar to urandom but
+ * with the goal of minimal entropy pool depletion. As a result, the random
+ * value is not cryptographically secure but for several uses the cost of
+ * depleting entropy is too high
+ */
+-static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash);
+ unsigned int get_random_int(void)
+ {
+ __u32 *hash;
+@@ -1850,12 +1857,18 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
+ {
+ struct entropy_store *poolp = &input_pool;
+
+- /* Suspend writing if we're above the trickle threshold.
+- * We'll be woken up again once below random_write_wakeup_thresh,
+- * or when the calling thread is about to terminate.
+- */
+- wait_event_interruptible(random_write_wait, kthread_should_stop() ||
++ if (unlikely(nonblocking_pool.initialized == 0))
++ poolp = &nonblocking_pool;
++ else {
++ /* Suspend writing if we're above the trickle
++ * threshold. We'll be woken up again once below
++ * random_write_wakeup_thresh, or when the calling
++ * thread is about to terminate.
++ */
++ wait_event_interruptible(random_write_wait,
++ kthread_should_stop() ||
+ ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
++ }
+ mix_pool_bytes(poolp, buffer, count);
+ credit_entropy_bits(poolp, entropy);
+ }
+diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
+index a12b31940344..e9fd1d83f9f1 100644
+--- a/drivers/char/tpm/tpm_crb.c
++++ b/drivers/char/tpm/tpm_crb.c
+@@ -246,7 +246,7 @@ static void __iomem *crb_map_res(struct device *dev, struct crb_priv *priv,
+
+ /* Detect a 64 bit address on a 32 bit system */
+ if (start != new_res.start)
+- return ERR_PTR(-EINVAL);
++ return (void __iomem *) ERR_PTR(-EINVAL);
+
+ if (!resource_contains(&priv->res, &new_res))
+ return devm_ioremap_resource(dev, &new_res);
+diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
+index 8059a8d3ea36..31b77f71313f 100644
+--- a/drivers/clk/rockchip/clk-rk3399.c
++++ b/drivers/clk/rockchip/clk-rk3399.c
+@@ -586,7 +586,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
+ RK3399_CLKGATE_CON(8), 15, GFLAGS),
+
+ COMPOSITE(SCLK_SPDIF_REC_DPTX, "clk_spdif_rec_dptx", mux_pll_src_cpll_gpll_p, 0,
+- RK3399_CLKSEL_CON(32), 15, 1, MFLAGS, 0, 5, DFLAGS,
++ RK3399_CLKSEL_CON(32), 15, 1, MFLAGS, 8, 5, DFLAGS,
+ RK3399_CLKGATE_CON(10), 6, GFLAGS),
+ /* i2s */
+ COMPOSITE(0, "clk_i2s0_div", mux_pll_src_cpll_gpll_p, 0,
+diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
+index 1fa1a32928d7..1b159171f1f9 100644
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -944,7 +944,7 @@ static int core_get_max_pstate(void)
+ if (err)
+ goto skip_tar;
+
+- tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
++ tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3);
+ err = rdmsrl_safe(tdp_msr, &tdp_ratio);
+ if (err)
+ goto skip_tar;
+diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
+index 10c305b4a2e1..4e0f8e720ad9 100644
+--- a/drivers/edac/edac_mc_sysfs.c
++++ b/drivers/edac/edac_mc_sysfs.c
+@@ -313,7 +313,6 @@ static struct device_type csrow_attr_type = {
+ * possible dynamic channel DIMM Label attribute files
+ *
+ */
+-
+ DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 0);
+ DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR,
+@@ -326,6 +325,10 @@ DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 4);
+ DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 5);
++DEVICE_CHANNEL(ch6_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 6);
++DEVICE_CHANNEL(ch7_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 7);
+
+ /* Total possible dynamic DIMM Label attribute file table */
+ static struct attribute *dynamic_csrow_dimm_attr[] = {
+@@ -335,6 +338,8 @@ static struct attribute *dynamic_csrow_dimm_attr[] = {
+ &dev_attr_legacy_ch3_dimm_label.attr.attr,
+ &dev_attr_legacy_ch4_dimm_label.attr.attr,
+ &dev_attr_legacy_ch5_dimm_label.attr.attr,
++ &dev_attr_legacy_ch6_dimm_label.attr.attr,
++ &dev_attr_legacy_ch7_dimm_label.attr.attr,
+ NULL
+ };
+
+@@ -351,6 +356,10 @@ DEVICE_CHANNEL(ch4_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 4);
+ DEVICE_CHANNEL(ch5_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 5);
++DEVICE_CHANNEL(ch6_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 6);
++DEVICE_CHANNEL(ch7_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 7);
+
+ /* Total possible dynamic ce_count attribute file table */
+ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+@@ -360,6 +369,8 @@ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+ &dev_attr_legacy_ch3_ce_count.attr.attr,
+ &dev_attr_legacy_ch4_ce_count.attr.attr,
+ &dev_attr_legacy_ch5_ce_count.attr.attr,
++ &dev_attr_legacy_ch6_ce_count.attr.attr,
++ &dev_attr_legacy_ch7_ce_count.attr.attr,
+ NULL
+ };
+
+@@ -371,9 +382,16 @@ static umode_t csrow_dev_is_visible(struct kobject *kobj,
+
+ if (idx >= csrow->nr_channels)
+ return 0;
++
++ if (idx >= ARRAY_SIZE(dynamic_csrow_ce_count_attr) - 1) {
++ WARN_ONCE(1, "idx: %d\n", idx);
++ return 0;
++ }
++
+ /* Only expose populated DIMMs */
+ if (!csrow->channels[idx]->dimm->nr_pages)
+ return 0;
++
+ return attr->mode;
+ }
+
+diff --git a/drivers/gpio/gpio-intel-mid.c b/drivers/gpio/gpio-intel-mid.c
+index cdaba13cb8e8..c0f7cce23f62 100644
+--- a/drivers/gpio/gpio-intel-mid.c
++++ b/drivers/gpio/gpio-intel-mid.c
+@@ -17,7 +17,6 @@
+ * Moorestown platform Langwell chip.
+ * Medfield platform Penwell chip.
+ * Clovertrail platform Cloverview chip.
+- * Merrifield platform Tangier chip.
+ */
+
+ #include <linux/module.h>
+@@ -64,10 +63,6 @@ enum GPIO_REG {
+ /* intel_mid gpio driver data */
+ struct intel_mid_gpio_ddata {
+ u16 ngpio; /* number of gpio pins */
+- u32 gplr_offset; /* offset of first GPLR register from base */
+- u32 flis_base; /* base address of FLIS registers */
+- u32 flis_len; /* length of FLIS registers */
+- u32 (*get_flis_offset)(int gpio);
+ u32 chip_irq_type; /* chip interrupt type */
+ };
+
+@@ -252,15 +247,6 @@ static const struct intel_mid_gpio_ddata gpio_cloverview_core = {
+ .chip_irq_type = INTEL_MID_IRQ_TYPE_EDGE,
+ };
+
+-static const struct intel_mid_gpio_ddata gpio_tangier = {
+- .ngpio = 192,
+- .gplr_offset = 4,
+- .flis_base = 0xff0c0000,
+- .flis_len = 0x8000,
+- .get_flis_offset = NULL,
+- .chip_irq_type = INTEL_MID_IRQ_TYPE_EDGE,
+-};
+-
+ static const struct pci_device_id intel_gpio_ids[] = {
+ {
+ /* Lincroft */
+@@ -287,11 +273,6 @@ static const struct pci_device_id intel_gpio_ids[] = {
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x08f7),
+ .driver_data = (kernel_ulong_t)&gpio_cloverview_core,
+ },
+- {
+- /* Tangier */
+- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x1199),
+- .driver_data = (kernel_ulong_t)&gpio_tangier,
+- },
+ { 0 }
+ };
+ MODULE_DEVICE_TABLE(pci, intel_gpio_ids);
+@@ -401,7 +382,7 @@ static int intel_gpio_probe(struct pci_dev *pdev,
+ spin_lock_init(&priv->lock);
+
+ pci_set_drvdata(pdev, priv);
+- retval = gpiochip_add_data(&priv->chip, priv);
++ retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
+ if (retval) {
+ dev_err(&pdev->dev, "gpiochip_add error %d\n", retval);
+ return retval;
+diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
+index 5e3be32ebb8d..3745de659594 100644
+--- a/drivers/gpio/gpio-pca953x.c
++++ b/drivers/gpio/gpio-pca953x.c
+@@ -90,7 +90,7 @@ MODULE_DEVICE_TABLE(acpi, pca953x_acpi_ids);
+ #define MAX_BANK 5
+ #define BANK_SZ 8
+
+-#define NBANK(chip) (chip->gpio_chip.ngpio / BANK_SZ)
++#define NBANK(chip) DIV_ROUND_UP(chip->gpio_chip.ngpio, BANK_SZ)
+
+ struct pca953x_chip {
+ unsigned gpio_start;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+index 9df1bcb35bf0..983175363b06 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+@@ -551,28 +551,19 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
+ le16_to_cpu(firmware_info->info.usReferenceClock);
+ ppll->reference_div = 0;
+
+- if (crev < 2)
+- ppll->pll_out_min =
+- le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Output);
+- else
+- ppll->pll_out_min =
+- le32_to_cpu(firmware_info->info_12.ulMinPixelClockPLL_Output);
++ ppll->pll_out_min =
++ le32_to_cpu(firmware_info->info_12.ulMinPixelClockPLL_Output);
+ ppll->pll_out_max =
+ le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output);
+
+- if (crev >= 4) {
+- ppll->lcd_pll_out_min =
+- le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
+- if (ppll->lcd_pll_out_min == 0)
+- ppll->lcd_pll_out_min = ppll->pll_out_min;
+- ppll->lcd_pll_out_max =
+- le16_to_cpu(firmware_info->info_14.usLcdMaxPixelClockPLL_Output) * 100;
+- if (ppll->lcd_pll_out_max == 0)
+- ppll->lcd_pll_out_max = ppll->pll_out_max;
+- } else {
++ ppll->lcd_pll_out_min =
++ le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
++ if (ppll->lcd_pll_out_min == 0)
+ ppll->lcd_pll_out_min = ppll->pll_out_min;
++ ppll->lcd_pll_out_max =
++ le16_to_cpu(firmware_info->info_14.usLcdMaxPixelClockPLL_Output) * 100;
++ if (ppll->lcd_pll_out_max == 0)
+ ppll->lcd_pll_out_max = ppll->pll_out_max;
+- }
+
+ if (ppll->pll_out_min == 0)
+ ppll->pll_out_min = 64800;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+index 35a1248aaa77..1b4c069f7765 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+@@ -10,6 +10,7 @@
+ #include <linux/slab.h>
+ #include <linux/acpi.h>
+ #include <linux/pci.h>
++#include <linux/delay.h>
+
+ #include "amd_acpi.h"
+
+@@ -259,6 +260,10 @@ static int amdgpu_atpx_set_discrete_state(struct amdgpu_atpx *atpx, u8 state)
+ if (!info)
+ return -EIO;
+ kfree(info);
++
++ /* 200ms delay is required after off */
++ if (state == 0)
++ msleep(200);
+ }
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+index cb07da41152b..ff0b55a65ca3 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+@@ -1690,7 +1690,6 @@ amdgpu_connector_add(struct amdgpu_device *adev,
+ DRM_MODE_SCALE_NONE);
+ /* no HPD on analog connectors */
+ amdgpu_connector->hpd.hpd = AMDGPU_HPD_NONE;
+- connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+ connector->interlace_allowed = true;
+ connector->doublescan_allowed = true;
+ break;
+@@ -1893,8 +1892,10 @@ amdgpu_connector_add(struct amdgpu_device *adev,
+ }
+
+ if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
+- if (i2c_bus->valid)
+- connector->polled = DRM_CONNECTOR_POLL_CONNECT;
++ if (i2c_bus->valid) {
++ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
++ DRM_CONNECTOR_POLL_DISCONNECT;
++ }
+ } else
+ connector->polled = DRM_CONNECTOR_POLL_HPD;
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 6e920086af46..b7f5650d8218 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -1841,7 +1841,23 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
+ }
+
+ drm_kms_helper_poll_enable(dev);
++
++ /*
++ * Most of the connector probing functions try to acquire runtime pm
++ * refs to ensure that the GPU is powered on when connector polling is
++ * performed. Since we're calling this from a runtime PM callback,
++ * trying to acquire rpm refs will cause us to deadlock.
++ *
++ * Since we're guaranteed to be holding the rpm lock, it's safe to
++ * temporarily disable the rpm helpers so this doesn't deadlock us.
++ */
++#ifdef CONFIG_PM
++ dev->dev->power.disable_depth++;
++#endif
+ drm_helper_hpd_irq_event(dev);
++#ifdef CONFIG_PM
++ dev->dev->power.disable_depth--;
++#endif
+
+ if (fbcon) {
+ amdgpu_fbdev_set_suspend(adev, 0);
+diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+index 48b6bd671cda..c32eca26155c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
++++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+@@ -98,6 +98,7 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
++ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (dig->backlight_level == 0)
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+index 1feb6439cb0b..92695481093e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+@@ -167,6 +167,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
+ break;
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
++ case CHIP_MULLINS:
+ return 0;
+ default: BUG();
+ }
+diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
+index 90b35c5c10a4..ffc7c0dd3f14 100644
+--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
++++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
+@@ -592,12 +592,12 @@ bool atomctrl_get_pp_assign_pin(
+ const uint32_t pinId,
+ pp_atomctrl_gpio_pin_assignment *gpio_pin_assignment)
+ {
+- bool bRet = 0;
++ bool bRet = false;
+ ATOM_GPIO_PIN_LUT *gpio_lookup_table =
+ get_gpio_lookup_table(hwmgr->device);
+
+ PP_ASSERT_WITH_CODE((NULL != gpio_lookup_table),
+- "Could not find GPIO lookup Table in BIOS.", return -1);
++ "Could not find GPIO lookup Table in BIOS.", return false);
+
+ bRet = atomctrl_lookup_gpio_pin(gpio_lookup_table, pinId,
+ gpio_pin_assignment);
+diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
+index 059f7c39c582..a7916e5f8864 100644
+--- a/drivers/gpu/drm/drm_cache.c
++++ b/drivers/gpu/drm/drm_cache.c
+@@ -136,6 +136,7 @@ drm_clflush_virt_range(void *addr, unsigned long length)
+ mb();
+ for (; addr < end; addr += size)
+ clflushopt(addr);
++ clflushopt(end - 1); /* force serialisation */
+ mb();
+ return;
+ }
+diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
+index eeaf5a7c3aa7..67b28f8018d2 100644
+--- a/drivers/gpu/drm/drm_dp_helper.c
++++ b/drivers/gpu/drm/drm_dp_helper.c
+@@ -203,7 +203,7 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
+
+ ret = aux->transfer(aux, &msg);
+
+- if (ret > 0) {
++ if (ret >= 0) {
+ native_reply = msg.reply & DP_AUX_NATIVE_REPLY_MASK;
+ if (native_reply == DP_AUX_NATIVE_REPLY_ACK) {
+ if (ret == size)
+diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
+index 7df26d4b7ad8..2cb472b9976a 100644
+--- a/drivers/gpu/drm/drm_edid.c
++++ b/drivers/gpu/drm/drm_edid.c
+@@ -74,6 +74,8 @@
+ #define EDID_QUIRK_FORCE_8BPC (1 << 8)
+ /* Force 12bpc */
+ #define EDID_QUIRK_FORCE_12BPC (1 << 9)
++/* Force 6bpc */
++#define EDID_QUIRK_FORCE_6BPC (1 << 10)
+
+ struct detailed_mode_closure {
+ struct drm_connector *connector;
+@@ -100,6 +102,9 @@ static struct edid_quirk {
+ /* Unknown Acer */
+ { "ACR", 2423, EDID_QUIRK_FIRST_DETAILED_PREFERRED },
+
++ /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */
++ { "AEO", 0, EDID_QUIRK_FORCE_6BPC },
++
+ /* Belinea 10 15 55 */
+ { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
+ { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
+@@ -4082,6 +4087,9 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
+
+ drm_add_display_info(edid, &connector->display_info, connector);
+
++ if (quirks & EDID_QUIRK_FORCE_6BPC)
++ connector->display_info.bpc = 6;
++
+ if (quirks & EDID_QUIRK_FORCE_8BPC)
+ connector->display_info.bpc = 8;
+
+diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
+index bc3f2e6842e7..227a63ee0067 100644
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -2591,6 +2591,8 @@ struct drm_i915_cmd_table {
+ #define SKL_REVID_D0 0x3
+ #define SKL_REVID_E0 0x4
+ #define SKL_REVID_F0 0x5
++#define SKL_REVID_G0 0x6
++#define SKL_REVID_H0 0x7
+
+ #define IS_SKL_REVID(p, since, until) (IS_SKYLAKE(p) && IS_REVID(p, since, until))
+
+diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
+index 3fcf7dd5b6ca..bc3b6dde7b4b 100644
+--- a/drivers/gpu/drm/i915/i915_reg.h
++++ b/drivers/gpu/drm/i915/i915_reg.h
+@@ -1672,6 +1672,9 @@ enum skl_disp_power_wells {
+
+ #define GEN7_TLB_RD_ADDR _MMIO(0x4700)
+
++#define GEN9_GAMT_ECO_REG_RW_IA _MMIO(0x4ab0)
++#define GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS (1<<18)
++
+ #define GAMT_CHKN_BIT_REG _MMIO(0x4ab8)
+ #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28)
+
+@@ -7538,6 +7541,8 @@ enum skl_disp_power_wells {
+
+ #define CDCLK_FREQ _MMIO(0x46200)
+
++#define CDCLK_FREQ _MMIO(0x46200)
++
+ #define _TRANSA_MSA_MISC 0x60410
+ #define _TRANSB_MSA_MISC 0x61410
+ #define _TRANSC_MSA_MISC 0x62410
+diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
+index 3074c56a643d..32893195d7c4 100644
+--- a/drivers/gpu/drm/i915/intel_display.c
++++ b/drivers/gpu/drm/i915/intel_display.c
+@@ -9700,6 +9700,8 @@ static void broadwell_set_cdclk(struct drm_device *dev, int cdclk)
+
+ I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1);
+
++ I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1);
++
+ intel_update_cdclk(dev);
+
+ WARN(cdclk != dev_priv->cdclk_freq,
+@@ -12095,21 +12097,11 @@ connected_sink_compute_bpp(struct intel_connector *connector,
+ pipe_config->pipe_bpp = connector->base.display_info.bpc*3;
+ }
+
+- /* Clamp bpp to default limit on screens without EDID 1.4 */
+- if (connector->base.display_info.bpc == 0) {
+- int type = connector->base.connector_type;
+- int clamp_bpp = 24;
+-
+- /* Fall back to 18 bpp when DP sink capability is unknown. */
+- if (type == DRM_MODE_CONNECTOR_DisplayPort ||
+- type == DRM_MODE_CONNECTOR_eDP)
+- clamp_bpp = 18;
+-
+- if (bpp > clamp_bpp) {
+- DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n",
+- bpp, clamp_bpp);
+- pipe_config->pipe_bpp = clamp_bpp;
+- }
++ /* Clamp bpp to 8 on screens without EDID 1.4 */
++ if (connector->base.display_info.bpc == 0 && bpp > 24) {
++ DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n",
++ bpp);
++ pipe_config->pipe_bpp = 24;
+ }
+ }
+
+diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
+index 2863b92c9da6..c1ca5a7ba86f 100644
+--- a/drivers/gpu/drm/i915/intel_pm.c
++++ b/drivers/gpu/drm/i915/intel_pm.c
+@@ -4563,7 +4563,8 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
+ else
+ gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
+ dev_priv->rps.last_adj = 0;
+- I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
++ I915_WRITE(GEN6_PMINTRMSK,
++ gen6_sanitize_rps_pm_mask(dev_priv, ~0));
+ }
+ mutex_unlock(&dev_priv->rps.hw_lock);
+
+diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
+index 68c5af079ef8..9d778f3ab27d 100644
+--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
++++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
+@@ -1135,6 +1135,11 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
+ /* WaDisableGafsUnitClkGating:skl */
+ WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
++ /* WaInPlaceDecompressionHang:skl */
++ if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
++ WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
++ GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
++
+ /* WaDisableLSQCROPERFforOCL:skl */
+ ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
+ if (ret)
+@@ -1194,6 +1199,11 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
+ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
++ /* WaInPlaceDecompressionHang:bxt */
++ if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
++ WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
++ GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
++
+ return 0;
+ }
+
+@@ -1241,6 +1251,10 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine)
+ GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+
++ /* WaInPlaceDecompressionHang:kbl */
++ WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
++ GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
++
+ /* WaDisableLSQCROPERFforOCL:kbl */
+ ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
+ if (ret)
+diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
+index 11f8dd9c0edb..d6c134b01833 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
++++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
+@@ -324,7 +324,16 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
+ !vga_switcheroo_handler_flags())
+ return -EPROBE_DEFER;
+
+- /* remove conflicting drivers (vesafb, efifb etc) */
++ /* We need to check that the chipset is supported before booting
++ * fbdev off the hardware, as there's no way to put it back.
++ */
++ ret = nvkm_device_pci_new(pdev, NULL, "error", true, false, 0, &device);
++ if (ret)
++ return ret;
++
++ nvkm_device_del(&device);
++
++ /* Remove conflicting drivers (vesafb, efifb etc). */
+ aper = alloc_apertures(3);
+ if (!aper)
+ return -ENOMEM;
+diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
+index 7d9248b8c664..da8fd5ff9d0f 100644
+--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
+@@ -107,11 +107,11 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ ((image->dx + image->width) & 0xffff));
+ OUT_RING(chan, bg);
+ OUT_RING(chan, fg);
+- OUT_RING(chan, (image->height << 16) | image->width);
++ OUT_RING(chan, (image->height << 16) | ALIGN(image->width, 8));
+ OUT_RING(chan, (image->height << 16) | image->width);
+ OUT_RING(chan, (image->dy << 16) | (image->dx & 0xffff));
+
+- dsize = ALIGN(image->width * image->height, 32) >> 5;
++ dsize = ALIGN(ALIGN(image->width, 8) * image->height, 32) >> 5;
+ while (dsize) {
+ int iter_len = dsize > 128 ? 128 : dsize;
+
+diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
+index 1aeb698e9707..af3d3c49411a 100644
+--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
+@@ -125,7 +125,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ OUT_RING(chan, 0);
+ OUT_RING(chan, image->dy);
+
+- dwords = ALIGN(image->width * image->height, 32) >> 5;
++ dwords = ALIGN(ALIGN(image->width, 8) * image->height, 32) >> 5;
+ while (dwords) {
+ int push = dwords > 2047 ? 2047 : dwords;
+
+diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
+index 839f4c8c1805..054b6a056d99 100644
+--- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
+@@ -125,7 +125,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ OUT_RING (chan, 0);
+ OUT_RING (chan, image->dy);
+
+- dwords = ALIGN(image->width * image->height, 32) >> 5;
++ dwords = ALIGN(ALIGN(image->width, 8) * image->height, 32) >> 5;
+ while (dwords) {
+ int push = dwords > 2047 ? 2047 : dwords;
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
+index 69de8c6259fe..f1e15a4d4f64 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
+@@ -76,8 +76,8 @@ nv30_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+ nvkm_wo32(chan->inst, i, 0x00040004);
+ for (i = 0x1f18; i <= 0x3088 ; i += 16) {
+ nvkm_wo32(chan->inst, i + 0, 0x10700ff9);
+- nvkm_wo32(chan->inst, i + 1, 0x0436086c);
+- nvkm_wo32(chan->inst, i + 2, 0x000c001b);
++ nvkm_wo32(chan->inst, i + 4, 0x0436086c);
++ nvkm_wo32(chan->inst, i + 8, 0x000c001b);
+ }
+ for (i = 0x30b8; i < 0x30c8; i += 4)
+ nvkm_wo32(chan->inst, i, 0x0000ffff);
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
+index 2207dac23981..300f5ed5de0b 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
+@@ -75,8 +75,8 @@ nv34_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+ nvkm_wo32(chan->inst, i, 0x00040004);
+ for (i = 0x15ac; i <= 0x271c ; i += 16) {
+ nvkm_wo32(chan->inst, i + 0, 0x10700ff9);
+- nvkm_wo32(chan->inst, i + 1, 0x0436086c);
+- nvkm_wo32(chan->inst, i + 2, 0x000c001b);
++ nvkm_wo32(chan->inst, i + 4, 0x0436086c);
++ nvkm_wo32(chan->inst, i + 8, 0x000c001b);
+ }
+ for (i = 0x274c; i < 0x275c; i += 4)
+ nvkm_wo32(chan->inst, i, 0x0000ffff);
+diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
+index 587cae4e73c9..56bb758f4e33 100644
+--- a/drivers/gpu/drm/radeon/atombios_encoders.c
++++ b/drivers/gpu/drm/radeon/atombios_encoders.c
+@@ -120,6 +120,7 @@ atombios_set_backlight_level(struct radeon_encoder *radeon_encoder, u8 level)
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
++ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (dig->backlight_level == 0)
+ atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
+ else {
+diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
+index f8097a0e7a79..5df3ec73021b 100644
+--- a/drivers/gpu/drm/radeon/radeon_atombios.c
++++ b/drivers/gpu/drm/radeon/radeon_atombios.c
+@@ -1155,7 +1155,7 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
+ le16_to_cpu(firmware_info->info.usReferenceClock);
+ p1pll->reference_div = 0;
+
+- if (crev < 2)
++ if ((frev < 2) && (crev < 2))
+ p1pll->pll_out_min =
+ le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Output);
+ else
+@@ -1164,7 +1164,7 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
+ p1pll->pll_out_max =
+ le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output);
+
+- if (crev >= 4) {
++ if (((frev < 2) && (crev >= 4)) || (frev >= 2)) {
+ p1pll->lcd_pll_out_min =
+ le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
+ if (p1pll->lcd_pll_out_min == 0)
+diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+index 95f4fea89302..1b3f4e51f5dc 100644
+--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
++++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+@@ -10,6 +10,7 @@
+ #include <linux/slab.h>
+ #include <linux/acpi.h>
+ #include <linux/pci.h>
++#include <linux/delay.h>
+
+ #include "radeon_acpi.h"
+
+@@ -258,6 +259,10 @@ static int radeon_atpx_set_discrete_state(struct radeon_atpx *atpx, u8 state)
+ if (!info)
+ return -EIO;
+ kfree(info);
++
++ /* 200ms delay is required after off */
++ if (state == 0)
++ msleep(200);
+ }
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
+index 81a63d7f5cd9..b79f3b002471 100644
+--- a/drivers/gpu/drm/radeon/radeon_connectors.c
++++ b/drivers/gpu/drm/radeon/radeon_connectors.c
+@@ -2064,7 +2064,6 @@ radeon_add_atom_connector(struct drm_device *dev,
+ RADEON_OUTPUT_CSC_BYPASS);
+ /* no HPD on analog connectors */
+ radeon_connector->hpd.hpd = RADEON_HPD_NONE;
+- connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+ connector->interlace_allowed = true;
+ connector->doublescan_allowed = true;
+ break;
+@@ -2314,8 +2313,10 @@ radeon_add_atom_connector(struct drm_device *dev,
+ }
+
+ if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) {
+- if (i2c_bus->valid)
+- connector->polled = DRM_CONNECTOR_POLL_CONNECT;
++ if (i2c_bus->valid) {
++ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
++ DRM_CONNECTOR_POLL_DISCONNECT;
++ }
+ } else
+ connector->polled = DRM_CONNECTOR_POLL_HPD;
+
+@@ -2391,7 +2392,6 @@ radeon_add_legacy_connector(struct drm_device *dev,
+ 1);
+ /* no HPD on analog connectors */
+ radeon_connector->hpd.hpd = RADEON_HPD_NONE;
+- connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+ connector->interlace_allowed = true;
+ connector->doublescan_allowed = true;
+ break;
+@@ -2476,10 +2476,13 @@ radeon_add_legacy_connector(struct drm_device *dev,
+ }
+
+ if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) {
+- if (i2c_bus->valid)
+- connector->polled = DRM_CONNECTOR_POLL_CONNECT;
++ if (i2c_bus->valid) {
++ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
++ DRM_CONNECTOR_POLL_DISCONNECT;
++ }
+ } else
+ connector->polled = DRM_CONNECTOR_POLL_HPD;
++
+ connector->display_info.subpixel_order = subpixel_order;
+ drm_connector_register(connector);
+ }
+diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+index 1c4d5b5a70a2..b1673236c356 100644
+--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+@@ -1048,6 +1048,17 @@ static void vop_crtc_destroy(struct drm_crtc *crtc)
+ drm_crtc_cleanup(crtc);
+ }
+
++static void vop_crtc_reset(struct drm_crtc *crtc)
++{
++ if (crtc->state)
++ __drm_atomic_helper_crtc_destroy_state(crtc->state);
++ kfree(crtc->state);
++
++ crtc->state = kzalloc(sizeof(struct rockchip_crtc_state), GFP_KERNEL);
++ if (crtc->state)
++ crtc->state->crtc = crtc;
++}
++
+ static struct drm_crtc_state *vop_crtc_duplicate_state(struct drm_crtc *crtc)
+ {
+ struct rockchip_crtc_state *rockchip_state;
+@@ -1073,7 +1084,7 @@ static const struct drm_crtc_funcs vop_crtc_funcs = {
+ .set_config = drm_atomic_helper_set_config,
+ .page_flip = drm_atomic_helper_page_flip,
+ .destroy = vop_crtc_destroy,
+- .reset = drm_atomic_helper_crtc_reset,
++ .reset = vop_crtc_reset,
+ .atomic_duplicate_state = vop_crtc_duplicate_state,
+ .atomic_destroy_state = vop_crtc_destroy_state,
+ };
+diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
+index 16b6f11a0700..99ec3ff7563b 100644
+--- a/drivers/hid/uhid.c
++++ b/drivers/hid/uhid.c
+@@ -51,10 +51,26 @@ struct uhid_device {
+ u32 report_id;
+ u32 report_type;
+ struct uhid_event report_buf;
++ struct work_struct worker;
+ };
+
+ static struct miscdevice uhid_misc;
+
++static void uhid_device_add_worker(struct work_struct *work)
++{
++ struct uhid_device *uhid = container_of(work, struct uhid_device, worker);
++ int ret;
++
++ ret = hid_add_device(uhid->hid);
++ if (ret) {
++ hid_err(uhid->hid, "Cannot register HID device: error %d\n", ret);
++
++ hid_destroy_device(uhid->hid);
++ uhid->hid = NULL;
++ uhid->running = false;
++ }
++}
++
+ static void uhid_queue(struct uhid_device *uhid, struct uhid_event *ev)
+ {
+ __u8 newhead;
+@@ -498,18 +514,14 @@ static int uhid_dev_create2(struct uhid_device *uhid,
+ uhid->hid = hid;
+ uhid->running = true;
+
+- ret = hid_add_device(hid);
+- if (ret) {
+- hid_err(hid, "Cannot register HID device\n");
+- goto err_hid;
+- }
++ /* Adding a HID device is done through a worker, to allow HID drivers
++ * which use feature requests during .probe to work; otherwise they would
++ * be blocked on devlock, which is held by uhid_char_write.
++ */
++ schedule_work(&uhid->worker);
+
+ return 0;
+
+-err_hid:
+- hid_destroy_device(hid);
+- uhid->hid = NULL;
+- uhid->running = false;
+ err_free:
+ kfree(uhid->rd_data);
+ uhid->rd_data = NULL;
+@@ -550,6 +562,8 @@ static int uhid_dev_destroy(struct uhid_device *uhid)
+ uhid->running = false;
+ wake_up_interruptible(&uhid->report_wait);
+
++ cancel_work_sync(&uhid->worker);
++
+ hid_destroy_device(uhid->hid);
+ kfree(uhid->rd_data);
+
+@@ -612,6 +626,7 @@ static int uhid_char_open(struct inode *inode, struct file *file)
+ init_waitqueue_head(&uhid->waitq);
+ init_waitqueue_head(&uhid->report_wait);
+ uhid->running = false;
++ INIT_WORK(&uhid->worker, uhid_device_add_worker);
+
+ file->private_data = uhid;
+ nonseekable_open(inode, file);
+diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
+index 952f20fdc7e3..e82f7e1c217c 100644
+--- a/drivers/hv/vmbus_drv.c
++++ b/drivers/hv/vmbus_drv.c
+@@ -42,6 +42,7 @@
+ #include <linux/screen_info.h>
+ #include <linux/kdebug.h>
+ #include <linux/efi.h>
++#include <linux/random.h>
+ #include "hyperv_vmbus.h"
+
+ static struct acpi_device *hv_acpi_dev;
+@@ -806,6 +807,8 @@ static void vmbus_isr(void)
+ else
+ tasklet_schedule(hv_context.msg_dpc[cpu]);
+ }
++
++ add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
+ }
+
+
+diff --git a/drivers/i2c/busses/i2c-efm32.c b/drivers/i2c/busses/i2c-efm32.c
+index 8eff62738877..e253598d764c 100644
+--- a/drivers/i2c/busses/i2c-efm32.c
++++ b/drivers/i2c/busses/i2c-efm32.c
+@@ -433,7 +433,7 @@ static int efm32_i2c_probe(struct platform_device *pdev)
+ ret = request_irq(ddata->irq, efm32_i2c_irq, 0, DRIVER_NAME, ddata);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "failed to request irq (%d)\n", ret);
+- return ret;
++ goto err_disable_clk;
+ }
+
+ ret = i2c_add_adapter(&ddata->adapter);
+diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
+index 1eb9b1294a63..dbfd854c32c9 100644
+--- a/drivers/infiniband/core/rw.c
++++ b/drivers/infiniband/core/rw.c
+@@ -58,19 +58,13 @@ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
+ return false;
+ }
+
+-static inline u32 rdma_rw_max_sge(struct ib_device *dev,
+- enum dma_data_direction dir)
+-{
+- return dir == DMA_TO_DEVICE ?
+- dev->attrs.max_sge : dev->attrs.max_sge_rd;
+-}
+-
+ static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev)
+ {
+ /* arbitrary limit to avoid allocating gigantic resources */
+ return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256);
+ }
+
++/* Caller must have zero-initialized *reg. */
+ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
+ struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
+ u32 sg_cnt, u32 offset)
+@@ -114,6 +108,7 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir)
+ {
++ struct rdma_rw_reg_ctx *prev = NULL;
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ int i, j, ret = 0, count = 0;
+
+@@ -125,7 +120,6 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ }
+
+ for (i = 0; i < ctx->nr_ops; i++) {
+- struct rdma_rw_reg_ctx *prev = i ? &ctx->reg[i - 1] : NULL;
+ struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
+ u32 nents = min(sg_cnt, pages_per_mr);
+
+@@ -162,9 +156,13 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ sg_cnt -= nents;
+ for (j = 0; j < nents; j++)
+ sg = sg_next(sg);
++ prev = reg;
+ offset = 0;
+ }
+
++ if (prev)
++ prev->wr.wr.next = NULL;
++
+ ctx->type = RDMA_RW_MR;
+ return count;
+
+@@ -181,7 +179,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir)
+ {
+ struct ib_device *dev = qp->pd->device;
+- u32 max_sge = rdma_rw_max_sge(dev, dir);
++ u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
++ qp->max_read_sge;
+ struct ib_sge *sge;
+ u32 total_len = 0, i, j;
+
+@@ -205,11 +204,10 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+ rdma_wr->remote_addr = remote_addr + total_len;
+ rdma_wr->rkey = rkey;
++ rdma_wr->wr.num_sge = nr_sge;
+ rdma_wr->wr.sg_list = sge;
+
+ for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
+- rdma_wr->wr.num_sge++;
+-
+ sge->addr = ib_sg_dma_address(dev, sg) + offset;
+ sge->length = ib_sg_dma_len(dev, sg) - offset;
+ sge->lkey = qp->pd->local_dma_lkey;
+@@ -220,8 +218,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ offset = 0;
+ }
+
+- if (i + 1 < ctx->nr_ops)
+- rdma_wr->wr.next = &ctx->map.wrs[i + 1].wr;
++ rdma_wr->wr.next = i + 1 < ctx->nr_ops ?
++ &ctx->map.wrs[i + 1].wr : NULL;
+ }
+
+ ctx->type = RDMA_RW_MULTI_WR;
+diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
+index 6298f54b4137..e39a0b597234 100644
+--- a/drivers/infiniband/core/verbs.c
++++ b/drivers/infiniband/core/verbs.c
+@@ -814,6 +814,15 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
+ }
+ }
+
++ /*
++ * Note: all hw drivers guarantee that max_send_sge is lower than
++ * the device RDMA WRITE SGE limit but not all hw drivers ensure that
++ * max_send_sge <= max_sge_rd.
++ */
++ qp->max_write_sge = qp_init_attr->cap.max_send_sge;
++ qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge,
++ device->attrs.max_sge_rd);
++
+ return qp;
+ }
+ EXPORT_SYMBOL(ib_create_qp);
+diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
+index 53e03c8ede79..79e6309460dc 100644
+--- a/drivers/infiniband/hw/mlx5/gsi.c
++++ b/drivers/infiniband/hw/mlx5/gsi.c
+@@ -69,15 +69,6 @@ static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
+ return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
+ }
+
+-static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index)
+-{
+- return ++index % gsi->cap.max_send_wr;
+-}
+-
+-#define for_each_outstanding_wr(gsi, index) \
+- for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \
+- index = next_outstanding(gsi, index))
+-
+ /* Call with gsi->lock locked */
+ static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
+ {
+@@ -85,8 +76,9 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
+ struct mlx5_ib_gsi_wr *wr;
+ u32 index;
+
+- for_each_outstanding_wr(gsi, index) {
+- wr = &gsi->outstanding_wrs[index];
++ for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
++ index++) {
++ wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];
+
+ if (!wr->completed)
+ break;
+@@ -430,8 +422,9 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
+ return -ENOMEM;
+ }
+
+- gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi];
+- gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi);
++ gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
++ gsi->cap.max_send_wr];
++ gsi->outstanding_pi++;
+
+ if (!wc) {
+ memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
+diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
+index 11aa6a34bd71..1da8d01a6855 100644
+--- a/drivers/infiniband/sw/rdmavt/Kconfig
++++ b/drivers/infiniband/sw/rdmavt/Kconfig
+@@ -1,6 +1,5 @@
+ config INFINIBAND_RDMAVT
+ tristate "RDMA verbs transport library"
+ depends on 64BIT
+- default m
+ ---help---
+ This is a common software verbs provider for RDMA networks.
+diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
+index 6ca6fa80dd6e..f2f229efbe64 100644
+--- a/drivers/infiniband/sw/rdmavt/cq.c
++++ b/drivers/infiniband/sw/rdmavt/cq.c
+@@ -510,6 +510,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
+
+ if (rdi->worker)
+ return 0;
++ spin_lock_init(&rdi->n_cqs_lock);
+ rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
+ if (!rdi->worker)
+ return -ENOMEM;
+diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
+index 4a4155640d51..9a3b954e862d 100644
+--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
++++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
+@@ -1601,6 +1601,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
+ struct ib_qp_init_attr *qp_init;
+ struct srpt_port *sport = ch->sport;
+ struct srpt_device *sdev = sport->sdev;
++ const struct ib_device_attr *attrs = &sdev->device->attrs;
+ u32 srp_sq_size = sport->port_attrib.srp_sq_size;
+ int ret;
+
+@@ -1638,7 +1639,7 @@ retry:
+ */
+ qp_init->cap.max_send_wr = srp_sq_size / 2;
+ qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
+- qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
++ qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE);
+ qp_init->port_num = ch->sport->port;
+
+ ch->qp = ib_create_qp(sdev->pd, qp_init);
+diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
+index 389030487da7..581878782854 100644
+--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
++++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
+@@ -106,7 +106,11 @@ enum {
+ SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
+
+ SRPT_DEF_SG_TABLESIZE = 128,
+- SRPT_DEF_SG_PER_WQE = 16,
++ /*
++ * An experimentally determined value that avoids QP creation
++ * failing with "swiotlb buffer is full" on systems using the swiotlb.
++ */
++ SRPT_MAX_SG_PER_WQE = 16,
+
+ MIN_SRPT_SQ_SIZE = 16,
+ DEF_SRPT_SQ_SIZE = 4096,
+diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
+index 2f589857a039..d15b33813021 100644
+--- a/drivers/input/mouse/elan_i2c_core.c
++++ b/drivers/input/mouse/elan_i2c_core.c
+@@ -4,7 +4,8 @@
+ * Copyright (c) 2013 ELAN Microelectronics Corp.
+ *
+ * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw>
+- * Version: 1.6.0
++ * Author: KT Liao <kt.liao@emc.com.tw>
++ * Version: 1.6.2
+ *
+ * Based on cyapa driver:
+ * copyright (c) 2011-2012 Cypress Semiconductor, Inc.
+@@ -40,7 +41,7 @@
+ #include "elan_i2c.h"
+
+ #define DRIVER_NAME "elan_i2c"
+-#define ELAN_DRIVER_VERSION "1.6.1"
++#define ELAN_DRIVER_VERSION "1.6.2"
+ #define ELAN_VENDOR_ID 0x04f3
+ #define ETP_MAX_PRESSURE 255
+ #define ETP_FWIDTH_REDUCE 90
+@@ -199,9 +200,41 @@ static int elan_sleep(struct elan_tp_data *data)
+ return error;
+ }
+
++static int elan_query_product(struct elan_tp_data *data)
++{
++ int error;
++
++ error = data->ops->get_product_id(data->client, &data->product_id);
++ if (error)
++ return error;
++
++ error = data->ops->get_sm_version(data->client, &data->ic_type,
++ &data->sm_version);
++ if (error)
++ return error;
++
++ return 0;
++}
++
++static int elan_check_ASUS_special_fw(struct elan_tp_data *data)
++{
++ if (data->ic_type != 0x0E)
++ return false;
++
++ switch (data->product_id) {
++ case 0x05 ... 0x07:
++ case 0x09:
++ case 0x13:
++ return true;
++ default:
++ return false;
++ }
++}
++
+ static int __elan_initialize(struct elan_tp_data *data)
+ {
+ struct i2c_client *client = data->client;
++ bool woken_up = false;
+ int error;
+
+ error = data->ops->initialize(client);
+@@ -210,6 +243,27 @@ static int __elan_initialize(struct elan_tp_data *data)
+ return error;
+ }
+
++ error = elan_query_product(data);
++ if (error)
++ return error;
++
++ /*
++ * Some ASUS devices were shipped with firmware that requires
++ * touchpads to be woken up first, before attempting to switch
++ * them into absolute reporting mode.
++ */
++ if (elan_check_ASUS_special_fw(data)) {
++ error = data->ops->sleep_control(client, false);
++ if (error) {
++ dev_err(&client->dev,
++ "failed to wake device up: %d\n", error);
++ return error;
++ }
++
++ msleep(200);
++ woken_up = true;
++ }
++
+ data->mode |= ETP_ENABLE_ABS;
+ error = data->ops->set_mode(client, data->mode);
+ if (error) {
+@@ -218,11 +272,13 @@ static int __elan_initialize(struct elan_tp_data *data)
+ return error;
+ }
+
+- error = data->ops->sleep_control(client, false);
+- if (error) {
+- dev_err(&client->dev,
+- "failed to wake device up: %d\n", error);
+- return error;
++ if (!woken_up) {
++ error = data->ops->sleep_control(client, false);
++ if (error) {
++ dev_err(&client->dev,
++ "failed to wake device up: %d\n", error);
++ return error;
++ }
+ }
+
+ return 0;
+@@ -248,10 +304,6 @@ static int elan_query_device_info(struct elan_tp_data *data)
+ {
+ int error;
+
+- error = data->ops->get_product_id(data->client, &data->product_id);
+- if (error)
+- return error;
+-
+ error = data->ops->get_version(data->client, false, &data->fw_version);
+ if (error)
+ return error;
+@@ -261,11 +313,6 @@ static int elan_query_device_info(struct elan_tp_data *data)
+ if (error)
+ return error;
+
+- error = data->ops->get_sm_version(data->client, &data->ic_type,
+- &data->sm_version);
+- if (error)
+- return error;
+-
+ error = data->ops->get_version(data->client, true, &data->iap_version);
+ if (error)
+ return error;
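The elan_i2c hunks above are an ordering fix: the product ID and IC type are now queried before the mode switch, so firmware that needs special handling can be recognized in time, and touchpads on the ASUS list are woken up first (the woken_up flag then skips the normal end-of-init wake). A self-contained sketch of that quirk-before-configure ordering, with stubbed ops and only a subset of the IDs; all names here are illustrative, not the driver's:

/* Quirk-before-configure ordering in miniature: identify the part,
 * apply any wake-up quirk, then program the operating mode. Ops are
 * stubs standing in for the real bus transfers. */
#include <stdbool.h>
#include <stdio.h>

struct tp {
	unsigned int ic_type;
	unsigned int product_id;
	bool awake;
};

static int query_product(struct tp *t) { t->ic_type = 0x0E; t->product_id = 0x05; return 0; }
static int wake(struct tp *t)          { t->awake = true; return 0; }
static int set_abs_mode(struct tp *t)  { return t->awake ? 0 : -1; /* special fw fails asleep */ }

static bool needs_wake_first(const struct tp *t)
{
	/* subset of the IDs in the hunk above, for brevity */
	return t->ic_type == 0x0E &&
	       t->product_id >= 0x05 && t->product_id <= 0x07;
}

static int initialize(struct tp *t)
{
	bool woken_up = false;

	if (query_product(t))
		return -1;

	if (needs_wake_first(t)) {	/* quirk: wake before the mode switch */
		if (wake(t))
			return -1;
		woken_up = true;
	}

	if (set_abs_mode(t))
		return -1;

	return woken_up ? 0 : wake(t);	/* normal path wakes at the end */
}

int main(void)
{
	struct tp t = { 0 };
	printf("init: %d\n", initialize(&t));
	return 0;
}

The point is purely sequencing: identity first, quirks second, configuration last, with the quirk path recorded so later steps are not repeated.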
+diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c
+index 880c40b23f66..b7e8c11a6fc2 100644
+--- a/drivers/input/touchscreen/sur40.c
++++ b/drivers/input/touchscreen/sur40.c
+@@ -126,7 +126,7 @@ struct sur40_image_header {
+ #define VIDEO_PACKET_SIZE 16384
+
+ /* polling interval (ms) */
+-#define POLL_INTERVAL 4
++#define POLL_INTERVAL 1
+
+ /* maximum number of contacts FIXME: this is a guess? */
+ #define MAX_CONTACTS 64
+@@ -448,7 +448,7 @@ static void sur40_process_video(struct sur40_state *sur40)
+
+ /* return error if streaming was stopped in the meantime */
+ if (sur40->sequence == -1)
+- goto err_poll;
++ return;
+
+ /* mark as finished */
+ new_buf->vb.vb2_buf.timestamp = ktime_get_ns();
+@@ -736,6 +736,7 @@ static int sur40_start_streaming(struct vb2_queue *vq, unsigned int count)
+ static void sur40_stop_streaming(struct vb2_queue *vq)
+ {
+ struct sur40_state *sur40 = vb2_get_drv_priv(vq);
++ vb2_wait_for_all_buffers(vq);
+ sur40->sequence = -1;
+
+ /* Release all active buffers */
+diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
+index 634f636393d5..2511c8b6aada 100644
+--- a/drivers/iommu/amd_iommu.c
++++ b/drivers/iommu/amd_iommu.c
+@@ -466,9 +466,11 @@ static void init_iommu_group(struct device *dev)
+ if (!domain)
+ goto out;
+
+- dma_domain = to_pdomain(domain)->priv;
++ if (to_pdomain(domain)->flags == PD_DMA_OPS_MASK) {
++ dma_domain = to_pdomain(domain)->priv;
++ init_unity_mappings_for_device(dev, dma_domain);
++ }
+
+- init_unity_mappings_for_device(dev, dma_domain);
+ out:
+ iommu_group_put(group);
+ }
+@@ -2512,8 +2514,15 @@ static void update_device_table(struct protection_domain *domain)
+ {
+ struct iommu_dev_data *dev_data;
+
+- list_for_each_entry(dev_data, &domain->dev_list, list)
++ list_for_each_entry(dev_data, &domain->dev_list, list) {
+ set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled);
++
++ if (dev_data->devid == dev_data->alias)
++ continue;
++
++ /* There is an alias, update device table entry for it */
++ set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled);
++ }
+ }
+
+ static void update_domain(struct protection_domain *domain)
+@@ -3103,9 +3112,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
+ static void amd_iommu_domain_free(struct iommu_domain *dom)
+ {
+ struct protection_domain *domain;
+-
+- if (!dom)
+- return;
++ struct dma_ops_domain *dma_dom;
+
+ domain = to_pdomain(dom);
+
+@@ -3114,13 +3121,24 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
+
+ BUG_ON(domain->dev_cnt != 0);
+
+- if (domain->mode != PAGE_MODE_NONE)
+- free_pagetable(domain);
++ if (!dom)
++ return;
++
++ switch (dom->type) {
++ case IOMMU_DOMAIN_DMA:
++ dma_dom = domain->priv;
++ dma_ops_domain_free(dma_dom);
++ break;
++ default:
++ if (domain->mode != PAGE_MODE_NONE)
++ free_pagetable(domain);
+
+- if (domain->flags & PD_IOMMUV2_MASK)
+- free_gcr3_table(domain);
++ if (domain->flags & PD_IOMMUV2_MASK)
++ free_gcr3_table(domain);
+
+- protection_domain_free(domain);
++ protection_domain_free(domain);
++ break;
++ }
+ }
+
+ static void amd_iommu_detach_device(struct iommu_domain *dom,
+diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
+index 5ecc86cb74c8..e27e3b7df4e7 100644
+--- a/drivers/iommu/exynos-iommu.c
++++ b/drivers/iommu/exynos-iommu.c
+@@ -709,6 +709,7 @@ static struct platform_driver exynos_sysmmu_driver __refdata = {
+ .name = "exynos-sysmmu",
+ .of_match_table = sysmmu_of_match,
+ .pm = &sysmmu_pm_ops,
++ .suppress_bind_attrs = true,
+ }
+ };
+
+diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
+index 323dac9900ba..d416242c4ab8 100644
+--- a/drivers/iommu/intel-iommu.c
++++ b/drivers/iommu/intel-iommu.c
+@@ -2076,7 +2076,7 @@ out_unlock:
+ spin_unlock(&iommu->lock);
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+- return 0;
++ return ret;
+ }
+
+ struct domain_context_mapping_data {
+diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
+index a1ed1b73fed4..f5c90e1366ce 100644
+--- a/drivers/iommu/io-pgtable-arm.c
++++ b/drivers/iommu/io-pgtable-arm.c
+@@ -576,7 +576,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
+ return 0;
+
+ found_translation:
+- iova &= (ARM_LPAE_GRANULE(data) - 1);
++ iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
+ return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova;
+ }
+
+diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
+index b7341de87015..4bb49cd602e9 100644
+--- a/drivers/md/dm-flakey.c
++++ b/drivers/md/dm-flakey.c
+@@ -289,10 +289,16 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
+ pb->bio_submitted = true;
+
+ /*
+- * Map reads as normal.
++ * Map reads as normal only if corrupt_bio_byte is set.
+ */
+- if (bio_data_dir(bio) == READ)
+- goto map_bio;
++ if (bio_data_dir(bio) == READ) {
++ /* If flags were specified, only corrupt those that match. */
++ if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
++ all_corrupt_bio_flags_match(bio, fc))
++ goto map_bio;
++ else
++ return -EIO;
++ }
+
+ /*
+ * Drop writes?
+@@ -330,12 +336,13 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
+
+ /*
+ * Corrupt successful READs while in down state.
+- * If flags were specified, only corrupt those that match.
+ */
+- if (fc->corrupt_bio_byte && !error && pb->bio_submitted &&
+- (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) &&
+- all_corrupt_bio_flags_match(bio, fc))
+- corrupt_bio_data(bio, fc);
++ if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
++ if (fc->corrupt_bio_byte)
++ corrupt_bio_data(bio, fc);
++ else
++ return -EIO;
++ }
+
+ return error;
+ }
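Taken together, the two dm-flakey hunks change what READs see during a down interval: they are no longer passed through untouched, but are either mapped through and corrupted on completion (when corrupt_bio_byte is armed for reads and the flags match) or failed with -EIO. The resulting policy, as a small pure function; a sketch only, the real code operates on struct bio and struct flakey_c:

/* Read policy of the patched flakey target, as a pure function. */
#include <stdbool.h>

enum read_action { PASS_THROUGH, CORRUPT, FAIL_EIO };

static enum read_action flakey_read_action(bool device_down,
					   bool corrupt_configured,
					   bool flags_match)
{
	if (!device_down)
		return PASS_THROUGH;		/* up interval: normal I/O */
	if (corrupt_configured && flags_match)
		return CORRUPT;			/* down, corruption armed */
	return FAIL_EIO;			/* down, nothing to corrupt */
}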
+diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
+index 459a9f8905ed..0f0eb8a3d922 100644
+--- a/drivers/md/dm-verity-fec.c
++++ b/drivers/md/dm-verity-fec.c
+@@ -453,9 +453,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+ */
+
+ offset = block << v->data_dev_block_bits;
+-
+- res = offset;
+- div64_u64(res, v->fec->rounds << v->data_dev_block_bits);
++ res = div64_u64(offset, v->fec->rounds << v->data_dev_block_bits);
+
+ /*
+ * The base RS block we can feed to the interleaver to find out all
+diff --git a/drivers/md/dm.c b/drivers/md/dm.c
+index 1b2f96205361..fd40bcb3937d 100644
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -2175,7 +2175,7 @@ static void dm_request_fn(struct request_queue *q)
+ md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
+ md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
+ (ti->type->busy && ti->type->busy(ti))) {
+- blk_delay_queue(q, HZ / 100);
++ blk_delay_queue(q, 10);
+ return;
+ }
+
+@@ -3128,7 +3128,8 @@ static void unlock_fs(struct mapped_device *md)
+ * Caller must hold md->suspend_lock
+ */
+ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
+- unsigned suspend_flags, int interruptible)
++ unsigned suspend_flags, int interruptible,
++ int dmf_suspended_flag)
+ {
+ bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
+ bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
+@@ -3195,6 +3196,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
+ * to finish.
+ */
+ r = dm_wait_for_completion(md, interruptible);
++ if (!r)
++ set_bit(dmf_suspended_flag, &md->flags);
+
+ if (noflush)
+ clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
+@@ -3256,12 +3259,10 @@ retry:
+
+ map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+
+- r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
++ r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
+ if (r)
+ goto out_unlock;
+
+- set_bit(DMF_SUSPENDED, &md->flags);
+-
+ dm_table_postsuspend_targets(map);
+
+ out_unlock:
+@@ -3355,9 +3356,8 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
+ * would require changing .presuspend to return an error -- avoid this
+ * until there is a need for more elaborate variants of internal suspend.
+ */
+- (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);
+-
+- set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
++ (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE,
++ DMF_SUSPENDED_INTERNALLY);
+
+ dm_table_postsuspend_targets(map);
+ }
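The dm.c suspend hunks move the set_bit() from both callers into __dm_suspend() itself, so the suspended flag is raised at the instant dm_wait_for_completion() succeeds rather than after the helper returns, and the caller merely chooses which bit (DMF_SUSPENDED vs DMF_SUSPENDED_INTERNALLY). The shape of that change, with hypothetical names and a C11 atomic flag word:

/* Setting the state bit inside the helper, right where the wait
 * succeeds, instead of in each caller afterwards. Sketch only. */
#include <stdatomic.h>

#define F_SUSPENDED            (1u << 0)
#define F_SUSPENDED_INTERNALLY (1u << 1)

struct md { _Atomic unsigned int flags; };

static int wait_for_completion(struct md *md) { (void)md; return 0; }

static int do_suspend(struct md *md, unsigned int suspended_flag)
{
	int r = wait_for_completion(md);

	if (!r)	/* no window between "suspend finished" and flag set */
		atomic_fetch_or(&md->flags, suspended_flag);
	return r;
}

/* callers pick the bit: */
static int suspend(struct md *md)          { return do_suspend(md, F_SUSPENDED); }
static int internal_suspend(struct md *md) { return do_suspend(md, F_SUSPENDED_INTERNALLY); }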
+diff --git a/drivers/media/dvb-core/dvb_ringbuffer.c b/drivers/media/dvb-core/dvb_ringbuffer.c
+index 1100e98a7b1d..7df7fb3738a0 100644
+--- a/drivers/media/dvb-core/dvb_ringbuffer.c
++++ b/drivers/media/dvb-core/dvb_ringbuffer.c
+@@ -55,7 +55,13 @@ void dvb_ringbuffer_init(struct dvb_ringbuffer *rbuf, void *data, size_t len)
+
+ int dvb_ringbuffer_empty(struct dvb_ringbuffer *rbuf)
+ {
+- return (rbuf->pread==rbuf->pwrite);
++ /* smp_load_acquire() to load write pointer on reader side
++ * this pairs with smp_store_release() in dvb_ringbuffer_write(),
++ * dvb_ringbuffer_write_user(), or dvb_ringbuffer_reset()
++ *
++ * for memory barriers also see Documentation/circular-buffers.txt
++ */
++ return (rbuf->pread == smp_load_acquire(&rbuf->pwrite));
+ }
+
+
+@@ -64,7 +70,12 @@ ssize_t dvb_ringbuffer_free(struct dvb_ringbuffer *rbuf)
+ {
+ ssize_t free;
+
+- free = rbuf->pread - rbuf->pwrite;
++ /* ACCESS_ONCE() to load read pointer on writer side
++ * this pairs with smp_store_release() in dvb_ringbuffer_read(),
++ * dvb_ringbuffer_read_user(), dvb_ringbuffer_flush(),
++ * or dvb_ringbuffer_reset()
++ */
++ free = ACCESS_ONCE(rbuf->pread) - rbuf->pwrite;
+ if (free <= 0)
+ free += rbuf->size;
+ return free-1;
+@@ -76,7 +87,11 @@ ssize_t dvb_ringbuffer_avail(struct dvb_ringbuffer *rbuf)
+ {
+ ssize_t avail;
+
+- avail = rbuf->pwrite - rbuf->pread;
++ /* smp_load_acquire() to load write pointer on reader side
++ * this pairs with smp_store_release() in dvb_ringbuffer_write(),
++ * dvb_ringbuffer_write_user(), or dvb_ringbuffer_reset()
++ */
++ avail = smp_load_acquire(&rbuf->pwrite) - rbuf->pread;
+ if (avail < 0)
+ avail += rbuf->size;
+ return avail;
+@@ -86,14 +101,25 @@ ssize_t dvb_ringbuffer_avail(struct dvb_ringbuffer *rbuf)
+
+ void dvb_ringbuffer_flush(struct dvb_ringbuffer *rbuf)
+ {
+- rbuf->pread = rbuf->pwrite;
++ /* dvb_ringbuffer_flush() counts as read operation
++ * smp_load_acquire() to load write pointer
++ * smp_store_release() to update read pointer, this ensures that the
++ * correct pointer is visible for subsequent dvb_ringbuffer_free()
++ * calls on other cpu cores
++ */
++ smp_store_release(&rbuf->pread, smp_load_acquire(&rbuf->pwrite));
+ rbuf->error = 0;
+ }
+ EXPORT_SYMBOL(dvb_ringbuffer_flush);
+
+ void dvb_ringbuffer_reset(struct dvb_ringbuffer *rbuf)
+ {
+- rbuf->pread = rbuf->pwrite = 0;
++ /* dvb_ringbuffer_reset() counts as read and write operation
++ * smp_store_release() to update read pointer
++ */
++ smp_store_release(&rbuf->pread, 0);
++ /* smp_store_release() to update write pointer */
++ smp_store_release(&rbuf->pwrite, 0);
+ rbuf->error = 0;
+ }
+
+@@ -119,12 +145,17 @@ ssize_t dvb_ringbuffer_read_user(struct dvb_ringbuffer *rbuf, u8 __user *buf, si
+ return -EFAULT;
+ buf += split;
+ todo -= split;
+- rbuf->pread = 0;
++ /* smp_store_release() for read pointer update to ensure
++ * that buf is not overwritten until read is complete,
++ * this pairs with ACCESS_ONCE() in dvb_ringbuffer_free()
++ */
++ smp_store_release(&rbuf->pread, 0);
+ }
+ if (copy_to_user(buf, rbuf->data+rbuf->pread, todo))
+ return -EFAULT;
+
+- rbuf->pread = (rbuf->pread + todo) % rbuf->size;
++ /* smp_store_release() to update read pointer, see above */
++ smp_store_release(&rbuf->pread, (rbuf->pread + todo) % rbuf->size);
+
+ return len;
+ }
+@@ -139,11 +170,16 @@ void dvb_ringbuffer_read(struct dvb_ringbuffer *rbuf, u8 *buf, size_t len)
+ memcpy(buf, rbuf->data+rbuf->pread, split);
+ buf += split;
+ todo -= split;
+- rbuf->pread = 0;
++ /* smp_store_release() for read pointer update to ensure
++ * that buf is not overwritten until read is complete,
++ * this pairs with ACCESS_ONCE() in dvb_ringbuffer_free()
++ */
++ smp_store_release(&rbuf->pread, 0);
+ }
+ memcpy(buf, rbuf->data+rbuf->pread, todo);
+
+- rbuf->pread = (rbuf->pread + todo) % rbuf->size;
++ /* smp_store_release() to update read pointer, see above */
++ smp_store_release(&rbuf->pread, (rbuf->pread + todo) % rbuf->size);
+ }
+
+
+@@ -158,10 +194,16 @@ ssize_t dvb_ringbuffer_write(struct dvb_ringbuffer *rbuf, const u8 *buf, size_t
+ memcpy(rbuf->data+rbuf->pwrite, buf, split);
+ buf += split;
+ todo -= split;
+- rbuf->pwrite = 0;
++ /* smp_store_release() for write pointer update to ensure that
++ * written data is visible on other cpu cores before the pointer
++ * update, this pairs with smp_load_acquire() in
++ * dvb_ringbuffer_empty() or dvb_ringbuffer_avail()
++ */
++ smp_store_release(&rbuf->pwrite, 0);
+ }
+ memcpy(rbuf->data+rbuf->pwrite, buf, todo);
+- rbuf->pwrite = (rbuf->pwrite + todo) % rbuf->size;
++ /* smp_store_release() for write pointer update, see above */
++ smp_store_release(&rbuf->pwrite, (rbuf->pwrite + todo) % rbuf->size);
+
+ return len;
+ }
+@@ -181,12 +223,18 @@ ssize_t dvb_ringbuffer_write_user(struct dvb_ringbuffer *rbuf,
+ return len - todo;
+ buf += split;
+ todo -= split;
+- rbuf->pwrite = 0;
++ /* smp_store_release() for write pointer update to ensure that
++ * written data is visible on other cpu cores before the pointer
++ * update, this pairs with smp_load_acquire() in
++ * dvb_ringbuffer_empty() or dvb_ringbuffer_avail()
++ */
++ smp_store_release(&rbuf->pwrite, 0);
+ }
+ status = copy_from_user(rbuf->data+rbuf->pwrite, buf, todo);
+ if (status)
+ return len - todo;
+- rbuf->pwrite = (rbuf->pwrite + todo) % rbuf->size;
++ /* smp_store_release() for write pointer update, see above */
++ smp_store_release(&rbuf->pwrite, (rbuf->pwrite + todo) % rbuf->size);
+
+ return len;
+ }
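The dvb_ringbuffer hunks impose a uniform memory-ordering discipline on what were previously plain loads and stores: the producer publishes data with smp_store_release() on pwrite and the consumer observes it with smp_load_acquire(), while read-pointer updates are release-stores so the producer's free-space check cannot see a slot as reusable before the consumer has finished copying out of it (on that side the patch uses ACCESS_ONCE(), the pre-READ_ONCE() relaxed-load idiom). Below is a userspace analogue of the same single-producer/single-consumer discipline in C11 atomics; it is a sketch rather than the kernel API, and it uses an acquire load where the patch has ACCESS_ONCE(), the stronger and portable C11 choice:

/* Minimal single-producer/single-consumer byte ring with C11 atomics.
 * Keeps one slot empty; indices are always reduced modulo size. */
#include <stdatomic.h>
#include <stddef.h>

struct ring {
	unsigned char *data;
	size_t size;		/* capacity in bytes */
	_Atomic size_t pread;	/* consumer index */
	_Atomic size_t pwrite;	/* producer index */
};

static size_t ring_avail(struct ring *r)	/* reader side */
{
	/* acquire pairs with the producer's release: seeing the new
	 * pwrite implies seeing the bytes written before it */
	size_t w  = atomic_load_explicit(&r->pwrite, memory_order_acquire);
	size_t rd = atomic_load_explicit(&r->pread, memory_order_relaxed);
	return (w - rd + r->size) % r->size;
}

static int ring_put(struct ring *r, const unsigned char *buf, size_t len)
{
	size_t w  = atomic_load_explicit(&r->pwrite, memory_order_relaxed);
	size_t rd = atomic_load_explicit(&r->pread, memory_order_acquire);
	size_t space = (rd - w - 1 + r->size) % r->size;

	if (len > space)
		return -1;
	for (size_t i = 0; i < len; i++)
		r->data[(w + i) % r->size] = buf[i];
	/* release: publish the bytes before publishing the new index */
	atomic_store_explicit(&r->pwrite, (w + len) % r->size,
			      memory_order_release);
	return 0;
}

static int ring_get(struct ring *r, unsigned char *buf, size_t len)
{
	if (ring_avail(r) < len)
		return -1;
	size_t rd = atomic_load_explicit(&r->pread, memory_order_relaxed);
	for (size_t i = 0; i < len; i++)
		buf[i] = r->data[(rd + i) % r->size];
	/* release: the producer may not reuse these slots until the
	 * copy above is complete (pairs with acquire in ring_put) */
	atomic_store_explicit(&r->pread, (rd + len) % r->size,
			      memory_order_release);
	return 0;
}

Note that the pairing is per pointer: releases on pwrite pair with the consumer's acquires, and releases on pread pair with the producer's acquire, exactly as the added comments spell out helper by helper.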
+diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c
+index b16466fe35ee..beb4fd5bd326 100644
+--- a/drivers/media/platform/s5p-mfc/s5p_mfc.c
++++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c
+@@ -1050,6 +1050,11 @@ static int match_child(struct device *dev, void *data)
+ return !strcmp(dev_name(dev), (char *)data);
+ }
+
++static void s5p_mfc_memdev_release(struct device *dev)
++{
++ dma_release_declared_memory(dev);
++}
++
+ static void *mfc_get_drv_data(struct platform_device *pdev);
+
+ static int s5p_mfc_alloc_memdevs(struct s5p_mfc_dev *dev)
+@@ -1062,6 +1067,9 @@ static int s5p_mfc_alloc_memdevs(struct s5p_mfc_dev *dev)
+ mfc_err("Not enough memory\n");
+ return -ENOMEM;
+ }
++
++ dev_set_name(dev->mem_dev_l, "%s", "s5p-mfc-l");
++ dev->mem_dev_l->release = s5p_mfc_memdev_release;
+ device_initialize(dev->mem_dev_l);
+ of_property_read_u32_array(dev->plat_dev->dev.of_node,
+ "samsung,mfc-l", mem_info, 2);
+@@ -1079,6 +1087,9 @@ static int s5p_mfc_alloc_memdevs(struct s5p_mfc_dev *dev)
+ mfc_err("Not enough memory\n");
+ return -ENOMEM;
+ }
++
++ dev_set_name(dev->mem_dev_r, "%s", "s5p-mfc-r");
++ dev->mem_dev_r->release = s5p_mfc_memdev_release;
+ device_initialize(dev->mem_dev_r);
+ of_property_read_u32_array(dev->plat_dev->dev.of_node,
+ "samsung,mfc-r", mem_info, 2);
+diff --git a/drivers/media/rc/ir-rc5-decoder.c b/drivers/media/rc/ir-rc5-decoder.c
+index 6ffe776abf6b..a0fd4e6b2155 100644
+--- a/drivers/media/rc/ir-rc5-decoder.c
++++ b/drivers/media/rc/ir-rc5-decoder.c
+@@ -29,7 +29,7 @@
+ #define RC5_BIT_START (1 * RC5_UNIT)
+ #define RC5_BIT_END (1 * RC5_UNIT)
+ #define RC5X_SPACE (4 * RC5_UNIT)
+-#define RC5_TRAILER (10 * RC5_UNIT) /* In reality, approx 100 */
++#define RC5_TRAILER (6 * RC5_UNIT) /* In reality, approx 100 */
+
+ enum rc5_state {
+ STATE_INACTIVE,
+diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c
+index 99b303b702ac..e8ceb0e2f6d7 100644
+--- a/drivers/media/rc/nuvoton-cir.c
++++ b/drivers/media/rc/nuvoton-cir.c
+@@ -401,6 +401,7 @@ static int nvt_hw_detect(struct nvt_dev *nvt)
+ /* Check if we're wired for the alternate EFER setup */
+ nvt->chip_major = nvt_cr_read(nvt, CR_CHIP_ID_HI);
+ if (nvt->chip_major == 0xff) {
++ nvt_efm_disable(nvt);
+ nvt->cr_efir = CR_EFIR2;
+ nvt->cr_efdr = CR_EFDR2;
+ nvt_efm_enable(nvt);
+diff --git a/drivers/media/usb/usbtv/usbtv-audio.c b/drivers/media/usb/usbtv/usbtv-audio.c
+index 78c12d22dfbb..5dab02432e82 100644
+--- a/drivers/media/usb/usbtv/usbtv-audio.c
++++ b/drivers/media/usb/usbtv/usbtv-audio.c
+@@ -278,6 +278,9 @@ static void snd_usbtv_trigger(struct work_struct *work)
+ {
+ struct usbtv *chip = container_of(work, struct usbtv, snd_trigger);
+
++ if (!chip->snd)
++ return;
++
+ if (atomic_read(&chip->snd_stream))
+ usbtv_audio_start(chip);
+ else
+@@ -378,6 +381,8 @@ err:
+
+ void usbtv_audio_free(struct usbtv *usbtv)
+ {
++ cancel_work_sync(&usbtv->snd_trigger);
++
+ if (usbtv->snd && usbtv->udev) {
+ snd_card_free(usbtv->snd);
+ usbtv->snd = NULL;
+diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
+index 9fbcb67a9ee6..633fc1ab1d7a 100644
+--- a/drivers/media/v4l2-core/videobuf2-core.c
++++ b/drivers/media/v4l2-core/videobuf2-core.c
+@@ -1648,7 +1648,7 @@ static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb,
+ void *pb, int nonblocking)
+ {
+ unsigned long flags;
+- int ret;
++ int ret = 0;
+
+ /*
+ * Wait for at least one buffer to become available on the done_list.
+@@ -1664,10 +1664,12 @@ static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb,
+ spin_lock_irqsave(&q->done_lock, flags);
+ *vb = list_first_entry(&q->done_list, struct vb2_buffer, done_entry);
+ /*
+- * Only remove the buffer from done_list if v4l2_buffer can handle all
+- * the planes.
++ * Only remove the buffer from done_list if all planes can be
++ * handled. Some cases such as V4L2 file I/O and DVB have pb
++ * == NULL; skip the check then as there's nothing to verify.
+ */
+- ret = call_bufop(q, verify_planes_array, *vb, pb);
++ if (pb)
++ ret = call_bufop(q, verify_planes_array, *vb, pb);
+ if (!ret)
+ list_del(&(*vb)->done_entry);
+ spin_unlock_irqrestore(&q->done_lock, flags);
+diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c
+index 0b1b8c7b6ce5..7f366f1b0377 100644
+--- a/drivers/media/v4l2-core/videobuf2-v4l2.c
++++ b/drivers/media/v4l2-core/videobuf2-v4l2.c
+@@ -74,6 +74,11 @@ static int __verify_planes_array(struct vb2_buffer *vb, const struct v4l2_buffer
+ return 0;
+ }
+
++static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb)
++{
++ return __verify_planes_array(vb, pb);
++}
++
+ /**
+ * __verify_length() - Verify that the bytesused value for each plane fits in
+ * the plane length and that the data offset doesn't exceed the bytesused value.
+@@ -437,6 +442,7 @@ static int __fill_vb2_buffer(struct vb2_buffer *vb,
+ }
+
+ static const struct vb2_buf_ops v4l2_buf_ops = {
++ .verify_planes_array = __verify_planes_array_core,
+ .fill_user_buffer = __fill_v4l2_buffer,
+ .fill_vb2_buffer = __fill_vb2_buffer,
+ .copy_timestamp = __copy_timestamp,
+diff --git a/drivers/mfd/qcom_rpm.c b/drivers/mfd/qcom_rpm.c
+index 1be47ad6441b..880d4699bcb0 100644
+--- a/drivers/mfd/qcom_rpm.c
++++ b/drivers/mfd/qcom_rpm.c
+@@ -34,7 +34,13 @@ struct qcom_rpm_resource {
+ struct qcom_rpm_data {
+ u32 version;
+ const struct qcom_rpm_resource *resource_table;
+- unsigned n_resources;
++ unsigned int n_resources;
++ unsigned int req_ctx_off;
++ unsigned int req_sel_off;
++ unsigned int ack_ctx_off;
++ unsigned int ack_sel_off;
++ unsigned int req_sel_size;
++ unsigned int ack_sel_size;
+ };
+
+ struct qcom_rpm {
+@@ -61,11 +67,7 @@ struct qcom_rpm {
+
+ #define RPM_REQUEST_TIMEOUT (5 * HZ)
+
+-#define RPM_REQUEST_CONTEXT 3
+-#define RPM_REQ_SELECT 11
+-#define RPM_ACK_CONTEXT 15
+-#define RPM_ACK_SELECTOR 23
+-#define RPM_SELECT_SIZE 7
++#define RPM_MAX_SEL_SIZE 7
+
+ #define RPM_NOTIFICATION BIT(30)
+ #define RPM_REJECTED BIT(31)
+@@ -157,6 +159,12 @@ static const struct qcom_rpm_data apq8064_template = {
+ .version = 3,
+ .resource_table = apq8064_rpm_resource_table,
+ .n_resources = ARRAY_SIZE(apq8064_rpm_resource_table),
++ .req_ctx_off = 3,
++ .req_sel_off = 11,
++ .ack_ctx_off = 15,
++ .ack_sel_off = 23,
++ .req_sel_size = 4,
++ .ack_sel_size = 7,
+ };
+
+ static const struct qcom_rpm_resource msm8660_rpm_resource_table[] = {
+@@ -240,6 +248,12 @@ static const struct qcom_rpm_data msm8660_template = {
+ .version = 2,
+ .resource_table = msm8660_rpm_resource_table,
+ .n_resources = ARRAY_SIZE(msm8660_rpm_resource_table),
++ .req_ctx_off = 3,
++ .req_sel_off = 11,
++ .ack_ctx_off = 19,
++ .ack_sel_off = 27,
++ .req_sel_size = 7,
++ .ack_sel_size = 7,
+ };
+
+ static const struct qcom_rpm_resource msm8960_rpm_resource_table[] = {
+@@ -322,6 +336,12 @@ static const struct qcom_rpm_data msm8960_template = {
+ .version = 3,
+ .resource_table = msm8960_rpm_resource_table,
+ .n_resources = ARRAY_SIZE(msm8960_rpm_resource_table),
++ .req_ctx_off = 3,
++ .req_sel_off = 11,
++ .ack_ctx_off = 15,
++ .ack_sel_off = 23,
++ .req_sel_size = 4,
++ .ack_sel_size = 7,
+ };
+
+ static const struct qcom_rpm_resource ipq806x_rpm_resource_table[] = {
+@@ -362,6 +382,12 @@ static const struct qcom_rpm_data ipq806x_template = {
+ .version = 3,
+ .resource_table = ipq806x_rpm_resource_table,
+ .n_resources = ARRAY_SIZE(ipq806x_rpm_resource_table),
++ .req_ctx_off = 3,
++ .req_sel_off = 11,
++ .ack_ctx_off = 15,
++ .ack_sel_off = 23,
++ .req_sel_size = 4,
++ .ack_sel_size = 7,
+ };
+
+ static const struct of_device_id qcom_rpm_of_match[] = {
+@@ -380,7 +406,7 @@ int qcom_rpm_write(struct qcom_rpm *rpm,
+ {
+ const struct qcom_rpm_resource *res;
+ const struct qcom_rpm_data *data = rpm->data;
+- u32 sel_mask[RPM_SELECT_SIZE] = { 0 };
++ u32 sel_mask[RPM_MAX_SEL_SIZE] = { 0 };
+ int left;
+ int ret = 0;
+ int i;
+@@ -398,12 +424,12 @@ int qcom_rpm_write(struct qcom_rpm *rpm,
+ writel_relaxed(buf[i], RPM_REQ_REG(rpm, res->target_id + i));
+
+ bitmap_set((unsigned long *)sel_mask, res->select_id, 1);
+- for (i = 0; i < ARRAY_SIZE(sel_mask); i++) {
++ for (i = 0; i < rpm->data->req_sel_size; i++) {
+ writel_relaxed(sel_mask[i],
+- RPM_CTRL_REG(rpm, RPM_REQ_SELECT + i));
++ RPM_CTRL_REG(rpm, rpm->data->req_sel_off + i));
+ }
+
+- writel_relaxed(BIT(state), RPM_CTRL_REG(rpm, RPM_REQUEST_CONTEXT));
++ writel_relaxed(BIT(state), RPM_CTRL_REG(rpm, rpm->data->req_ctx_off));
+
+ reinit_completion(&rpm->ack);
+ regmap_write(rpm->ipc_regmap, rpm->ipc_offset, BIT(rpm->ipc_bit));
+@@ -426,10 +452,11 @@ static irqreturn_t qcom_rpm_ack_interrupt(int irq, void *dev)
+ u32 ack;
+ int i;
+
+- ack = readl_relaxed(RPM_CTRL_REG(rpm, RPM_ACK_CONTEXT));
+- for (i = 0; i < RPM_SELECT_SIZE; i++)
+- writel_relaxed(0, RPM_CTRL_REG(rpm, RPM_ACK_SELECTOR + i));
+- writel(0, RPM_CTRL_REG(rpm, RPM_ACK_CONTEXT));
++ ack = readl_relaxed(RPM_CTRL_REG(rpm, rpm->data->ack_ctx_off));
++ for (i = 0; i < rpm->data->ack_sel_size; i++)
++ writel_relaxed(0,
++ RPM_CTRL_REG(rpm, rpm->data->ack_sel_off + i));
++ writel(0, RPM_CTRL_REG(rpm, rpm->data->ack_ctx_off));
+
+ if (ack & RPM_NOTIFICATION) {
+ dev_warn(rpm->dev, "ignoring notification!\n");
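The qcom_rpm change is the standard move from hardcoded register offsets to per-SoC match data: apq8064, msm8960 and ipq806x share one layout (ack context at offset 15, four request-selector words) while msm8660 uses another (ack context at 19, seven words), and qcom_rpm_write() plus the ack handler now index through rpm->data instead of the removed #defines. The pattern in miniature, with values lifted from the tables above and otherwise illustrative names:

/* Data-driven register layout: per-variant offsets live in a
 * descriptor instead of #defines, so one driver serves every SoC. */
#include <stdio.h>

struct variant_data {
	const char *name;
	unsigned int req_ctx_off;
	unsigned int ack_ctx_off;
	unsigned int req_sel_size;
};

static const struct variant_data msm8660 = {
	.name = "msm8660", .req_ctx_off = 3, .ack_ctx_off = 19, .req_sel_size = 7,
};
static const struct variant_data msm8960 = {
	.name = "msm8960", .req_ctx_off = 3, .ack_ctx_off = 15, .req_sel_size = 4,
};

static void describe(const struct variant_data *d)
{
	/* code paths read d->... instead of a global constant */
	printf("%s: req ctx @%u, ack ctx @%u, %u selector words\n",
	       d->name, d->req_ctx_off, d->ack_ctx_off, d->req_sel_size);
}

int main(void)
{
	describe(&msm8660);
	describe(&msm8960);
	return 0;
}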
+diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
+index 0b0dc29d2af7..77533f7f2429 100644
+--- a/drivers/mtd/nand/nand_base.c
++++ b/drivers/mtd/nand/nand_base.c
+@@ -2610,7 +2610,7 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
+ int cached = writelen > bytes && page != blockmask;
+ uint8_t *wbuf = buf;
+ int use_bufpoi;
+- int part_pagewr = (column || writelen < (mtd->writesize - 1));
++ int part_pagewr = (column || writelen < mtd->writesize);
+
+ if (part_pagewr)
+ use_bufpoi = 1;
+diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
+index ef3618299494..0680516bb472 100644
+--- a/drivers/mtd/ubi/build.c
++++ b/drivers/mtd/ubi/build.c
+@@ -874,7 +874,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
+ for (i = 0; i < UBI_MAX_DEVICES; i++) {
+ ubi = ubi_devices[i];
+ if (ubi && mtd->index == ubi->mtd->index) {
+- ubi_err(ubi, "mtd%d is already attached to ubi%d",
++ pr_err("ubi: mtd%d is already attached to ubi%d",
+ mtd->index, i);
+ return -EEXIST;
+ }
+@@ -889,7 +889,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
+ * no sense to attach emulated MTD devices, so we prohibit this.
+ */
+ if (mtd->type == MTD_UBIVOLUME) {
+- ubi_err(ubi, "refuse attaching mtd%d - it is already emulated on top of UBI",
++ pr_err("ubi: refuse attaching mtd%d - it is already emulated on top of UBI",
+ mtd->index);
+ return -EINVAL;
+ }
+@@ -900,7 +900,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
+ if (!ubi_devices[ubi_num])
+ break;
+ if (ubi_num == UBI_MAX_DEVICES) {
+- ubi_err(ubi, "only %d UBI devices may be created",
++ pr_err("ubi: only %d UBI devices may be created",
+ UBI_MAX_DEVICES);
+ return -ENFILE;
+ }
+@@ -910,7 +910,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
+
+ /* Make sure ubi_num is not busy */
+ if (ubi_devices[ubi_num]) {
+- ubi_err(ubi, "already exists");
++ pr_err("ubi: ubi%i already exists", ubi_num);
+ return -EEXIST;
+ }
+ }
+@@ -992,6 +992,9 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
+ goto out_detach;
+ }
+
++ /* Make device "available" before it becomes accessible via sysfs */
++ ubi_devices[ubi_num] = ubi;
++
+ err = uif_init(ubi, &ref);
+ if (err)
+ goto out_detach;
+@@ -1036,7 +1039,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
+ wake_up_process(ubi->bgt_thread);
+ spin_unlock(&ubi->wl_lock);
+
+- ubi_devices[ubi_num] = ubi;
+ ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL);
+ return ubi_num;
+
+@@ -1047,6 +1049,7 @@ out_uif:
+ ubi_assert(ref);
+ uif_close(ubi);
+ out_detach:
++ ubi_devices[ubi_num] = NULL;
+ ubi_wl_close(ubi);
+ ubi_free_internal_volumes(ubi);
+ vfree(ubi->vtbl);
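ubi_attach_mtd_dev() now publishes the device in ubi_devices[] before uif_init() creates the sysfs and character-device nodes, because handlers behind those nodes look the device up by number; the matching out_detach hunk clears the slot so no error path leaves a dangling entry. The shape of the fix, as a self-contained sketch where expose_interfaces() stands in for uif_init():

/* Registry entry set before user-visible interfaces appear, and
 * cleared on every failure path. */
#include <stddef.h>

struct dev { int num; };

#define MAX_DEVS 8
static struct dev *registry[MAX_DEVS];

/* stub: in the driver this creates sysfs files whose handlers
 * read registry[d->num], hence the ordering requirement */
static int expose_interfaces(struct dev *d) { (void)d; return 0; }

static int attach(struct dev *d, int num)
{
	registry[num] = d;		/* visible before the nodes exist */

	if (expose_interfaces(d)) {
		registry[num] = NULL;	/* roll back on failure */
		return -1;
	}
	return 0;
}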
+diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
+index 10059dfdc1b6..0138f526474a 100644
+--- a/drivers/mtd/ubi/vmt.c
++++ b/drivers/mtd/ubi/vmt.c
+@@ -488,13 +488,6 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
+ spin_unlock(&ubi->volumes_lock);
+ }
+
+- /* Change volume table record */
+- vtbl_rec = ubi->vtbl[vol_id];
+- vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs);
+- err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
+- if (err)
+- goto out_acc;
+-
+ if (pebs < 0) {
+ for (i = 0; i < -pebs; i++) {
+ err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i);
+@@ -512,6 +505,24 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
+ spin_unlock(&ubi->volumes_lock);
+ }
+
++ /*
++ * When we shrink a volume we have to flush all pending (erase) work.
++ * Otherwise it can happen that upon next attach UBI finds a LEB with
++ * lnum > highest_lnum and refuses to attach.
++ */
++ if (pebs < 0) {
++ err = ubi_wl_flush(ubi, vol_id, UBI_ALL);
++ if (err)
++ goto out_acc;
++ }
++
++ /* Change volume table record */
++ vtbl_rec = ubi->vtbl[vol_id];
++ vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs);
++ err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
++ if (err)
++ goto out_acc;
++
+ vol->reserved_pebs = reserved_pebs;
+ if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
+ vol->used_ebs = reserved_pebs;
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
+index 5b30922b67ec..2ce319903cfb 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
+@@ -2469,10 +2469,22 @@ void brcmf_fws_bustxfail(struct brcmf_fws_info *fws, struct sk_buff *skb)
+ void brcmf_fws_bus_blocked(struct brcmf_pub *drvr, bool flow_blocked)
+ {
+ struct brcmf_fws_info *fws = drvr->fws;
++ struct brcmf_if *ifp;
++ int i;
+
+- fws->bus_flow_blocked = flow_blocked;
+- if (!flow_blocked)
+- brcmf_fws_schedule_deq(fws);
+- else
+- fws->stats.bus_flow_block++;
++ if (fws->avoid_queueing) {
++ for (i = 0; i < BRCMF_MAX_IFS; i++) {
++ ifp = drvr->iflist[i];
++ if (!ifp || !ifp->ndev)
++ continue;
++ brcmf_txflowblock_if(ifp, BRCMF_NETIF_STOP_REASON_FLOW,
++ flow_blocked);
++ }
++ } else {
++ fws->bus_flow_blocked = flow_blocked;
++ if (!flow_blocked)
++ brcmf_fws_schedule_deq(fws);
++ else
++ fws->stats.bus_flow_block++;
++ }
+ }
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+index a588b05e38eb..6f020e4ec7dc 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+@@ -433,6 +433,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
+ /* 8000 Series */
+ {IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x1010, iwl8260_2ac_cfg)},
++ {IWL_PCI_DEVICE(0x24F3, 0x10B0, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x0130, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x1130, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x0132, iwl8260_2ac_cfg)},
+@@ -454,6 +455,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
+ {IWL_PCI_DEVICE(0x24F3, 0xD010, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0xC050, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0xD050, iwl8260_2ac_cfg)},
++ {IWL_PCI_DEVICE(0x24F3, 0xD0B0, iwl8260_2ac_cfg)},
++ {IWL_PCI_DEVICE(0x24F3, 0xB0B0, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x8010, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x8110, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x9010, iwl8260_2ac_cfg)},
+@@ -481,6 +484,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
+ {IWL_PCI_DEVICE(0x24FD, 0x0010, iwl8265_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24FD, 0x0110, iwl8265_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24FD, 0x1110, iwl8265_2ac_cfg)},
++ {IWL_PCI_DEVICE(0x24FD, 0x1130, iwl8265_2ac_cfg)},
++ {IWL_PCI_DEVICE(0x24FD, 0x0130, iwl8265_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24FD, 0x1010, iwl8265_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24FD, 0x0050, iwl8265_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24FD, 0x0150, iwl8265_2ac_cfg)},
+@@ -491,6 +496,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
+ {IWL_PCI_DEVICE(0x24FD, 0x0810, iwl8265_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24FD, 0x9110, iwl8265_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24FD, 0x8130, iwl8265_2ac_cfg)},
++ {IWL_PCI_DEVICE(0x24FD, 0x0910, iwl8265_2ac_cfg)},
++ {IWL_PCI_DEVICE(0x24FD, 0x0930, iwl8265_2ac_cfg)},
++ {IWL_PCI_DEVICE(0x24FD, 0x0950, iwl8265_2ac_cfg)},
++ {IWL_PCI_DEVICE(0x24FD, 0x0850, iwl8265_2ac_cfg)},
+
+ /* 9000 Series */
+ {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)},
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+index de6974f9c52f..2d8cce290a15 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+@@ -496,7 +496,7 @@ void iwl_pcie_dump_csr(struct iwl_trans *trans);
+ /*****************************************************
+ * Helpers
+ ******************************************************/
+-static inline void iwl_disable_interrupts(struct iwl_trans *trans)
++static inline void _iwl_disable_interrupts(struct iwl_trans *trans)
+ {
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+@@ -519,7 +519,16 @@ static inline void iwl_disable_interrupts(struct iwl_trans *trans)
+ IWL_DEBUG_ISR(trans, "Disabled interrupts\n");
+ }
+
+-static inline void iwl_enable_interrupts(struct iwl_trans *trans)
++static inline void iwl_disable_interrupts(struct iwl_trans *trans)
++{
++ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
++
++ spin_lock(&trans_pcie->irq_lock);
++ _iwl_disable_interrupts(trans);
++ spin_unlock(&trans_pcie->irq_lock);
++}
++
++static inline void _iwl_enable_interrupts(struct iwl_trans *trans)
+ {
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+@@ -542,6 +551,14 @@ static inline void iwl_enable_interrupts(struct iwl_trans *trans)
+ }
+ }
+
++static inline void iwl_enable_interrupts(struct iwl_trans *trans)
++{
++ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
++
++ spin_lock(&trans_pcie->irq_lock);
++ _iwl_enable_interrupts(trans);
++ spin_unlock(&trans_pcie->irq_lock);
++}
+ static inline void iwl_enable_hw_int_msk_msix(struct iwl_trans *trans, u32 msk)
+ {
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
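The internal.h hunk introduces the usual kernel split between a raw helper that assumes the lock is held (leading underscore) and a public wrapper that takes it, which lets the IRQ paths below call the raw variants while already holding irq_lock, while every other caller gets the locking for free. The convention in miniature, as a userspace sketch with pthreads and illustrative names:

/* Locked/unlocked helper convention: the underscore variant requires
 * the caller to hold the lock; the public variant takes it itself. */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t irq_lock = PTHREAD_MUTEX_INITIALIZER;
static bool ints_enabled;

/* caller must already hold irq_lock */
static void _enable_interrupts(void)
{
	ints_enabled = true;
	/* ... write the hardware mask registers here ... */
}

static void enable_interrupts(void)
{
	pthread_mutex_lock(&irq_lock);
	_enable_interrupts();
	pthread_mutex_unlock(&irq_lock);
}

This is also why the later trans.c hunks can delete their open-coded spin_lock()/spin_unlock() pairs around iwl_disable_interrupts(): the wrapper now owns that locking.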
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+index 0a4a3c502c3c..aaaf2ad6e4da 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+@@ -1507,7 +1507,7 @@ irqreturn_t iwl_pcie_irq_handler(int irq, void *dev_id)
+ * have anything to service
+ */
+ if (test_bit(STATUS_INT_ENABLED, &trans->status))
+- iwl_enable_interrupts(trans);
++ _iwl_enable_interrupts(trans);
+ spin_unlock(&trans_pcie->irq_lock);
+ lock_map_release(&trans->sync_cmd_lockdep_map);
+ return IRQ_NONE;
+@@ -1699,15 +1699,17 @@ irqreturn_t iwl_pcie_irq_handler(int irq, void *dev_id)
+ inta & ~trans_pcie->inta_mask);
+ }
+
++ spin_lock(&trans_pcie->irq_lock);
++ /* only re-enable all interrupts if they were disabled by irq */
++ if (test_bit(STATUS_INT_ENABLED, &trans->status))
++ _iwl_enable_interrupts(trans);
+ /* we are loading the firmware, enable FH_TX interrupt only */
+- if (handled & CSR_INT_BIT_FH_TX)
++ else if (handled & CSR_INT_BIT_FH_TX)
+ iwl_enable_fw_load_int(trans);
+- /* only Re-enable all interrupt if disabled by irq */
+- else if (test_bit(STATUS_INT_ENABLED, &trans->status))
+- iwl_enable_interrupts(trans);
+ /* Re-enable RF_KILL if it occurred */
+ else if (handled & CSR_INT_BIT_RF_KILL)
+ iwl_enable_rfkill_int(trans);
++ spin_unlock(&trans_pcie->irq_lock);
+
+ out:
+ lock_map_release(&trans->sync_cmd_lockdep_map);
+@@ -1771,7 +1773,7 @@ void iwl_pcie_reset_ict(struct iwl_trans *trans)
+ return;
+
+ spin_lock(&trans_pcie->irq_lock);
+- iwl_disable_interrupts(trans);
++ _iwl_disable_interrupts(trans);
+
+ memset(trans_pcie->ict_tbl, 0, ICT_SIZE);
+
+@@ -1787,7 +1789,7 @@ void iwl_pcie_reset_ict(struct iwl_trans *trans)
+ trans_pcie->use_ict = true;
+ trans_pcie->ict_index = 0;
+ iwl_write32(trans, CSR_INT, trans_pcie->inta_mask);
+- iwl_enable_interrupts(trans);
++ _iwl_enable_interrupts(trans);
+ spin_unlock(&trans_pcie->irq_lock);
+ }
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+index f603d7830a6b..d9f139462b31 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+@@ -801,6 +801,8 @@ static int iwl_pcie_load_cpu_sections_8000(struct iwl_trans *trans,
+
+ *first_ucode_section = last_read_idx;
+
++ iwl_enable_interrupts(trans);
++
+ if (cpu == 1)
+ iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFF);
+ else
+@@ -980,6 +982,8 @@ static int iwl_pcie_load_given_ucode(struct iwl_trans *trans,
+ iwl_pcie_apply_destination(trans);
+ }
+
++ iwl_enable_interrupts(trans);
++
+ /* release CPU reset */
+ iwl_write32(trans, CSR_RESET, 0);
+
+@@ -1033,9 +1037,7 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
+ was_hw_rfkill = iwl_is_rfkill_set(trans);
+
+ /* tell the device to stop sending interrupts */
+- spin_lock(&trans_pcie->irq_lock);
+ iwl_disable_interrupts(trans);
+- spin_unlock(&trans_pcie->irq_lock);
+
+ /* device going down, Stop using ICT table */
+ iwl_pcie_disable_ict(trans);
+@@ -1079,9 +1081,7 @@ static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
+ * the time, unless the interrupt is ACKed even if the interrupt
+ * should be masked. Re-ACK all the interrupts here.
+ */
+- spin_lock(&trans_pcie->irq_lock);
+ iwl_disable_interrupts(trans);
+- spin_unlock(&trans_pcie->irq_lock);
+
+ /* clear all status bits */
+ clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
+@@ -1215,7 +1215,6 @@ static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
+ ret = iwl_pcie_load_given_ucode_8000(trans, fw);
+ else
+ ret = iwl_pcie_load_given_ucode(trans, fw);
+- iwl_enable_interrupts(trans);
+
+ /* re-check RF-Kill state since we may have missed the interrupt */
+ hw_rfkill = iwl_is_rfkill_set(trans);
+@@ -1567,15 +1566,11 @@ static void iwl_trans_pcie_op_mode_leave(struct iwl_trans *trans)
+ mutex_lock(&trans_pcie->mutex);
+
+ /* disable interrupts - don't enable HW RF kill interrupt */
+- spin_lock(&trans_pcie->irq_lock);
+ iwl_disable_interrupts(trans);
+- spin_unlock(&trans_pcie->irq_lock);
+
+ iwl_pcie_apm_stop(trans, true);
+
+- spin_lock(&trans_pcie->irq_lock);
+ iwl_disable_interrupts(trans);
+- spin_unlock(&trans_pcie->irq_lock);
+
+ iwl_pcie_disable_ict(trans);
+
+diff --git a/drivers/of/base.c b/drivers/of/base.c
+index ebf84e3b56d5..8bb3d1adf1b0 100644
+--- a/drivers/of/base.c
++++ b/drivers/of/base.c
+@@ -112,6 +112,7 @@ static ssize_t of_node_property_read(struct file *filp, struct kobject *kobj,
+ return memory_read_from_buffer(buf, count, &offset, pp->value, pp->length);
+ }
+
++/* always returns a newly allocated name; the caller must free it after use */
+ static const char *safe_name(struct kobject *kobj, const char *orig_name)
+ {
+ const char *name = orig_name;
+@@ -126,9 +127,12 @@ static const char *safe_name(struct kobject *kobj, const char *orig_name)
+ name = kasprintf(GFP_KERNEL, "%s#%i", orig_name, ++i);
+ }
+
+- if (name != orig_name)
++ if (name == orig_name) {
++ name = kstrdup(orig_name, GFP_KERNEL);
++ } else {
+ pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n",
+ kobject_name(kobj), name);
++ }
+ return name;
+ }
+
+@@ -159,6 +163,7 @@ int __of_add_property_sysfs(struct device_node *np, struct property *pp)
+ int __of_attach_node_sysfs(struct device_node *np)
+ {
+ const char *name;
++ struct kobject *parent;
+ struct property *pp;
+ int rc;
+
+@@ -171,15 +176,16 @@ int __of_attach_node_sysfs(struct device_node *np)
+ np->kobj.kset = of_kset;
+ if (!np->parent) {
+ /* Nodes without parents are new top level trees */
+- rc = kobject_add(&np->kobj, NULL, "%s",
+- safe_name(&of_kset->kobj, "base"));
++ name = safe_name(&of_kset->kobj, "base");
++ parent = NULL;
+ } else {
+ name = safe_name(&np->parent->kobj, kbasename(np->full_name));
+- if (!name || !name[0])
+- return -EINVAL;
+-
+- rc = kobject_add(&np->kobj, &np->parent->kobj, "%s", name);
++ parent = &np->parent->kobj;
+ }
++ if (!name)
++ return -ENOMEM;
++ rc = kobject_add(&np->kobj, parent, "%s", name);
++ kfree(name);
+ if (rc)
+ return rc;
+
+@@ -1815,6 +1821,12 @@ int __of_remove_property(struct device_node *np, struct property *prop)
+ return 0;
+ }
+
++void __of_sysfs_remove_bin_file(struct device_node *np, struct property *prop)
++{
++ sysfs_remove_bin_file(&np->kobj, &prop->attr);
++ kfree(prop->attr.attr.name);
++}
++
+ void __of_remove_property_sysfs(struct device_node *np, struct property *prop)
+ {
+ if (!IS_ENABLED(CONFIG_SYSFS))
+@@ -1822,7 +1834,7 @@ void __of_remove_property_sysfs(struct device_node *np, struct property *prop)
+
+ /* at early boot, bail here and defer setup to of_init() */
+ if (of_kset && of_node_is_attached(np))
+- sysfs_remove_bin_file(&np->kobj, &prop->attr);
++ __of_sysfs_remove_bin_file(np, prop);
+ }
+
+ /**
+@@ -1895,7 +1907,7 @@ void __of_update_property_sysfs(struct device_node *np, struct property *newprop
+ return;
+
+ if (oldprop)
+- sysfs_remove_bin_file(&np->kobj, &oldprop->attr);
++ __of_sysfs_remove_bin_file(np, oldprop);
+ __of_add_property_sysfs(np, newprop);
+ }
+
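After this change safe_name() always returns memory the caller owns: the no-clash case hands back a kstrdup() of the input instead of the input pointer itself, which is what lets __of_attach_node_sysfs() kfree(name) unconditionally and lets the new __of_sysfs_remove_bin_file() free the stored attribute name. Uniform ownership removes the "did we allocate this?" bookkeeping that previously leaked the kasprintf() duplicates. A userspace sketch of the same contract, where name_exists() is a stub for the kobject child lookup:

/* Always-allocate naming helper: the caller owns the result and may
 * free() it unconditionally. */
#define _GNU_SOURCE	/* for asprintf() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int name_exists(const char *name) { (void)name; return 0; }

static char *safe_name(const char *orig_name)
{
	char *name = NULL;
	int i = 0;

	while (name_exists(name ? name : orig_name)) {
		free(name);
		if (asprintf(&name, "%s#%i", orig_name, ++i) < 0)
			return NULL;
	}
	/* no clash: still return a private copy, never the input pointer */
	return name ? name : strdup(orig_name);
}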
+diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
+index 3033fa3250dc..a2015599ed7e 100644
+--- a/drivers/of/dynamic.c
++++ b/drivers/of/dynamic.c
+@@ -55,7 +55,7 @@ void __of_detach_node_sysfs(struct device_node *np)
+ /* only remove properties if on sysfs */
+ if (of_node_is_attached(np)) {
+ for_each_property_of_node(np, pp)
+- sysfs_remove_bin_file(&np->kobj, &pp->attr);
++ __of_sysfs_remove_bin_file(np, pp);
+ kobject_del(&np->kobj);
+ }
+
+diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
+index 829469faeb23..18bbb4517e25 100644
+--- a/drivers/of/of_private.h
++++ b/drivers/of/of_private.h
+@@ -83,6 +83,9 @@ extern int __of_attach_node_sysfs(struct device_node *np);
+ extern void __of_detach_node(struct device_node *np);
+ extern void __of_detach_node_sysfs(struct device_node *np);
+
++extern void __of_sysfs_remove_bin_file(struct device_node *np,
++ struct property *prop);
++
+ /* iterators for transactions, used for overlays */
+ /* forward iterator */
+ #define for_each_transaction_entry(_oft, _te) \
+diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
+index ee72ebe18f4b..7902fbf47f89 100644
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -3189,13 +3189,15 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
+ }
+
+ /*
+- * Atheros AR93xx chips do not behave after a bus reset. The device will
+- * throw a Link Down error on AER-capable systems and regardless of AER,
+- * config space of the device is never accessible again and typically
+- * causes the system to hang or reset when access is attempted.
++ * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
++ * The device will throw a Link Down error on AER-capable systems and
++ * regardless of AER, config space of the device is never accessible again
++ * and typically causes the system to hang or reset when access is attempted.
+ * http://www.spinics.net/lists/linux-pci/msg34797.html
+ */
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0030, quirk_no_bus_reset);
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0032, quirk_no_bus_reset);
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003c, quirk_no_bus_reset);
+
+ static void quirk_no_pm_reset(struct pci_dev *dev)
+ {
+diff --git a/drivers/phy/phy-rcar-gen3-usb2.c b/drivers/phy/phy-rcar-gen3-usb2.c
+index 4be3f5dbbc9f..31156c9c4707 100644
+--- a/drivers/phy/phy-rcar-gen3-usb2.c
++++ b/drivers/phy/phy-rcar-gen3-usb2.c
+@@ -21,6 +21,7 @@
+ #include <linux/phy/phy.h>
+ #include <linux/platform_device.h>
+ #include <linux/regulator/consumer.h>
++#include <linux/workqueue.h>
+
+ /******* USB2.0 Host registers (original offset is +0x200) *******/
+ #define USB2_INT_ENABLE 0x000
+@@ -81,9 +82,25 @@ struct rcar_gen3_chan {
+ struct extcon_dev *extcon;
+ struct phy *phy;
+ struct regulator *vbus;
++ struct work_struct work;
++ bool extcon_host;
+ bool has_otg;
+ };
+
++static void rcar_gen3_phy_usb2_work(struct work_struct *work)
++{
++ struct rcar_gen3_chan *ch = container_of(work, struct rcar_gen3_chan,
++ work);
++
++ if (ch->extcon_host) {
++ extcon_set_cable_state_(ch->extcon, EXTCON_USB_HOST, true);
++ extcon_set_cable_state_(ch->extcon, EXTCON_USB, false);
++ } else {
++ extcon_set_cable_state_(ch->extcon, EXTCON_USB_HOST, false);
++ extcon_set_cable_state_(ch->extcon, EXTCON_USB, true);
++ }
++}
++
+ static void rcar_gen3_set_host_mode(struct rcar_gen3_chan *ch, int host)
+ {
+ void __iomem *usb2_base = ch->base;
+@@ -130,8 +147,8 @@ static void rcar_gen3_init_for_host(struct rcar_gen3_chan *ch)
+ rcar_gen3_set_host_mode(ch, 1);
+ rcar_gen3_enable_vbus_ctrl(ch, 1);
+
+- extcon_set_cable_state_(ch->extcon, EXTCON_USB_HOST, true);
+- extcon_set_cable_state_(ch->extcon, EXTCON_USB, false);
++ ch->extcon_host = true;
++ schedule_work(&ch->work);
+ }
+
+ static void rcar_gen3_init_for_peri(struct rcar_gen3_chan *ch)
+@@ -140,8 +157,8 @@ static void rcar_gen3_init_for_peri(struct rcar_gen3_chan *ch)
+ rcar_gen3_set_host_mode(ch, 0);
+ rcar_gen3_enable_vbus_ctrl(ch, 0);
+
+- extcon_set_cable_state_(ch->extcon, EXTCON_USB_HOST, false);
+- extcon_set_cable_state_(ch->extcon, EXTCON_USB, true);
++ ch->extcon_host = false;
++ schedule_work(&ch->work);
+ }
+
+ static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch)
+@@ -301,6 +318,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
+ if (irq >= 0) {
+ int ret;
+
++ INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work);
+ irq = devm_request_irq(dev, irq, rcar_gen3_phy_usb2_irq,
+ IRQF_SHARED, dev_name(dev), channel);
+ if (irq < 0)
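The rcar-gen3 PHY hunks defer the extcon updates to a workqueue: the host-vs-peripheral decision is made in the interrupt path, but extcon_set_cable_state_() may sleep, so the handler only records the desired role in ch->extcon_host and schedules ch->work, whose handler applies it later in process context. A userspace miniature of that record-and-kick pattern, with illustrative names:

/* Deferring sleepable work out of the fast path: the "interrupt"
 * records state and signals; a worker thread does the slow update. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  kick = PTHREAD_COND_INITIALIZER;
static bool host_mode, pending;

static void irq_handler(bool is_host)	/* kept short, never sleeps long */
{
	pthread_mutex_lock(&lock);
	host_mode = is_host;		/* record the decision only */
	pending = true;
	pthread_cond_signal(&kick);
	pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)		/* may sleep */
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (!pending)
			pthread_cond_wait(&kick, &lock);
		bool h = host_mode;
		pending = false;
		pthread_mutex_unlock(&lock);
		printf("cable state -> %s\n", h ? "host" : "peripheral");
	}
	return NULL;
}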
+diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
+index ac4f564f1c3e..bf65c948b31d 100644
+--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
++++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
+@@ -160,7 +160,6 @@ struct chv_pin_context {
+ * @pctldev: Pointer to the pin controller device
+ * @chip: GPIO chip in this pin controller
+ * @regs: MMIO registers
+- * @lock: Lock to serialize register accesses
+ * @intr_lines: Stores mapping between 16 HW interrupt wires and GPIO
+ * offset (in GPIO number space)
+ * @community: Community this pinctrl instance represents
+@@ -174,7 +173,6 @@ struct chv_pinctrl {
+ struct pinctrl_dev *pctldev;
+ struct gpio_chip chip;
+ void __iomem *regs;
+- raw_spinlock_t lock;
+ unsigned intr_lines[16];
+ const struct chv_community *community;
+ u32 saved_intmask;
+@@ -657,6 +655,17 @@ static const struct chv_community *chv_communities[] = {
+ &southeast_community,
+ };
+
++/*
++ * Lock to serialize register accesses
++ *
++ * Due to a silicon issue, a shared lock must be used to prevent
++ * concurrent accesses across the 4 GPIO controllers.
++ *
++ * See Intel Atom Z8000 Processor Series Specification Update (Rev. 005),
++ * errata #CHT34, for further information.
++ */
++static DEFINE_RAW_SPINLOCK(chv_lock);
++
+ static void __iomem *chv_padreg(struct chv_pinctrl *pctrl, unsigned offset,
+ unsigned reg)
+ {
+@@ -718,13 +727,13 @@ static void chv_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s,
+ u32 ctrl0, ctrl1;
+ bool locked;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+
+ ctrl0 = readl(chv_padreg(pctrl, offset, CHV_PADCTRL0));
+ ctrl1 = readl(chv_padreg(pctrl, offset, CHV_PADCTRL1));
+ locked = chv_pad_locked(pctrl, offset);
+
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+
+ if (ctrl0 & CHV_PADCTRL0_GPIOEN) {
+ seq_puts(s, "GPIO ");
+@@ -787,14 +796,14 @@ static int chv_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned function,
+
+ grp = &pctrl->community->groups[group];
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+
+ /* Check first that the pad is not locked */
+ for (i = 0; i < grp->npins; i++) {
+ if (chv_pad_locked(pctrl, grp->pins[i])) {
+ dev_warn(pctrl->dev, "unable to set mode for locked pin %u\n",
+ grp->pins[i]);
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+ return -EBUSY;
+ }
+ }
+@@ -837,7 +846,7 @@ static int chv_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned function,
+ pin, altfunc->mode, altfunc->invert_oe ? "" : "not ");
+ }
+
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+
+ return 0;
+ }
+@@ -851,13 +860,13 @@ static int chv_gpio_request_enable(struct pinctrl_dev *pctldev,
+ void __iomem *reg;
+ u32 value;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+
+ if (chv_pad_locked(pctrl, offset)) {
+ value = readl(chv_padreg(pctrl, offset, CHV_PADCTRL0));
+ if (!(value & CHV_PADCTRL0_GPIOEN)) {
+ /* Locked so cannot enable */
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+ return -EBUSY;
+ }
+ } else {
+@@ -897,7 +906,7 @@ static int chv_gpio_request_enable(struct pinctrl_dev *pctldev,
+ chv_writel(value, reg);
+ }
+
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+
+ return 0;
+ }
+@@ -911,13 +920,13 @@ static void chv_gpio_disable_free(struct pinctrl_dev *pctldev,
+ void __iomem *reg;
+ u32 value;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+
+ reg = chv_padreg(pctrl, offset, CHV_PADCTRL0);
+ value = readl(reg) & ~CHV_PADCTRL0_GPIOEN;
+ chv_writel(value, reg);
+
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+ }
+
+ static int chv_gpio_set_direction(struct pinctrl_dev *pctldev,
+@@ -929,7 +938,7 @@ static int chv_gpio_set_direction(struct pinctrl_dev *pctldev,
+ unsigned long flags;
+ u32 ctrl0;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+
+ ctrl0 = readl(reg) & ~CHV_PADCTRL0_GPIOCFG_MASK;
+ if (input)
+@@ -938,7 +947,7 @@ static int chv_gpio_set_direction(struct pinctrl_dev *pctldev,
+ ctrl0 |= CHV_PADCTRL0_GPIOCFG_GPO << CHV_PADCTRL0_GPIOCFG_SHIFT;
+ chv_writel(ctrl0, reg);
+
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+
+ return 0;
+ }
+@@ -963,10 +972,10 @@ static int chv_config_get(struct pinctrl_dev *pctldev, unsigned pin,
+ u16 arg = 0;
+ u32 term;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+ ctrl0 = readl(chv_padreg(pctrl, pin, CHV_PADCTRL0));
+ ctrl1 = readl(chv_padreg(pctrl, pin, CHV_PADCTRL1));
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+
+ term = (ctrl0 & CHV_PADCTRL0_TERM_MASK) >> CHV_PADCTRL0_TERM_SHIFT;
+
+@@ -1040,7 +1049,7 @@ static int chv_config_set_pull(struct chv_pinctrl *pctrl, unsigned pin,
+ unsigned long flags;
+ u32 ctrl0, pull;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+ ctrl0 = readl(reg);
+
+ switch (param) {
+@@ -1063,7 +1072,7 @@ static int chv_config_set_pull(struct chv_pinctrl *pctrl, unsigned pin,
+ pull = CHV_PADCTRL0_TERM_20K << CHV_PADCTRL0_TERM_SHIFT;
+ break;
+ default:
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+ return -EINVAL;
+ }
+
+@@ -1081,7 +1090,7 @@ static int chv_config_set_pull(struct chv_pinctrl *pctrl, unsigned pin,
+ pull = CHV_PADCTRL0_TERM_20K << CHV_PADCTRL0_TERM_SHIFT;
+ break;
+ default:
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+ return -EINVAL;
+ }
+
+@@ -1089,12 +1098,12 @@ static int chv_config_set_pull(struct chv_pinctrl *pctrl, unsigned pin,
+ break;
+
+ default:
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+ return -EINVAL;
+ }
+
+ chv_writel(ctrl0, reg);
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+
+ return 0;
+ }
+@@ -1160,9 +1169,9 @@ static int chv_gpio_get(struct gpio_chip *chip, unsigned offset)
+ unsigned long flags;
+ u32 ctrl0, cfg;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+ ctrl0 = readl(chv_padreg(pctrl, pin, CHV_PADCTRL0));
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+
+ cfg = ctrl0 & CHV_PADCTRL0_GPIOCFG_MASK;
+ cfg >>= CHV_PADCTRL0_GPIOCFG_SHIFT;
+@@ -1180,7 +1189,7 @@ static void chv_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+ void __iomem *reg;
+ u32 ctrl0;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+
+ reg = chv_padreg(pctrl, pin, CHV_PADCTRL0);
+ ctrl0 = readl(reg);
+@@ -1192,7 +1201,7 @@ static void chv_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+
+ chv_writel(ctrl0, reg);
+
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+ }
+
+ static int chv_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
+@@ -1202,9 +1211,9 @@ static int chv_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
+ u32 ctrl0, direction;
+ unsigned long flags;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+ ctrl0 = readl(chv_padreg(pctrl, pin, CHV_PADCTRL0));
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+
+ direction = ctrl0 & CHV_PADCTRL0_GPIOCFG_MASK;
+ direction >>= CHV_PADCTRL0_GPIOCFG_SHIFT;
+@@ -1242,14 +1251,14 @@ static void chv_gpio_irq_ack(struct irq_data *d)
+ int pin = chv_gpio_offset_to_pin(pctrl, irqd_to_hwirq(d));
+ u32 intr_line;
+
+- raw_spin_lock(&pctrl->lock);
++ raw_spin_lock(&chv_lock);
+
+ intr_line = readl(chv_padreg(pctrl, pin, CHV_PADCTRL0));
+ intr_line &= CHV_PADCTRL0_INTSEL_MASK;
+ intr_line >>= CHV_PADCTRL0_INTSEL_SHIFT;
+ chv_writel(BIT(intr_line), pctrl->regs + CHV_INTSTAT);
+
+- raw_spin_unlock(&pctrl->lock);
++ raw_spin_unlock(&chv_lock);
+ }
+
+ static void chv_gpio_irq_mask_unmask(struct irq_data *d, bool mask)
+@@ -1260,7 +1269,7 @@ static void chv_gpio_irq_mask_unmask(struct irq_data *d, bool mask)
+ u32 value, intr_line;
+ unsigned long flags;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+
+ intr_line = readl(chv_padreg(pctrl, pin, CHV_PADCTRL0));
+ intr_line &= CHV_PADCTRL0_INTSEL_MASK;
+@@ -1273,7 +1282,7 @@ static void chv_gpio_irq_mask_unmask(struct irq_data *d, bool mask)
+ value |= BIT(intr_line);
+ chv_writel(value, pctrl->regs + CHV_INTMASK);
+
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+ }
+
+ static void chv_gpio_irq_mask(struct irq_data *d)
+@@ -1307,7 +1316,7 @@ static unsigned chv_gpio_irq_startup(struct irq_data *d)
+ unsigned long flags;
+ u32 intsel, value;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+ intsel = readl(chv_padreg(pctrl, pin, CHV_PADCTRL0));
+ intsel &= CHV_PADCTRL0_INTSEL_MASK;
+ intsel >>= CHV_PADCTRL0_INTSEL_SHIFT;
+@@ -1322,7 +1331,7 @@ static unsigned chv_gpio_irq_startup(struct irq_data *d)
+ irq_set_handler_locked(d, handler);
+ pctrl->intr_lines[intsel] = offset;
+ }
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+ }
+
+ chv_gpio_irq_unmask(d);
+@@ -1338,7 +1347,7 @@ static int chv_gpio_irq_type(struct irq_data *d, unsigned type)
+ unsigned long flags;
+ u32 value;
+
+- raw_spin_lock_irqsave(&pctrl->lock, flags);
++ raw_spin_lock_irqsave(&chv_lock, flags);
+
+ /*
+ * Pins which can be used as shared interrupt are configured in
+@@ -1387,7 +1396,7 @@ static int chv_gpio_irq_type(struct irq_data *d, unsigned type)
+ else if (type & IRQ_TYPE_LEVEL_MASK)
+ irq_set_handler_locked(d, handle_level_irq);
+
+- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
++ raw_spin_unlock_irqrestore(&chv_lock, flags);
+
+ return 0;
+ }
+@@ -1499,7 +1508,6 @@ static int chv_pinctrl_probe(struct platform_device *pdev)
+ if (i == ARRAY_SIZE(chv_communities))
+ return -ENODEV;
+
+- raw_spin_lock_init(&pctrl->lock);
+ pctrl->dev = &pdev->dev;
+
+ #ifdef CONFIG_PM_SLEEP
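The cherryview conversion replaces the per-instance pctrl->lock with the single file-scope chv_lock, since per the quoted erratum no two register accesses may overlap even across different GPIO communities, and a per-instance lock cannot provide that. In miniature, as a userspace sketch with the register access stubbed:

/* One file-scope lock shared by every controller instance: two
 * threads touching two different communities still exclude each
 * other, as the erratum requires. */
#include <pthread.h>

static pthread_mutex_t chv_lock = PTHREAD_MUTEX_INITIALIZER;

struct community {
	unsigned int regs[64];	/* stand-in for the MMIO window */
};

static unsigned int community_read(struct community *c, unsigned int idx)
{
	unsigned int v;

	pthread_mutex_lock(&chv_lock);	/* excludes all instances */
	v = c->regs[idx];
	pthread_mutex_unlock(&chv_lock);
	return v;
}

With the lock at file scope there is nothing left to initialize per device, hence the removed raw_spin_lock_init() call and struct field.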
+diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
+index 6f145f2d004d..96ffda493266 100644
+--- a/drivers/platform/x86/hp-wmi.c
++++ b/drivers/platform/x86/hp-wmi.c
+@@ -718,6 +718,11 @@ static int __init hp_wmi_rfkill_setup(struct platform_device *device)
+ if (err)
+ return err;
+
++ err = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, &wireless,
++ sizeof(wireless), 0);
++ if (err)
++ return err;
++
+ if (wireless & 0x1) {
+ wifi_rfkill = rfkill_alloc("hp-wifi", &device->dev,
+ RFKILL_TYPE_WLAN,
+@@ -882,7 +887,7 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
+ wwan_rfkill = NULL;
+ rfkill2_count = 0;
+
+- if (hp_wmi_bios_2009_later() || hp_wmi_rfkill_setup(device))
++ if (hp_wmi_rfkill_setup(device))
+ hp_wmi_rfkill2_setup(device);
+
+ err = device_create_file(&device->dev, &dev_attr_display);
+diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
+index 02fb6b4ea820..d838e77dd947 100644
+--- a/drivers/regulator/s2mps11.c
++++ b/drivers/regulator/s2mps11.c
+@@ -750,7 +750,7 @@ static const struct regulator_linear_range s2mps15_ldo_voltage_ranges3[] = {
+
+ /* voltage range for s2mps15 LDO 7, 8, 9 and 10 */
+ static const struct regulator_linear_range s2mps15_ldo_voltage_ranges4[] = {
+- REGULATOR_LINEAR_RANGE(700000, 0xc, 0x18, 25000),
++ REGULATOR_LINEAR_RANGE(700000, 0x10, 0x20, 25000),
+ };
+
+ /* voltage range for s2mps15 LDO 1 */
+@@ -760,12 +760,12 @@ static const struct regulator_linear_range s2mps15_ldo_voltage_ranges5[] = {
+
+ /* voltage range for s2mps15 BUCK 1, 2, 3, 4, 5, 6 and 7 */
+ static const struct regulator_linear_range s2mps15_buck_voltage_ranges1[] = {
+- REGULATOR_LINEAR_RANGE(500000, 0x20, 0xb0, 6250),
++ REGULATOR_LINEAR_RANGE(500000, 0x20, 0xc0, 6250),
+ };
+
+ /* voltage range for s2mps15 BUCK 8, 9 and 10 */
+ static const struct regulator_linear_range s2mps15_buck_voltage_ranges2[] = {
+- REGULATOR_LINEAR_RANGE(1000000, 0x20, 0xc0, 12500),
++ REGULATOR_LINEAR_RANGE(1000000, 0x20, 0x78, 12500),
+ };
+
+ static const struct regulator_desc s2mps15_regulators[] = {
+diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
+index db3958b3f094..fe0539ed9cb5 100644
+--- a/drivers/remoteproc/remoteproc_core.c
++++ b/drivers/remoteproc/remoteproc_core.c
+@@ -1264,11 +1264,6 @@ int rproc_add(struct rproc *rproc)
+ if (ret < 0)
+ return ret;
+
+- /* expose to rproc_get_by_phandle users */
+- mutex_lock(&rproc_list_mutex);
+- list_add(&rproc->node, &rproc_list);
+- mutex_unlock(&rproc_list_mutex);
+-
+ dev_info(dev, "%s is available\n", rproc->name);
+
+ dev_info(dev, "Note: remoteproc is still under development and considered experimental.\n");
+@@ -1276,8 +1271,16 @@ int rproc_add(struct rproc *rproc)
+
+ /* create debugfs entries */
+ rproc_create_debug_dir(rproc);
++ ret = rproc_add_virtio_devices(rproc);
++ if (ret < 0)
++ return ret;
+
+- return rproc_add_virtio_devices(rproc);
++ /* expose to rproc_get_by_phandle users */
++ mutex_lock(&rproc_list_mutex);
++ list_add(&rproc->node, &rproc_list);
++ mutex_unlock(&rproc_list_mutex);
++
++ return 0;
+ }
+ EXPORT_SYMBOL(rproc_add);
+
+diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
+index d01ad7e8078e..4e823c4b7a4d 100644
+--- a/drivers/rtc/rtc-s3c.c
++++ b/drivers/rtc/rtc-s3c.c
+@@ -149,12 +149,14 @@ static int s3c_rtc_setfreq(struct s3c_rtc *info, int freq)
+ if (!is_power_of_2(freq))
+ return -EINVAL;
+
++ s3c_rtc_enable_clk(info);
+ spin_lock_irq(&info->pie_lock);
+
+ if (info->data->set_freq)
+ info->data->set_freq(info, freq);
+
+ spin_unlock_irq(&info->pie_lock);
++ s3c_rtc_disable_clk(info);
+
+ return 0;
+ }
+diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c
+index b2afad5a5682..2a34eb5f6161 100644
+--- a/drivers/s390/cio/cmf.c
++++ b/drivers/s390/cio/cmf.c
+@@ -753,6 +753,17 @@ static void reset_cmb(struct ccw_device *cdev)
+ cmf_generic_reset(cdev);
+ }
+
++static int cmf_enabled(struct ccw_device *cdev)
++{
++ int enabled;
++
++ spin_lock_irq(cdev->ccwlock);
++ enabled = !!cdev->private->cmb;
++ spin_unlock_irq(cdev->ccwlock);
++
++ return enabled;
++}
++
+ static struct attribute_group cmf_attr_group;
+
+ static struct cmb_operations cmbops_basic = {
+@@ -1153,13 +1164,8 @@ static ssize_t cmb_enable_show(struct device *dev,
+ char *buf)
+ {
+ struct ccw_device *cdev = to_ccwdev(dev);
+- int enabled;
+
+- spin_lock_irq(cdev->ccwlock);
+- enabled = !!cdev->private->cmb;
+- spin_unlock_irq(cdev->ccwlock);
+-
+- return sprintf(buf, "%d\n", enabled);
++ return sprintf(buf, "%d\n", cmf_enabled(cdev));
+ }
+
+ static ssize_t cmb_enable_store(struct device *dev,
+@@ -1199,15 +1205,20 @@ int ccw_set_cmf(struct ccw_device *cdev, int enable)
+ * @cdev: The ccw device to be enabled
+ *
+ * Returns %0 for success or a negative error value.
+- *
++ * Note: If this is called on a device for which channel measurement is already
++ * enabled, a reset of the measurement data is triggered.
+ * Context:
+ * non-atomic
+ */
+ int enable_cmf(struct ccw_device *cdev)
+ {
+- int ret;
++ int ret = 0;
+
+ device_lock(&cdev->dev);
++ if (cmf_enabled(cdev)) {
++ cmbops->reset(cdev);
++ goto out_unlock;
++ }
+ get_device(&cdev->dev);
+ ret = cmbops->alloc(cdev);
+ if (ret)
+@@ -1226,7 +1237,7 @@ int enable_cmf(struct ccw_device *cdev)
+ out:
+ if (ret)
+ put_device(&cdev->dev);
+-
++out_unlock:
+ device_unlock(&cdev->dev);
+ return ret;
+ }
+diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
+index 3bd0be6277b3..c7e5695da4f5 100644
+--- a/drivers/scsi/lpfc/lpfc_scsi.c
++++ b/drivers/scsi/lpfc/lpfc_scsi.c
+@@ -3874,7 +3874,7 @@ int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba,
+ uint32_t tag;
+ uint16_t hwq;
+
+- if (shost_use_blk_mq(cmnd->device->host)) {
++ if (cmnd && shost_use_blk_mq(cmnd->device->host)) {
+ tag = blk_mq_unique_tag(cmnd->request);
+ hwq = blk_mq_unique_tag_to_hwq(tag);
+
+diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
+index f4b0690450d2..2dab3dc2aa69 100644
+--- a/drivers/scsi/megaraid/megaraid_sas_base.c
++++ b/drivers/scsi/megaraid/megaraid_sas_base.c
+@@ -4079,6 +4079,12 @@ megasas_get_pd_list(struct megasas_instance *instance)
+ struct MR_PD_ADDRESS *pd_addr;
+ dma_addr_t ci_h = 0;
+
++ if (instance->pd_list_not_supported) {
++ dev_info(&instance->pdev->dev, "MR_DCMD_PD_LIST_QUERY "
++ "not supported by firmware\n");
++ return ret;
++ }
++
+ cmd = megasas_get_cmd(instance);
+
+ if (!cmd) {
+diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c
+index f1eed7f9dd67..9c2788b8f2c3 100644
+--- a/drivers/soc/qcom/smp2p.c
++++ b/drivers/soc/qcom/smp2p.c
+@@ -344,11 +344,12 @@ static int qcom_smp2p_outbound_entry(struct qcom_smp2p *smp2p,
+ /* Allocate an entry from the smem item */
+ strlcpy(buf, entry->name, SMP2P_MAX_ENTRY_NAME);
+ memcpy_toio(out->entries[out->valid_entries].name, buf, SMP2P_MAX_ENTRY_NAME);
+- out->valid_entries++;
+
+ /* Make the logical entry reference the physical value */
+ entry->value = &out->entries[out->valid_entries].value;
+
++ out->valid_entries++;
++
+ entry->state = qcom_smem_state_register(node, &smp2p_state_ops, entry);
+ if (IS_ERR(entry->state)) {
+ dev_err(smp2p->dev, "failed to register qcom_smem_state\n");
+diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
+index fe07c0592b44..daf28443b7ad 100644
+--- a/drivers/spi/spi-pxa2xx.c
++++ b/drivers/spi/spi-pxa2xx.c
+@@ -585,7 +585,14 @@ static void reset_sccr1(struct driver_data *drv_data)
+ u32 sccr1_reg;
+
+ sccr1_reg = pxa2xx_spi_read(drv_data, SSCR1) & ~drv_data->int_cr1;
+- sccr1_reg &= ~SSCR1_RFT;
++ switch (drv_data->ssp_type) {
++ case QUARK_X1000_SSP:
++ sccr1_reg &= ~QUARK_X1000_SSCR1_RFT;
++ break;
++ default:
++ sccr1_reg &= ~SSCR1_RFT;
++ break;
++ }
+ sccr1_reg |= chip->threshold;
+ pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg);
+ }
+diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
+index 50f3d3a0dd7b..39b928c2849d 100644
+--- a/drivers/target/iscsi/iscsi_target.c
++++ b/drivers/target/iscsi/iscsi_target.c
+@@ -492,7 +492,8 @@ void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
+ bool scsi_cmd = (cmd->iscsi_opcode == ISCSI_OP_SCSI_CMD);
+
+ spin_lock_bh(&conn->cmd_lock);
+- if (!list_empty(&cmd->i_conn_node))
++ if (!list_empty(&cmd->i_conn_node) &&
++ !(cmd->se_cmd.transport_state & CMD_T_FABRIC_STOP))
+ list_del_init(&cmd->i_conn_node);
+ spin_unlock_bh(&conn->cmd_lock);
+
+@@ -4034,6 +4035,7 @@ int iscsi_target_rx_thread(void *arg)
+
+ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn)
+ {
++ LIST_HEAD(tmp_list);
+ struct iscsi_cmd *cmd = NULL, *cmd_tmp = NULL;
+ struct iscsi_session *sess = conn->sess;
+ /*
+@@ -4042,18 +4044,26 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn)
+ * has been reset -> returned sleeping pre-handler state.
+ */
+ spin_lock_bh(&conn->cmd_lock);
+- list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_conn_node) {
++ list_splice_init(&conn->conn_cmd_list, &tmp_list);
+
++ list_for_each_entry(cmd, &tmp_list, i_conn_node) {
++ struct se_cmd *se_cmd = &cmd->se_cmd;
++
++ if (se_cmd->se_tfo != NULL) {
++ spin_lock(&se_cmd->t_state_lock);
++ se_cmd->transport_state |= CMD_T_FABRIC_STOP;
++ spin_unlock(&se_cmd->t_state_lock);
++ }
++ }
++ spin_unlock_bh(&conn->cmd_lock);
++
++ list_for_each_entry_safe(cmd, cmd_tmp, &tmp_list, i_conn_node) {
+ list_del_init(&cmd->i_conn_node);
+- spin_unlock_bh(&conn->cmd_lock);
+
+ iscsit_increment_maxcmdsn(cmd, sess);
+-
+ iscsit_free_cmd(cmd, true);
+
+- spin_lock_bh(&conn->cmd_lock);
+ }
+- spin_unlock_bh(&conn->cmd_lock);
+ }
+
+ static void iscsit_stop_timers_for_cmds(
+diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
+index b5212f0f9571..adf419fa4291 100644
+--- a/drivers/target/iscsi/iscsi_target_login.c
++++ b/drivers/target/iscsi/iscsi_target_login.c
+@@ -1371,8 +1371,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
+ }
+ login->zero_tsih = zero_tsih;
+
+- conn->sess->se_sess->sup_prot_ops =
+- conn->conn_transport->iscsit_get_sup_prot_ops(conn);
++ if (conn->sess)
++ conn->sess->se_sess->sup_prot_ops =
++ conn->conn_transport->iscsit_get_sup_prot_ops(conn);
+
+ tpg = conn->tpg;
+ if (!tpg) {
+diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
+index a4046ca6e60d..6b423485c5d6 100644
+--- a/drivers/target/target_core_device.c
++++ b/drivers/target/target_core_device.c
+@@ -821,13 +821,15 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
+ * in ATA and we need to set TPE=1
+ */
+ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
+- struct request_queue *q, int block_size)
++ struct request_queue *q)
+ {
++ int block_size = queue_logical_block_size(q);
++
+ if (!blk_queue_discard(q))
+ return false;
+
+- attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) /
+- block_size;
++ attrib->max_unmap_lba_count =
++ q->limits.max_discard_sectors >> (ilog2(block_size) - 9);
+ /*
+ * Currently hardcoded to 1 in Linux/SCSI code..
+ */
+diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
+index 75f0f08b2a34..79291869bce6 100644
+--- a/drivers/target/target_core_file.c
++++ b/drivers/target/target_core_file.c
+@@ -161,8 +161,7 @@ static int fd_configure_device(struct se_device *dev)
+ dev_size, div_u64(dev_size, fd_dev->fd_block_size),
+ fd_dev->fd_block_size);
+
+- if (target_configure_unmap_from_queue(&dev->dev_attrib, q,
+- fd_dev->fd_block_size))
++ if (target_configure_unmap_from_queue(&dev->dev_attrib, q))
+ pr_debug("IFILE: BLOCK Discard support available,"
+ " disabled by default\n");
+ /*
+diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
+index 7c4efb4417b0..2077bc28640a 100644
+--- a/drivers/target/target_core_iblock.c
++++ b/drivers/target/target_core_iblock.c
+@@ -121,8 +121,7 @@ static int iblock_configure_device(struct se_device *dev)
+ dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q);
+ dev->dev_attrib.hw_queue_depth = q->nr_requests;
+
+- if (target_configure_unmap_from_queue(&dev->dev_attrib, q,
+- dev->dev_attrib.hw_block_size))
++ if (target_configure_unmap_from_queue(&dev->dev_attrib, q))
+ pr_debug("IBLOCK: BLOCK Discard support available,"
+ " disabled by default\n");
+
+diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
+index fc91e85f54ba..e2c970a9d61c 100644
+--- a/drivers/target/target_core_internal.h
++++ b/drivers/target/target_core_internal.h
+@@ -146,6 +146,7 @@ sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size);
+ void target_qf_do_work(struct work_struct *work);
+ bool target_check_wce(struct se_device *dev);
+ bool target_check_fua(struct se_device *dev);
++void __target_execute_cmd(struct se_cmd *, bool);
+
+ /* target_core_stat.c */
+ void target_stat_setup_dev_default_groups(struct se_device *);
+diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
+index a9057aa07176..04f616b3ba0a 100644
+--- a/drivers/target/target_core_sbc.c
++++ b/drivers/target/target_core_sbc.c
+@@ -602,7 +602,7 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes
+ cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
+ spin_unlock_irq(&cmd->t_state_lock);
+
+- __target_execute_cmd(cmd);
++ __target_execute_cmd(cmd, false);
+
+ kfree(buf);
+ return ret;
+diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
+index 5ab3967dda43..42c2a44b83dd 100644
+--- a/drivers/target/target_core_transport.c
++++ b/drivers/target/target_core_transport.c
+@@ -1303,23 +1303,6 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb)
+
+ trace_target_sequencer_start(cmd);
+
+- /*
+- * Check for an existing UNIT ATTENTION condition
+- */
+- ret = target_scsi3_ua_check(cmd);
+- if (ret)
+- return ret;
+-
+- ret = target_alua_state_check(cmd);
+- if (ret)
+- return ret;
+-
+- ret = target_check_reservation(cmd);
+- if (ret) {
+- cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT;
+- return ret;
+- }
+-
+ ret = dev->transport->parse_cdb(cmd);
+ if (ret == TCM_UNSUPPORTED_SCSI_OPCODE)
+ pr_warn_ratelimited("%s/%s: Unsupported SCSI Opcode 0x%02x, sending CHECK_CONDITION.\n",
+@@ -1761,20 +1744,45 @@ queue_full:
+ }
+ EXPORT_SYMBOL(transport_generic_request_failure);
+
+-void __target_execute_cmd(struct se_cmd *cmd)
++void __target_execute_cmd(struct se_cmd *cmd, bool do_checks)
+ {
+ sense_reason_t ret;
+
+- if (cmd->execute_cmd) {
+- ret = cmd->execute_cmd(cmd);
+- if (ret) {
+- spin_lock_irq(&cmd->t_state_lock);
+- cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT);
+- spin_unlock_irq(&cmd->t_state_lock);
++ if (!cmd->execute_cmd) {
++ ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
++ goto err;
++ }
++ if (do_checks) {
++ /*
++ * Check for an existing UNIT ATTENTION condition after
++ * target_handle_task_attr() has done SAM task attr
++ * checking, and may have already deferred execution
++ * out to target_restart_delayed_cmds() context.
++ */
++ ret = target_scsi3_ua_check(cmd);
++ if (ret)
++ goto err;
++
++ ret = target_alua_state_check(cmd);
++ if (ret)
++ goto err;
+
+- transport_generic_request_failure(cmd, ret);
++ ret = target_check_reservation(cmd);
++ if (ret) {
++ cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT;
++ goto err;
+ }
+ }
++
++ ret = cmd->execute_cmd(cmd);
++ if (!ret)
++ return;
++err:
++ spin_lock_irq(&cmd->t_state_lock);
++ cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT);
++ spin_unlock_irq(&cmd->t_state_lock);
++
++ transport_generic_request_failure(cmd, ret);
+ }
+
+ static int target_write_prot_action(struct se_cmd *cmd)
+@@ -1819,6 +1827,8 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
+ if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+ return false;
+
++ cmd->se_cmd_flags |= SCF_TASK_ATTR_SET;
++
+ /*
+ * Check for the existence of HEAD_OF_QUEUE, and if true return 1
+ * to allow the passed struct se_cmd list of tasks to the front of the list.
+@@ -1899,7 +1909,7 @@ void target_execute_cmd(struct se_cmd *cmd)
+ return;
+ }
+
+- __target_execute_cmd(cmd);
++ __target_execute_cmd(cmd, true);
+ }
+ EXPORT_SYMBOL(target_execute_cmd);
+
+@@ -1923,7 +1933,7 @@ static void target_restart_delayed_cmds(struct se_device *dev)
+ list_del(&cmd->se_delayed_node);
+ spin_unlock(&dev->delayed_cmd_lock);
+
+- __target_execute_cmd(cmd);
++ __target_execute_cmd(cmd, true);
+
+ if (cmd->sam_task_attr == TCM_ORDERED_TAG)
+ break;
+@@ -1941,6 +1951,9 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
+ if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
+ return;
+
++ if (!(cmd->se_cmd_flags & SCF_TASK_ATTR_SET))
++ goto restart;
++
+ if (cmd->sam_task_attr == TCM_SIMPLE_TAG) {
+ atomic_dec_mb(&dev->simple_cmds);
+ dev->dev_cur_ordered_id++;
+@@ -1957,7 +1970,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
+ pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n",
+ dev->dev_cur_ordered_id);
+ }
+-
++restart:
+ target_restart_delayed_cmds(dev);
+ }
+
+@@ -2557,15 +2570,10 @@ static void target_release_cmd_kref(struct kref *kref)
+ bool fabric_stop;
+
+ spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
+- if (list_empty(&se_cmd->se_cmd_list)) {
+- spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+- target_free_cmd_mem(se_cmd);
+- se_cmd->se_tfo->release_cmd(se_cmd);
+- return;
+- }
+
+ spin_lock(&se_cmd->t_state_lock);
+- fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP);
++ fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP) &&
++ (se_cmd->transport_state & CMD_T_ABORTED);
+ spin_unlock(&se_cmd->t_state_lock);
+
+ if (se_cmd->cmd_wait_set || fabric_stop) {
+diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
+index 954941dd8124..f9c798cba83f 100644
+--- a/drivers/tty/serial/atmel_serial.c
++++ b/drivers/tty/serial/atmel_serial.c
+@@ -482,19 +482,21 @@ static void atmel_start_tx(struct uart_port *port)
+ {
+ struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
+
+- if (atmel_use_pdc_tx(port)) {
+- if (atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN)
+- /* The transmitter is already running. Yes, we
+- really need this.*/
+- return;
++ if (atmel_use_pdc_tx(port) && (atmel_uart_readl(port, ATMEL_PDC_PTSR)
++ & ATMEL_PDC_TXTEN))
++ /* The transmitter is already running. Yes, we
++ really need this.*/
++ return;
+
++ if (atmel_use_pdc_tx(port) || atmel_use_dma_tx(port))
+ if ((port->rs485.flags & SER_RS485_ENABLED) &&
+ !(port->rs485.flags & SER_RS485_RX_DURING_TX))
+ atmel_stop_rx(port);
+
++ if (atmel_use_pdc_tx(port))
+ /* re-enable PDC transmit */
+ atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTEN);
+- }
++
+ /* Enable interrupts */
+ atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask);
+ }
+diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c
+index b7d80bd57db9..7d62610d9de5 100644
+--- a/drivers/tty/serial/msm_serial.c
++++ b/drivers/tty/serial/msm_serial.c
+@@ -726,7 +726,7 @@ static void msm_handle_tx(struct uart_port *port)
+ return;
+ }
+
+- pio_count = CIRC_CNT(xmit->head, xmit->tail, UART_XMIT_SIZE);
++ pio_count = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
+ dma_count = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
+
+ dma_min = 1; /* Always DMA */
+diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
+index 99bb23161dd6..f0bd2ec0db59 100644
+--- a/drivers/tty/serial/samsung.c
++++ b/drivers/tty/serial/samsung.c
+@@ -1684,7 +1684,7 @@ static int s3c24xx_serial_init_port(struct s3c24xx_uart_port *ourport,
+ return -ENODEV;
+
+ if (port->mapbase != 0)
+- return 0;
++ return -EINVAL;
+
+ /* setup info for port */
+ port->dev = &platdev->dev;
+@@ -1738,22 +1738,25 @@ static int s3c24xx_serial_init_port(struct s3c24xx_uart_port *ourport,
+ ourport->dma = devm_kzalloc(port->dev,
+ sizeof(*ourport->dma),
+ GFP_KERNEL);
+- if (!ourport->dma)
+- return -ENOMEM;
++ if (!ourport->dma) {
++ ret = -ENOMEM;
++ goto err;
++ }
+ }
+
+ ourport->clk = clk_get(&platdev->dev, "uart");
+ if (IS_ERR(ourport->clk)) {
+ pr_err("%s: Controller clock not found\n",
+ dev_name(&platdev->dev));
+- return PTR_ERR(ourport->clk);
++ ret = PTR_ERR(ourport->clk);
++ goto err;
+ }
+
+ ret = clk_prepare_enable(ourport->clk);
+ if (ret) {
+ pr_err("uart: clock failed to prepare+enable: %d\n", ret);
+ clk_put(ourport->clk);
+- return ret;
++ goto err;
+ }
+
+ /* Keep all interrupts masked and cleared */
+@@ -1769,7 +1772,12 @@ static int s3c24xx_serial_init_port(struct s3c24xx_uart_port *ourport,
+
+ /* reset the fifos (and setup the uart) */
+ s3c24xx_serial_resetport(port, cfg);
++
+ return 0;
++
++err:
++ port->mapbase = 0;
++ return ret;
+ }
+
+ /* Device driver serial port probe */
+diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
+index 944a6dca0fcb..d2e50a27140c 100644
+--- a/drivers/usb/core/quirks.c
++++ b/drivers/usb/core/quirks.c
+@@ -128,6 +128,9 @@ static const struct usb_device_id usb_quirk_list[] = {
+ { USB_DEVICE(0x04f3, 0x016f), .driver_info =
+ USB_QUIRK_DEVICE_QUALIFIER },
+
++ { USB_DEVICE(0x04f3, 0x0381), .driver_info =
++ USB_QUIRK_NO_LPM },
++
+ { USB_DEVICE(0x04f3, 0x21b8), .driver_info =
+ USB_QUIRK_DEVICE_QUALIFIER },
+
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index 07248ff1be5c..716f4f051a0a 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -258,11 +258,13 @@ int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep,
+ * We will also set SUSPHY bit to what it was before returning as stated
+ * by the same section on Synopsys databook.
+ */
+- reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+- if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) {
+- susphy = true;
+- reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
+- dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
++ if (dwc->gadget.speed <= USB_SPEED_HIGH) {
++ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
++ if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) {
++ susphy = true;
++ reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
++ dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
++ }
+ }
+
+ if (cmd == DWC3_DEPCMD_STARTTRANSFER) {
+@@ -2023,6 +2025,10 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
+ return 1;
+ }
+
++ if (usb_endpoint_xfer_isoc(dep->endpoint.desc))
++ if ((event->status & DEPEVT_STATUS_IOC) &&
++ (trb->ctrl & DWC3_TRB_CTRL_IOC))
++ return 0;
+ return 1;
+ }
+
+diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
+index 18569de06b04..bb1f6c8f0f01 100644
+--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
++++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
+@@ -1920,6 +1920,8 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev,
+
+ udc->errata = match->data;
+ udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc");
++ if (IS_ERR(udc->pmc))
++ udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9x5-pmc");
+ if (udc->errata && IS_ERR(udc->pmc))
+ return ERR_CAST(udc->pmc);
+
+diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c
+index ebc51ec5790a..71751429814f 100644
+--- a/drivers/usb/gadget/udc/pch_udc.c
++++ b/drivers/usb/gadget/udc/pch_udc.c
+@@ -1477,11 +1477,11 @@ static void complete_req(struct pch_udc_ep *ep, struct pch_udc_request *req,
+ req->dma_mapped = 0;
+ }
+ ep->halted = 1;
+- spin_lock(&dev->lock);
++ spin_unlock(&dev->lock);
+ if (!ep->in)
+ pch_udc_ep_clear_rrdy(ep);
+ usb_gadget_giveback_request(&ep->ep, &req->req);
+- spin_unlock(&dev->lock);
++ spin_lock(&dev->lock);
+ ep->halted = halted;
+ }
+
+@@ -2573,9 +2573,9 @@ static void pch_udc_svc_ur_interrupt(struct pch_udc_dev *dev)
+ empty_req_queue(ep);
+ }
+ if (dev->driver) {
+- spin_lock(&dev->lock);
+- usb_gadget_udc_reset(&dev->gadget, dev->driver);
+ spin_unlock(&dev->lock);
++ usb_gadget_udc_reset(&dev->gadget, dev->driver);
++ spin_lock(&dev->lock);
+ }
+ }
+
+@@ -2654,9 +2654,9 @@ static void pch_udc_svc_intf_interrupt(struct pch_udc_dev *dev)
+ dev->ep[i].halted = 0;
+ }
+ dev->stall = 0;
+- spin_lock(&dev->lock);
+- dev->driver->setup(&dev->gadget, &dev->setup_data);
+ spin_unlock(&dev->lock);
++ dev->driver->setup(&dev->gadget, &dev->setup_data);
++ spin_lock(&dev->lock);
+ }
+
+ /**
+@@ -2691,9 +2691,9 @@ static void pch_udc_svc_cfg_interrupt(struct pch_udc_dev *dev)
+ dev->stall = 0;
+
+ /* call gadget zero with setup data received */
+- spin_lock(&dev->lock);
+- dev->driver->setup(&dev->gadget, &dev->setup_data);
+ spin_unlock(&dev->lock);
++ dev->driver->setup(&dev->gadget, &dev->setup_data);
++ spin_lock(&dev->lock);
+ }
+
+ /**
+diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
+index 7be4e7d57ace..280ed5ff021b 100644
+--- a/drivers/usb/renesas_usbhs/fifo.c
++++ b/drivers/usb/renesas_usbhs/fifo.c
+@@ -810,20 +810,27 @@ static void xfer_work(struct work_struct *work)
+ {
+ struct usbhs_pkt *pkt = container_of(work, struct usbhs_pkt, work);
+ struct usbhs_pipe *pipe = pkt->pipe;
+- struct usbhs_fifo *fifo = usbhs_pipe_to_fifo(pipe);
++ struct usbhs_fifo *fifo;
+ struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe);
+ struct dma_async_tx_descriptor *desc;
+- struct dma_chan *chan = usbhsf_dma_chan_get(fifo, pkt);
++ struct dma_chan *chan;
+ struct device *dev = usbhs_priv_to_dev(priv);
+ enum dma_transfer_direction dir;
++ unsigned long flags;
+
++ usbhs_lock(priv, flags);
++ fifo = usbhs_pipe_to_fifo(pipe);
++ if (!fifo)
++ goto xfer_work_end;
++
++ chan = usbhsf_dma_chan_get(fifo, pkt);
+ dir = usbhs_pipe_is_dir_in(pipe) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
+
+ desc = dmaengine_prep_slave_single(chan, pkt->dma + pkt->actual,
+ pkt->trans, dir,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ if (!desc)
+- return;
++ goto xfer_work_end;
+
+ desc->callback = usbhsf_dma_complete;
+ desc->callback_param = pipe;
+@@ -831,7 +838,7 @@ static void xfer_work(struct work_struct *work)
+ pkt->cookie = dmaengine_submit(desc);
+ if (pkt->cookie < 0) {
+ dev_err(dev, "Failed to submit dma descriptor\n");
+- return;
++ goto xfer_work_end;
+ }
+
+ dev_dbg(dev, " %s %d (%d/ %d)\n",
+@@ -842,6 +849,9 @@ static void xfer_work(struct work_struct *work)
+ usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->trans);
+ dma_async_issue_pending(chan);
+ usbhs_pipe_enable(pipe);
++
++xfer_work_end:
++ usbhs_unlock(priv, flags);
+ }
+
+ /*
+diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
+index 30345c2d01be..50f3363cc382 100644
+--- a/drivers/usb/renesas_usbhs/mod_gadget.c
++++ b/drivers/usb/renesas_usbhs/mod_gadget.c
+@@ -585,6 +585,9 @@ static int usbhsg_ep_enable(struct usb_ep *ep,
+ struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv);
+ struct usbhs_pipe *pipe;
+ int ret = -EIO;
++ unsigned long flags;
++
++ usbhs_lock(priv, flags);
+
+ /*
+ * if it already have pipe,
+@@ -593,7 +596,8 @@ static int usbhsg_ep_enable(struct usb_ep *ep,
+ if (uep->pipe) {
+ usbhs_pipe_clear(uep->pipe);
+ usbhs_pipe_sequence_data0(uep->pipe);
+- return 0;
++ ret = 0;
++ goto usbhsg_ep_enable_end;
+ }
+
+ pipe = usbhs_pipe_malloc(priv,
+@@ -621,6 +625,9 @@ static int usbhsg_ep_enable(struct usb_ep *ep,
+ ret = 0;
+ }
+
++usbhsg_ep_enable_end:
++ usbhs_unlock(priv, flags);
++
+ return ret;
+ }
+
+diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
+index d96d423d00e6..8e07536c233a 100644
+--- a/drivers/usb/serial/option.c
++++ b/drivers/usb/serial/option.c
+@@ -273,6 +273,7 @@ static void option_instat_callback(struct urb *urb);
+ #define TELIT_PRODUCT_LE922_USBCFG5 0x1045
+ #define TELIT_PRODUCT_LE920 0x1200
+ #define TELIT_PRODUCT_LE910 0x1201
++#define TELIT_PRODUCT_LE910_USBCFG4 0x1206
+
+ /* ZTE PRODUCTS */
+ #define ZTE_VENDOR_ID 0x19d2
+@@ -1198,6 +1199,8 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
+ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
+ .driver_info = (kernel_ulong_t)&telit_le910_blacklist },
++ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
++ .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
+ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
+ .driver_info = (kernel_ulong_t)&telit_le920_blacklist },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
+diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
+index 476c0e3a7150..f6ea8f4ba7cf 100644
+--- a/drivers/virtio/virtio_balloon.c
++++ b/drivers/virtio/virtio_balloon.c
+@@ -202,6 +202,8 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
+ num = min(num, ARRAY_SIZE(vb->pfns));
+
+ mutex_lock(&vb->balloon_lock);
++ /* We can't release more pages than taken */
++ num = min(num, (size_t)vb->num_pages);
+ for (vb->num_pfns = 0; vb->num_pfns < num;
+ vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
+ page = balloon_page_dequeue(vb_dev_info);
+diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c
+index a2eec97d5064..bb09de633939 100644
+--- a/drivers/w1/masters/omap_hdq.c
++++ b/drivers/w1/masters/omap_hdq.c
+@@ -390,8 +390,6 @@ static int hdq_read_byte(struct hdq_data *hdq_data, u8 *val)
+ goto out;
+ }
+
+- hdq_data->hdq_irqstatus = 0;
+-
+ if (!(hdq_data->hdq_irqstatus & OMAP_HDQ_INT_STATUS_RXCOMPLETE)) {
+ hdq_reg_merge(hdq_data, OMAP_HDQ_CTRL_STATUS,
+ OMAP_HDQ_CTRL_STATUS_DIR | OMAP_HDQ_CTRL_STATUS_GO,
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index 75533adef998..92fe3f8012db 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -2696,12 +2696,6 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+ btrfs_bio->csum = NULL;
+ btrfs_bio->csum_allocated = NULL;
+ btrfs_bio->end_io = NULL;
+-
+-#ifdef CONFIG_BLK_CGROUP
+- /* FIXME, put this into bio_clone_bioset */
+- if (bio->bi_css)
+- bio_associate_blkcg(new, bio->bi_css);
+-#endif
+ }
+ return new;
+ }
+diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
+index 2234e88cf674..b56887b35889 100644
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1629,13 +1629,11 @@ again:
+ * managed to copy.
+ */
+ if (num_sectors > dirty_sectors) {
+- /*
+- * we round down because we don't want to count
+- * any partial blocks actually sent through the
+- * IO machines
+- */
+- release_bytes = round_down(release_bytes - copied,
+- root->sectorsize);
++
++ /* release everything except the sectors we dirtied */
++ release_bytes -= dirty_sectors <<
++ root->fs_info->sb->s_blocksize_bits;
++
+ if (copied > 0) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
+index 4ae75006e73b..3f7c2cd41f8f 100644
+--- a/fs/cachefiles/namei.c
++++ b/fs/cachefiles/namei.c
+@@ -263,6 +263,8 @@ requeue:
+ void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
+ struct cachefiles_object *object)
+ {
++ blkcnt_t i_blocks = d_backing_inode(object->dentry)->i_blocks;
++
+ write_lock(&cache->active_lock);
+ rb_erase(&object->active_node, &cache->active_nodes);
+ clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
+@@ -273,8 +275,7 @@ void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
+ /* This object can now be culled, so we need to let the daemon know
+ * that there is something it can remove if it needs to.
+ */
+- atomic_long_add(d_backing_inode(object->dentry)->i_blocks,
+- &cache->b_released);
++ atomic_long_add(i_blocks, &cache->b_released);
+ if (atomic_inc_return(&cache->f_released))
+ cachefiles_state_changed(cache);
+ }
+diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
+index 3182273a3407..1418daa03d95 100644
+--- a/fs/cifs/cifs_fs_sb.h
++++ b/fs/cifs/cifs_fs_sb.h
+@@ -46,6 +46,9 @@
+ #define CIFS_MOUNT_CIFS_BACKUPUID 0x200000 /* backup intent bit for a user */
+ #define CIFS_MOUNT_CIFS_BACKUPGID 0x400000 /* backup intent bit for a group */
+ #define CIFS_MOUNT_MAP_SFM_CHR 0x800000 /* SFM/MAC mapping for illegal chars */
++#define CIFS_MOUNT_USE_PREFIX_PATH 0x1000000 /* make subpath with inaccessible
++ * root mountable
++ */
+
+ struct cifs_sb_info {
+ struct rb_root tlink_tree;
+@@ -67,5 +70,6 @@ struct cifs_sb_info {
+ struct backing_dev_info bdi;
+ struct delayed_work prune_tlinks;
+ struct rcu_head rcu;
++ char *prepath;
+ };
+ #endif /* _CIFS_FS_SB_H */
+diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
+index 6aeb8d4616a4..8347c90cf483 100644
+--- a/fs/cifs/cifsencrypt.c
++++ b/fs/cifs/cifsencrypt.c
+@@ -743,24 +743,26 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
+
+ memcpy(ses->auth_key.response + baselen, tiblob, tilen);
+
++ mutex_lock(&ses->server->srv_mutex);
++
+ rc = crypto_hmacmd5_alloc(ses->server);
+ if (rc) {
+ cifs_dbg(VFS, "could not crypto alloc hmacmd5 rc %d\n", rc);
+- goto setup_ntlmv2_rsp_ret;
++ goto unlock;
+ }
+
+ /* calculate ntlmv2_hash */
+ rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "could not get v2 hash rc %d\n", rc);
+- goto setup_ntlmv2_rsp_ret;
++ goto unlock;
+ }
+
+ /* calculate first part of the client response (CR1) */
+ rc = CalcNTLMv2_response(ses, ntlmv2_hash);
+ if (rc) {
+ cifs_dbg(VFS, "Could not calculate CR1 rc: %d\n", rc);
+- goto setup_ntlmv2_rsp_ret;
++ goto unlock;
+ }
+
+ /* now calculate the session key for NTLMv2 */
+@@ -769,13 +771,13 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n",
+ __func__);
+- goto setup_ntlmv2_rsp_ret;
++ goto unlock;
+ }
+
+ rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__);
+- goto setup_ntlmv2_rsp_ret;
++ goto unlock;
+ }
+
+ rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+@@ -783,7 +785,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
+ CIFS_HMAC_MD5_HASH_SIZE);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
+- goto setup_ntlmv2_rsp_ret;
++ goto unlock;
+ }
+
+ rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+@@ -791,6 +793,8 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
+ if (rc)
+ cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
+
++unlock:
++ mutex_unlock(&ses->server->srv_mutex);
+ setup_ntlmv2_rsp_ret:
+ kfree(tiblob);
+
+diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
+index 5d841f39c4b7..6bbec5e784cd 100644
+--- a/fs/cifs/cifsfs.c
++++ b/fs/cifs/cifsfs.c
+@@ -689,6 +689,14 @@ cifs_do_mount(struct file_system_type *fs_type,
+ goto out_cifs_sb;
+ }
+
++ if (volume_info->prepath) {
++ cifs_sb->prepath = kstrdup(volume_info->prepath, GFP_KERNEL);
++ if (cifs_sb->prepath == NULL) {
++ root = ERR_PTR(-ENOMEM);
++ goto out_cifs_sb;
++ }
++ }
++
+ cifs_setup_cifs_sb(volume_info, cifs_sb);
+
+ rc = cifs_mount(cifs_sb, volume_info);
+@@ -727,7 +735,11 @@ cifs_do_mount(struct file_system_type *fs_type,
+ sb->s_flags |= MS_ACTIVE;
+ }
+
+- root = cifs_get_root(volume_info, sb);
++ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
++ root = dget(sb->s_root);
++ else
++ root = cifs_get_root(volume_info, sb);
++
+ if (IS_ERR(root))
+ goto out_super;
+
+diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
+index 7d2b15c06090..7ae03283bd61 100644
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -1228,6 +1228,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
+ vol->ops = &smb1_operations;
+ vol->vals = &smb1_values;
+
++ vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
++
+ if (!mountdata)
+ goto cifs_parse_mount_err;
+
+@@ -2049,7 +2051,7 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
+ if (!match_security(server, vol))
+ return 0;
+
+- if (server->echo_interval != vol->echo_interval)
++ if (server->echo_interval != vol->echo_interval * HZ)
+ return 0;
+
+ return 1;
+@@ -3483,6 +3485,44 @@ cifs_get_volume_info(char *mount_data, const char *devname)
+ return volume_info;
+ }
+
++static int
++cifs_are_all_path_components_accessible(struct TCP_Server_Info *server,
++ unsigned int xid,
++ struct cifs_tcon *tcon,
++ struct cifs_sb_info *cifs_sb,
++ char *full_path)
++{
++ int rc;
++ char *s;
++ char sep, tmp;
++
++ sep = CIFS_DIR_SEP(cifs_sb);
++ s = full_path;
++
++ rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, "");
++ while (rc == 0) {
++ /* skip separators */
++ while (*s == sep)
++ s++;
++ if (!*s)
++ break;
++ /* next separator */
++ while (*s && *s != sep)
++ s++;
++
++ /*
++ * temporarily null-terminate the path at the end of
++ * the current component
++ */
++ tmp = *s;
++ *s = 0;
++ rc = server->ops->is_path_accessible(xid, tcon, cifs_sb,
++ full_path);
++ *s = tmp;
++ }
++ return rc;
++}
++
+ int
+ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
+ {
+@@ -3620,6 +3660,16 @@ remote_path_check:
+ kfree(full_path);
+ goto mount_fail_check;
+ }
++
++ rc = cifs_are_all_path_components_accessible(server,
++ xid, tcon, cifs_sb,
++ full_path);
++ if (rc != 0) {
++ cifs_dbg(VFS, "cannot query dirs between root and final path, "
++ "enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
++ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
++ rc = 0;
++ }
+ kfree(full_path);
+ }
+
+@@ -3889,6 +3939,7 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
+
+ bdi_destroy(&cifs_sb->bdi);
+ kfree(cifs_sb->mountdata);
++ kfree(cifs_sb->prepath);
+ call_rcu(&cifs_sb->rcu, delayed_free);
+ }
+
+diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
+index fb0903fffc22..6f7333d552a3 100644
+--- a/fs/cifs/dir.c
++++ b/fs/cifs/dir.c
+@@ -84,6 +84,7 @@ build_path_from_dentry(struct dentry *direntry)
+ struct dentry *temp;
+ int namelen;
+ int dfsplen;
++ int pplen = 0;
+ char *full_path;
+ char dirsep;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
+@@ -95,8 +96,12 @@ build_path_from_dentry(struct dentry *direntry)
+ dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1);
+ else
+ dfsplen = 0;
++
++ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
++ pplen = cifs_sb->prepath ? strlen(cifs_sb->prepath) + 1 : 0;
++
+ cifs_bp_rename_retry:
+- namelen = dfsplen;
++ namelen = dfsplen + pplen;
+ seq = read_seqbegin(&rename_lock);
+ rcu_read_lock();
+ for (temp = direntry; !IS_ROOT(temp);) {
+@@ -137,7 +142,7 @@ cifs_bp_rename_retry:
+ }
+ }
+ rcu_read_unlock();
+- if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) {
++ if (namelen != dfsplen + pplen || read_seqretry(&rename_lock, seq)) {
+ cifs_dbg(FYI, "did not end path lookup where expected. namelen=%ddfsplen=%d\n",
+ namelen, dfsplen);
+ /* presumably this is only possible if racing with a rename
+@@ -153,6 +158,17 @@ cifs_bp_rename_retry:
+ those safely to '/' if any are found in the middle of the prepath */
+ /* BB test paths to Windows with '/' in the midst of prepath */
+
++ if (pplen) {
++ int i;
++
++ cifs_dbg(FYI, "using cifs_sb prepath <%s>\n", cifs_sb->prepath);
++ memcpy(full_path+dfsplen+1, cifs_sb->prepath, pplen-1);
++ full_path[dfsplen] = '\\';
++ for (i = 0; i < pplen-1; i++)
++ if (full_path[dfsplen+1+i] == '/')
++ full_path[dfsplen+1+i] = CIFS_DIR_SEP(cifs_sb);
++ }
++
+ if (dfsplen) {
+ strncpy(full_path, tcon->treeName, dfsplen);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
+@@ -229,6 +245,13 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
+ goto cifs_create_get_file_info;
+ }
+
++ if (S_ISDIR(newinode->i_mode)) {
++ CIFSSMBClose(xid, tcon, fid->netfid);
++ iput(newinode);
++ rc = -EISDIR;
++ goto out;
++ }
++
+ if (!S_ISREG(newinode->i_mode)) {
+ /*
+ * The server may allow us to open things like
+@@ -399,10 +422,14 @@ cifs_create_set_dentry:
+ if (rc != 0) {
+ cifs_dbg(FYI, "Create worked, get_inode_info failed rc = %d\n",
+ rc);
+- if (server->ops->close)
+- server->ops->close(xid, tcon, fid);
+- goto out;
++ goto out_err;
+ }
++
++ if (S_ISDIR(newinode->i_mode)) {
++ rc = -EISDIR;
++ goto out_err;
++ }
++
+ d_drop(direntry);
+ d_add(direntry, newinode);
+
+@@ -410,6 +437,13 @@ out:
+ kfree(buf);
+ kfree(full_path);
+ return rc;
++
++out_err:
++ if (server->ops->close)
++ server->ops->close(xid, tcon, fid);
++ if (newinode)
++ iput(newinode);
++ goto out;
+ }
+
+ int
+diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
+index 514dadb0575d..b87efd0c92d6 100644
+--- a/fs/cifs/inode.c
++++ b/fs/cifs/inode.c
+@@ -1002,10 +1002,26 @@ struct inode *cifs_root_iget(struct super_block *sb)
+ struct inode *inode = NULL;
+ long rc;
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
++ char *path = NULL;
++ int len;
++
++ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
++ && cifs_sb->prepath) {
++ len = strlen(cifs_sb->prepath);
++ path = kzalloc(len + 2 /* leading sep + null */, GFP_KERNEL);
++ if (path == NULL)
++ return ERR_PTR(-ENOMEM);
++ path[0] = '/';
++ memcpy(path+1, cifs_sb->prepath, len);
++ } else {
++ path = kstrdup("", GFP_KERNEL);
++ if (path == NULL)
++ return ERR_PTR(-ENOMEM);
++ }
+
+ xid = get_xid();
+ if (tcon->unix_ext) {
+- rc = cifs_get_inode_info_unix(&inode, "", sb, xid);
++ rc = cifs_get_inode_info_unix(&inode, path, sb, xid);
+ /* some servers mistakenly claim POSIX support */
+ if (rc != -EOPNOTSUPP)
+ goto iget_no_retry;
+@@ -1013,7 +1029,8 @@ struct inode *cifs_root_iget(struct super_block *sb)
+ tcon->unix_ext = false;
+ }
+
+- rc = cifs_get_inode_info(&inode, "", NULL, sb, xid, NULL);
++ convert_delimiter(path, CIFS_DIR_SEP(cifs_sb));
++ rc = cifs_get_inode_info(&inode, path, NULL, sb, xid, NULL);
+
+ iget_no_retry:
+ if (!inode) {
+@@ -1042,6 +1059,7 @@ iget_no_retry:
+ }
+
+ out:
++ kfree(path);
+ /* can not call macro free_xid here since in a void func
+ * TODO: This is no longer true
+ */
+diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
+index 3525ed756173..505e6d6406fd 100644
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -1044,6 +1044,9 @@ smb2_new_lease_key(struct cifs_fid *fid)
+ get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
+ }
+
++#define SMB2_SYMLINK_STRUCT_SIZE \
++ (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp))
++
+ static int
+ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
+ const char *full_path, char **target_path,
+@@ -1056,7 +1059,10 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_fid fid;
+ struct smb2_err_rsp *err_buf = NULL;
+ struct smb2_symlink_err_rsp *symlink;
+- unsigned int sub_len, sub_offset;
++ unsigned int sub_len;
++ unsigned int sub_offset;
++ unsigned int print_len;
++ unsigned int print_offset;
+
+ cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
+
+@@ -1077,11 +1083,33 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
+ kfree(utf16_path);
+ return -ENOENT;
+ }
++
++ if (le32_to_cpu(err_buf->ByteCount) < sizeof(struct smb2_symlink_err_rsp) ||
++ get_rfc1002_length(err_buf) + 4 < SMB2_SYMLINK_STRUCT_SIZE) {
++ kfree(utf16_path);
++ return -ENOENT;
++ }
++
+ /* open must fail on symlink - reset rc */
+ rc = 0;
+ symlink = (struct smb2_symlink_err_rsp *)err_buf->ErrorData;
+ sub_len = le16_to_cpu(symlink->SubstituteNameLength);
+ sub_offset = le16_to_cpu(symlink->SubstituteNameOffset);
++ print_len = le16_to_cpu(symlink->PrintNameLength);
++ print_offset = le16_to_cpu(symlink->PrintNameOffset);
++
++ if (get_rfc1002_length(err_buf) + 4 <
++ SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) {
++ kfree(utf16_path);
++ return -ENOENT;
++ }
++
++ if (get_rfc1002_length(err_buf) + 4 <
++ SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) {
++ kfree(utf16_path);
++ return -ENOENT;
++ }
++
+ *target_path = cifs_strndup_from_utf16(
+ (char *)symlink->PathBuffer + sub_offset,
+ sub_len, true, cifs_sb->local_nls);
+diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
+index 70078096117d..78313adb3c95 100644
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -124,7 +124,7 @@ static int journal_submit_commit_record(journal_t *journal,
+ struct commit_header *tmp;
+ struct buffer_head *bh;
+ int ret;
+- struct timespec now = current_kernel_time();
++ struct timespec64 now = current_kernel_time64();
+
+ *cbh = NULL;
+
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index ff416d0e24bc..7796beacdefb 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -427,6 +427,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
+ case -NFS4ERR_DELAY:
+ nfs_inc_server_stats(server, NFSIOS_DELAY);
+ case -NFS4ERR_GRACE:
++ case -NFS4ERR_LAYOUTTRYLATER:
+ case -NFS4ERR_RECALLCONFLICT:
+ exception->delay = 1;
+ return 0;
+@@ -7869,11 +7870,13 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
+ struct inode *inode = lgp->args.inode;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct pnfs_layout_hdr *lo;
+- int status = task->tk_status;
++ int nfs4err = task->tk_status;
++ int err, status = 0;
++ LIST_HEAD(head);
+
+ dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
+
+- switch (status) {
++ switch (nfs4err) {
+ case 0:
+ goto out;
+
+@@ -7905,45 +7908,43 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
+ status = -EOVERFLOW;
+ goto out;
+ }
+- /* Fallthrough */
++ status = -EBUSY;
++ break;
+ case -NFS4ERR_RECALLCONFLICT:
+- nfs4_handle_exception(server, -NFS4ERR_RECALLCONFLICT,
+- exception);
+ status = -ERECALLCONFLICT;
+- goto out;
++ break;
+ case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_BAD_STATEID:
+ exception->timeout = 0;
+ spin_lock(&inode->i_lock);
+- if (nfs4_stateid_match(&lgp->args.stateid,
++ lo = NFS_I(inode)->layout;
++ /* If the open stateid was bad, then recover it. */
++ if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
++ nfs4_stateid_match_other(&lgp->args.stateid,
+ &lgp->args.ctx->state->stateid)) {
+ spin_unlock(&inode->i_lock);
+- /* If the open stateid was bad, then recover it. */
+ exception->state = lgp->args.ctx->state;
+ break;
+ }
+- lo = NFS_I(inode)->layout;
+- if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) &&
+- nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
+- LIST_HEAD(head);
+-
+- /*
+- * Mark the bad layout state as invalid, then retry
+- * with the current stateid.
+- */
+- set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+- pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
+- spin_unlock(&inode->i_lock);
+- pnfs_free_lseg_list(&head);
+- status = -EAGAIN;
+- goto out;
+- } else
+- spin_unlock(&inode->i_lock);
+- }
+
+- status = nfs4_handle_exception(server, status, exception);
+- if (exception->retry)
++ /*
++ * Mark the bad layout state as invalid, then retry
++ */
++ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
++ pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
++ spin_unlock(&inode->i_lock);
++ pnfs_free_lseg_list(&head);
+ status = -EAGAIN;
++ goto out;
++ }
++
++ err = nfs4_handle_exception(server, nfs4err, exception);
++ if (!status) {
++ if (exception->retry)
++ status = -EAGAIN;
++ else
++ status = err;
++ }
+ out:
+ dprintk("<-- %s\n", __func__);
+ return status;
+diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
+index 0fbe734cc38c..7d992362ff04 100644
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -1505,7 +1505,7 @@ pnfs_update_layout(struct inode *ino,
+ struct pnfs_layout_segment *lseg = NULL;
+ nfs4_stateid stateid;
+ long timeout = 0;
+- unsigned long giveup = jiffies + rpc_get_timeout(server->client);
++ unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
+ bool first;
+
+ if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
+@@ -1645,33 +1645,44 @@ lookup_again:
+ lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags);
+ trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
+ PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
++ atomic_dec(&lo->plh_outstanding);
+ if (IS_ERR(lseg)) {
+ switch(PTR_ERR(lseg)) {
+- case -ERECALLCONFLICT:
++ case -EBUSY:
+ if (time_after(jiffies, giveup))
+ lseg = NULL;
+- /* Fallthrough */
+- case -EAGAIN:
+- pnfs_put_layout_hdr(lo);
+- if (first)
+- pnfs_clear_first_layoutget(lo);
+- if (lseg) {
+- trace_pnfs_update_layout(ino, pos, count,
+- iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
+- goto lookup_again;
++ break;
++ case -ERECALLCONFLICT:
++ /* Huh? We hold no layouts, how is there a recall? */
++ if (first) {
++ lseg = NULL;
++ break;
+ }
++ /* Destroy the existing layout and start over */
++ if (time_after(jiffies, giveup))
++ pnfs_destroy_layout(NFS_I(ino));
+ /* Fallthrough */
++ case -EAGAIN:
++ break;
+ default:
+ if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
+ pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
+ lseg = NULL;
+ }
++ goto out_put_layout_hdr;
++ }
++ if (lseg) {
++ if (first)
++ pnfs_clear_first_layoutget(lo);
++ trace_pnfs_update_layout(ino, pos, count,
++ iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
++ pnfs_put_layout_hdr(lo);
++ goto lookup_again;
+ }
+ } else {
+ pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
+ }
+
+- atomic_dec(&lo->plh_outstanding);
+ out_put_layout_hdr:
+ if (first)
+ pnfs_clear_first_layoutget(lo);
+diff --git a/fs/nfs/write.c b/fs/nfs/write.c
+index e1c74d3db64d..649fa5e26050 100644
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -1289,6 +1289,9 @@ int nfs_updatepage(struct file *file, struct page *page,
+ dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n",
+ file, count, (long long)(page_file_offset(page) + offset));
+
++ if (!count)
++ goto out;
++
+ if (nfs_can_extend_write(file, page, inode)) {
+ count = max(count + offset, nfs_page_length(page));
+ offset = 0;
+@@ -1299,7 +1302,7 @@ int nfs_updatepage(struct file *file, struct page *page,
+ nfs_set_pageerror(page);
+ else
+ __set_page_dirty_nobuffers(page);
+-
++out:
+ dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
+ status, (long long)i_size_read(inode));
+ return status;
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 70d0b9b33031..806eda192d1c 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4906,6 +4906,32 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ return nfs_ok;
+ }
+
++static __be32
++nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
++{
++ struct nfs4_ol_stateid *stp = openlockstateid(s);
++ __be32 ret;
++
++ mutex_lock(&stp->st_mutex);
++
++ ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
++ if (ret)
++ goto out;
++
++ ret = nfserr_locks_held;
++ if (check_for_locks(stp->st_stid.sc_file,
++ lockowner(stp->st_stateowner)))
++ goto out;
++
++ release_lock_stateid(stp);
++ ret = nfs_ok;
++
++out:
++ mutex_unlock(&stp->st_mutex);
++ nfs4_put_stid(s);
++ return ret;
++}
++
+ __be32
+ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_free_stateid *free_stateid)
+@@ -4913,7 +4939,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ stateid_t *stateid = &free_stateid->fr_stateid;
+ struct nfs4_stid *s;
+ struct nfs4_delegation *dp;
+- struct nfs4_ol_stateid *stp;
+ struct nfs4_client *cl = cstate->session->se_client;
+ __be32 ret = nfserr_bad_stateid;
+
+@@ -4932,18 +4957,9 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ ret = nfserr_locks_held;
+ break;
+ case NFS4_LOCK_STID:
+- ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
+- if (ret)
+- break;
+- stp = openlockstateid(s);
+- ret = nfserr_locks_held;
+- if (check_for_locks(stp->st_stid.sc_file,
+- lockowner(stp->st_stateowner)))
+- break;
+- WARN_ON(!unhash_lock_stateid(stp));
++ atomic_inc(&s->sc_count);
+ spin_unlock(&cl->cl_lock);
+- nfs4_put_stid(s);
+- ret = nfs_ok;
++ ret = nfsd4_free_lock_stateid(stateid, s);
+ goto out;
+ case NFS4_REVOKED_DELEG_STID:
+ dp = delegstateid(s);
+@@ -5510,7 +5526,7 @@ static __be32
+ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
+ struct nfs4_ol_stateid *ost,
+ struct nfsd4_lock *lock,
+- struct nfs4_ol_stateid **lst, bool *new)
++ struct nfs4_ol_stateid **plst, bool *new)
+ {
+ __be32 status;
+ struct nfs4_file *fi = ost->st_stid.sc_file;
+@@ -5518,7 +5534,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
+ struct nfs4_client *cl = oo->oo_owner.so_client;
+ struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
+ struct nfs4_lockowner *lo;
++ struct nfs4_ol_stateid *lst;
+ unsigned int strhashval;
++ bool hashed;
+
+ lo = find_lockowner_str(cl, &lock->lk_new_owner);
+ if (!lo) {
+@@ -5534,12 +5552,27 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
+ goto out;
+ }
+
+- *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
+- if (*lst == NULL) {
++retry:
++ lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
++ if (lst == NULL) {
+ status = nfserr_jukebox;
+ goto out;
+ }
++
++ mutex_lock(&lst->st_mutex);
++
++ /* See if it's still hashed to avoid race with FREE_STATEID */
++ spin_lock(&cl->cl_lock);
++ hashed = !list_empty(&lst->st_perfile);
++ spin_unlock(&cl->cl_lock);
++
++ if (!hashed) {
++ mutex_unlock(&lst->st_mutex);
++ nfs4_put_stid(&lst->st_stid);
++ goto retry;
++ }
+ status = nfs_ok;
++ *plst = lst;
+ out:
+ nfs4_put_stateowner(&lo->lo_owner);
+ return status;
+@@ -5606,8 +5639,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ goto out;
+ status = lookup_or_create_lock_state(cstate, open_stp, lock,
+ &lock_stp, &new);
+- if (status == nfs_ok)
+- mutex_lock(&lock_stp->st_mutex);
+ } else {
+ status = nfs4_preprocess_seqid_op(cstate,
+ lock->lk_old_lock_seqid,
+diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
+index 9a7693d5f8ff..6db75cbb668f 100644
+--- a/fs/overlayfs/super.c
++++ b/fs/overlayfs/super.c
+@@ -404,7 +404,8 @@ static struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
+ static bool ovl_dentry_remote(struct dentry *dentry)
+ {
+ return dentry->d_flags &
+- (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
++ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE |
++ DCACHE_OP_REAL);
+ }
+
+ static bool ovl_dentry_weird(struct dentry *dentry)
+diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
+index 4c463b99fe57..a36a5a418f7d 100644
+--- a/fs/xfs/xfs_aops.c
++++ b/fs/xfs/xfs_aops.c
+@@ -87,6 +87,12 @@ xfs_find_bdev_for_inode(
+ * We're now finished for good with this page. Update the page state via the
+ * associated buffer_heads, paying attention to the start and end offsets that
+ * we need to process on the page.
++ *
++ * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
++ * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
++ * the page at all, as we may be racing with memory reclaim and it can free both
++ * the bufferhead chain and the page as it will see the page as clean and
++ * unused.
+ */
+ static void
+ xfs_finish_page_writeback(
+@@ -95,8 +101,9 @@ xfs_finish_page_writeback(
+ int error)
+ {
+ unsigned int end = bvec->bv_offset + bvec->bv_len - 1;
+- struct buffer_head *head, *bh;
++ struct buffer_head *head, *bh, *next;
+ unsigned int off = 0;
++ unsigned int bsize;
+
+ ASSERT(bvec->bv_offset < PAGE_SIZE);
+ ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
+@@ -105,15 +112,17 @@ xfs_finish_page_writeback(
+
+ bh = head = page_buffers(bvec->bv_page);
+
++ bsize = bh->b_size;
+ do {
++ next = bh->b_this_page;
+ if (off < bvec->bv_offset)
+ goto next_bh;
+ if (off > end)
+ break;
+ bh->b_end_io(bh, !error);
+ next_bh:
+- off += bh->b_size;
+- } while ((bh = bh->b_this_page) != head);
++ off += bsize;
++ } while ((bh = next) != head);
+ }
+
+ /*
+diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
+index 3f103076d0bf..c357f27d5483 100644
+--- a/include/linux/backing-dev-defs.h
++++ b/include/linux/backing-dev-defs.h
+@@ -163,6 +163,7 @@ struct backing_dev_info {
+ wait_queue_head_t wb_waitq;
+
+ struct device *dev;
++ struct device *owner;
+
+ struct timer_list laptop_mode_wb_timer;
+
+diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
+index c82794f20110..89d3de3e096b 100644
+--- a/include/linux/backing-dev.h
++++ b/include/linux/backing-dev.h
+@@ -24,6 +24,7 @@ __printf(3, 4)
+ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
+ const char *fmt, ...);
+ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
++int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner);
+ void bdi_unregister(struct backing_dev_info *bdi);
+
+ int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
+diff --git a/include/linux/bio.h b/include/linux/bio.h
+index 9faebf7f9a33..75fadd28eec8 100644
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -527,11 +527,14 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
+ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
+ int bio_associate_current(struct bio *bio);
+ void bio_disassociate_task(struct bio *bio);
++void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
+ #else /* CONFIG_BLK_CGROUP */
+ static inline int bio_associate_blkcg(struct bio *bio,
+ struct cgroup_subsys_state *blkcg_css) { return 0; }
+ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
+ static inline void bio_disassociate_task(struct bio *bio) { }
++static inline void bio_clone_blkcg_association(struct bio *dst,
++ struct bio *src) { }
+ #endif /* CONFIG_BLK_CGROUP */
+
+ #ifdef CONFIG_HIGHMEM
+diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
+index ab310819ac36..7879bf411891 100644
+--- a/include/linux/mlx5/qp.h
++++ b/include/linux/mlx5/qp.h
+@@ -556,9 +556,9 @@ struct mlx5_destroy_qp_mbox_out {
+ struct mlx5_modify_qp_mbox_in {
+ struct mlx5_inbox_hdr hdr;
+ __be32 qpn;
+- u8 rsvd1[4];
+- __be32 optparam;
+ u8 rsvd0[4];
++ __be32 optparam;
++ u8 rsvd1[4];
+ struct mlx5_qp_context ctx;
+ u8 rsvd2[16];
+ };
+diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
+index 7e440d41487a..e694f02d42e3 100644
+--- a/include/rdma/ib_verbs.h
++++ b/include/rdma/ib_verbs.h
+@@ -1428,6 +1428,10 @@ struct ib_srq {
+ } ext;
+ };
+
++/*
++ * @max_write_sge: Maximum SGE elements per RDMA WRITE request.
++ * @max_read_sge: Maximum SGE elements per RDMA READ request.
++ */
+ struct ib_qp {
+ struct ib_device *device;
+ struct ib_pd *pd;
+@@ -1449,6 +1453,8 @@ struct ib_qp {
+ void (*event_handler)(struct ib_event *, void *);
+ void *qp_context;
+ u32 qp_num;
++ u32 max_write_sge;
++ u32 max_read_sge;
+ enum ib_qp_type qp_type;
+ };
+
+diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
+index d8ab5101fad5..f6f3bc52c1ac 100644
+--- a/include/target/target_core_backend.h
++++ b/include/target/target_core_backend.h
+@@ -95,6 +95,6 @@ sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
+ bool target_sense_desc_format(struct se_device *dev);
+ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
+ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
+- struct request_queue *q, int block_size);
++ struct request_queue *q);
+
+ #endif /* TARGET_CORE_BACKEND_H */
+diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
+index b316b44d03f3..fb8e3b6febdf 100644
+--- a/include/target/target_core_base.h
++++ b/include/target/target_core_base.h
+@@ -142,6 +142,7 @@ enum se_cmd_flags_table {
+ SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000,
+ SCF_ACK_KREF = 0x00400000,
+ SCF_USE_CPUID = 0x00800000,
++ SCF_TASK_ATTR_SET = 0x01000000,
+ };
+
+ /*
+diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
+index de44462a7680..5cd6faa6e0d1 100644
+--- a/include/target/target_core_fabric.h
++++ b/include/target/target_core_fabric.h
+@@ -163,7 +163,6 @@ int core_tmr_alloc_req(struct se_cmd *, void *, u8, gfp_t);
+ void core_tmr_release_req(struct se_tmr_req *);
+ int transport_generic_handle_tmr(struct se_cmd *);
+ void transport_generic_request_failure(struct se_cmd *, sense_reason_t);
+-void __target_execute_cmd(struct se_cmd *);
+ int transport_lookup_tmr_lun(struct se_cmd *, u64);
+ void core_allocate_nexus_loss_ua(struct se_node_acl *acl);
+
+diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
+index 003dca933803..5664ca07c9c7 100644
+--- a/include/trace/events/sunrpc.h
++++ b/include/trace/events/sunrpc.h
+@@ -529,20 +529,27 @@ TRACE_EVENT(svc_xprt_do_enqueue,
+
+ TP_STRUCT__entry(
+ __field(struct svc_xprt *, xprt)
+- __field_struct(struct sockaddr_storage, ss)
+ __field(int, pid)
+ __field(unsigned long, flags)
++ __dynamic_array(unsigned char, addr, xprt != NULL ?
++ xprt->xpt_remotelen : 0)
+ ),
+
+ TP_fast_assign(
+ __entry->xprt = xprt;
+- xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
+ __entry->pid = rqst? rqst->rq_task->pid : 0;
+- __entry->flags = xprt ? xprt->xpt_flags : 0;
++ if (xprt) {
++ memcpy(__get_dynamic_array(addr),
++ &xprt->xpt_remote,
++ xprt->xpt_remotelen);
++ __entry->flags = xprt->xpt_flags;
++ } else
++ __entry->flags = 0;
+ ),
+
+ TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
+- (struct sockaddr *)&__entry->ss,
++ __get_dynamic_array_len(addr) != 0 ?
++ (struct sockaddr *)__get_dynamic_array(addr) : NULL,
+ __entry->pid, show_svc_xprt_flags(__entry->flags))
+ );
+
+@@ -553,18 +560,25 @@ TRACE_EVENT(svc_xprt_dequeue,
+
+ TP_STRUCT__entry(
+ __field(struct svc_xprt *, xprt)
+- __field_struct(struct sockaddr_storage, ss)
+ __field(unsigned long, flags)
++ __dynamic_array(unsigned char, addr, xprt != NULL ?
++ xprt->xpt_remotelen : 0)
+ ),
+
+ TP_fast_assign(
+- __entry->xprt = xprt,
+- xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
+- __entry->flags = xprt ? xprt->xpt_flags : 0;
++ __entry->xprt = xprt;
++ if (xprt) {
++ memcpy(__get_dynamic_array(addr),
++ &xprt->xpt_remote,
++ xprt->xpt_remotelen);
++ __entry->flags = xprt->xpt_flags;
++ } else
++ __entry->flags = 0;
+ ),
+
+ TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt,
+- (struct sockaddr *)&__entry->ss,
++ __get_dynamic_array_len(addr) != 0 ?
++ (struct sockaddr *)__get_dynamic_array(addr) : NULL,
+ show_svc_xprt_flags(__entry->flags))
+ );
+
+@@ -592,19 +606,26 @@ TRACE_EVENT(svc_handle_xprt,
+ TP_STRUCT__entry(
+ __field(struct svc_xprt *, xprt)
+ __field(int, len)
+- __field_struct(struct sockaddr_storage, ss)
+ __field(unsigned long, flags)
++ __dynamic_array(unsigned char, addr, xprt != NULL ?
++ xprt->xpt_remotelen : 0)
+ ),
+
+ TP_fast_assign(
+ __entry->xprt = xprt;
+- xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
+ __entry->len = len;
+- __entry->flags = xprt ? xprt->xpt_flags : 0;
++ if (xprt) {
++ memcpy(__get_dynamic_array(addr),
++ &xprt->xpt_remote,
++ xprt->xpt_remotelen);
++ __entry->flags = xprt->xpt_flags;
++ } else
++ __entry->flags = 0;
+ ),
+
+ TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
+- (struct sockaddr *)&__entry->ss,
++ __get_dynamic_array_len(addr) != 0 ?
++ (struct sockaddr *)__get_dynamic_array(addr) : NULL,
+ __entry->len, show_svc_xprt_flags(__entry->flags))
+ );
+ #endif /* _TRACE_SUNRPC_H */
+diff --git a/kernel/auditsc.c b/kernel/auditsc.c
+index 2672d105cffc..b3341284f1c6 100644
+--- a/kernel/auditsc.c
++++ b/kernel/auditsc.c
+@@ -72,6 +72,7 @@
+ #include <linux/compat.h>
+ #include <linux/ctype.h>
+ #include <linux/string.h>
++#include <linux/uaccess.h>
+ #include <uapi/linux/limits.h>
+
+ #include "audit.h"
+@@ -81,7 +82,8 @@
+ #define AUDITSC_SUCCESS 1
+ #define AUDITSC_FAILURE 2
+
+-/* no execve audit message should be longer than this (userspace limits) */
++/* no execve audit message should be longer than this (userspace limits),
++ * see the note near the top of audit_log_execve_info() about this value */
+ #define MAX_EXECVE_AUDIT_LEN 7500
+
+ /* max length to print of cmdline/proctitle value during audit */
+@@ -987,184 +989,178 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid,
+ return rc;
+ }
+
+-/*
+- * to_send and len_sent accounting are very loose estimates. We aren't
+- * really worried about a hard cap to MAX_EXECVE_AUDIT_LEN so much as being
+- * within about 500 bytes (next page boundary)
+- *
+- * why snprintf? an int is up to 12 digits long. if we just assumed when
+- * logging that a[%d]= was going to be 16 characters long we would be wasting
+- * space in every audit message. In one 7500 byte message we can log up to
+- * about 1000 min size arguments. That comes down to about 50% waste of space
+- * if we didn't do the snprintf to find out how long arg_num_len was.
+- */
+-static int audit_log_single_execve_arg(struct audit_context *context,
+- struct audit_buffer **ab,
+- int arg_num,
+- size_t *len_sent,
+- const char __user *p,
+- char *buf)
++static void audit_log_execve_info(struct audit_context *context,
++ struct audit_buffer **ab)
+ {
+- char arg_num_len_buf[12];
+- const char __user *tmp_p = p;
+- /* how many digits are in arg_num? 5 is the length of ' a=""' */
+- size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 5;
+- size_t len, len_left, to_send;
+- size_t max_execve_audit_len = MAX_EXECVE_AUDIT_LEN;
+- unsigned int i, has_cntl = 0, too_long = 0;
+- int ret;
+-
+- /* strnlen_user includes the null we don't want to send */
+- len_left = len = strnlen_user(p, MAX_ARG_STRLEN) - 1;
+-
+- /*
+- * We just created this mm, if we can't find the strings
+- * we just copied into it something is _very_ wrong. Similar
+- * for strings that are too long, we should not have created
+- * any.
+- */
+- if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) {
+- send_sig(SIGKILL, current, 0);
+- return -1;
++ long len_max;
++ long len_rem;
++ long len_full;
++ long len_buf;
++ long len_abuf;
++ long len_tmp;
++ bool require_data;
++ bool encode;
++ unsigned int iter;
++ unsigned int arg;
++ char *buf_head;
++ char *buf;
++ const char __user *p = (const char __user *)current->mm->arg_start;
++
++ /* NOTE: this buffer needs to be large enough to hold all the non-arg
++ * data we put in the audit record for this argument (see the
++ * code below) ... at this point in time 96 is plenty */
++ char abuf[96];
++
++ /* NOTE: we set MAX_EXECVE_AUDIT_LEN to a rather arbitrary limit, the
++ * current value of 7500 is not as important as the fact that it
++ * is less than 8k, a setting of 7500 gives us plenty of wiggle
++ * room if we go over a little bit in the logging below */
++ WARN_ON_ONCE(MAX_EXECVE_AUDIT_LEN > 7500);
++ len_max = MAX_EXECVE_AUDIT_LEN;
++
++ /* scratch buffer to hold the userspace args */
++ buf_head = kmalloc(MAX_EXECVE_AUDIT_LEN + 1, GFP_KERNEL);
++ if (!buf_head) {
++ audit_panic("out of memory for argv string");
++ return;
+ }
++ buf = buf_head;
+
+- /* walk the whole argument looking for non-ascii chars */
++ audit_log_format(*ab, "argc=%d", context->execve.argc);
++
++ len_rem = len_max;
++ len_buf = 0;
++ len_full = 0;
++ require_data = true;
++ encode = false;
++ iter = 0;
++ arg = 0;
+ do {
+- if (len_left > MAX_EXECVE_AUDIT_LEN)
+- to_send = MAX_EXECVE_AUDIT_LEN;
+- else
+- to_send = len_left;
+- ret = copy_from_user(buf, tmp_p, to_send);
+- /*
+- * There is no reason for this copy to be short. We just
+- * copied them here, and the mm hasn't been exposed to user-
+- * space yet.
+- */
+- if (ret) {
+- WARN_ON(1);
+- send_sig(SIGKILL, current, 0);
+- return -1;
+- }
+- buf[to_send] = '\0';
+- has_cntl = audit_string_contains_control(buf, to_send);
+- if (has_cntl) {
+- /*
+- * hex messages get logged as 2 bytes, so we can only
+- * send half as much in each message
+- */
+- max_execve_audit_len = MAX_EXECVE_AUDIT_LEN / 2;
+- break;
+- }
+- len_left -= to_send;
+- tmp_p += to_send;
+- } while (len_left > 0);
+-
+- len_left = len;
+-
+- if (len > max_execve_audit_len)
+- too_long = 1;
+-
+- /* rewalk the argument actually logging the message */
+- for (i = 0; len_left > 0; i++) {
+- int room_left;
+-
+- if (len_left > max_execve_audit_len)
+- to_send = max_execve_audit_len;
+- else
+- to_send = len_left;
+-
+- /* do we have space left to send this argument in this ab? */
+- room_left = MAX_EXECVE_AUDIT_LEN - arg_num_len - *len_sent;
+- if (has_cntl)
+- room_left -= (to_send * 2);
+- else
+- room_left -= to_send;
+- if (room_left < 0) {
+- *len_sent = 0;
+- audit_log_end(*ab);
+- *ab = audit_log_start(context, GFP_KERNEL, AUDIT_EXECVE);
+- if (!*ab)
+- return 0;
+- }
++ /* NOTE: we don't ever want to trust this value for anything
++ * serious, but the audit record format insists we
++ * provide an argument length for really long arguments,
++ * e.g. > MAX_EXECVE_AUDIT_LEN, so we have no choice but
++ * to use strncpy_from_user() to obtain this value for
++ * recording in the log, although we don't use it
++ * anywhere here to avoid a double-fetch problem */
++ if (len_full == 0)
++ len_full = strnlen_user(p, MAX_ARG_STRLEN) - 1;
++
++ /* read more data from userspace */
++ if (require_data) {
++ /* can we make more room in the buffer? */
++ if (buf != buf_head) {
++ memmove(buf_head, buf, len_buf);
++ buf = buf_head;
++ }
++
++ /* fetch as much as we can of the argument */
++ len_tmp = strncpy_from_user(&buf_head[len_buf], p,
++ len_max - len_buf);
++ if (len_tmp == -EFAULT) {
++ /* unable to copy from userspace */
++ send_sig(SIGKILL, current, 0);
++ goto out;
++ } else if (len_tmp == (len_max - len_buf)) {
++ /* buffer is not large enough */
++ require_data = true;
++ /* NOTE: if we are going to span multiple
++ * buffers force the encoding so we stand
++ * a chance at a sane len_full value and
++ * consistent record encoding */
++ encode = true;
++ len_full = len_full * 2;
++ p += len_tmp;
++ } else {
++ require_data = false;
++ if (!encode)
++ encode = audit_string_contains_control(
++ buf, len_tmp);
++ /* try to use a trusted value for len_full */
++ if (len_full < len_max)
++ len_full = (encode ?
++ len_tmp * 2 : len_tmp);
++ p += len_tmp + 1;
++ }
++ len_buf += len_tmp;
++ buf_head[len_buf] = '\0';
+
+- /*
+- * first record needs to say how long the original string was
+- * so we can be sure nothing was lost.
+- */
+- if ((i == 0) && (too_long))
+- audit_log_format(*ab, " a%d_len=%zu", arg_num,
+- has_cntl ? 2*len : len);
+-
+- /*
+- * normally arguments are small enough to fit and we already
+- * filled buf above when we checked for control characters
+- * so don't bother with another copy_from_user
+- */
+- if (len >= max_execve_audit_len)
+- ret = copy_from_user(buf, p, to_send);
+- else
+- ret = 0;
+- if (ret) {
+- WARN_ON(1);
+- send_sig(SIGKILL, current, 0);
+- return -1;
++ /* length of the buffer in the audit record? */
++ len_abuf = (encode ? len_buf * 2 : len_buf + 2);
+ }
+- buf[to_send] = '\0';
+-
+- /* actually log it */
+- audit_log_format(*ab, " a%d", arg_num);
+- if (too_long)
+- audit_log_format(*ab, "[%d]", i);
+- audit_log_format(*ab, "=");
+- if (has_cntl)
+- audit_log_n_hex(*ab, buf, to_send);
+- else
+- audit_log_string(*ab, buf);
+-
+- p += to_send;
+- len_left -= to_send;
+- *len_sent += arg_num_len;
+- if (has_cntl)
+- *len_sent += to_send * 2;
+- else
+- *len_sent += to_send;
+- }
+- /* include the null we didn't log */
+- return len + 1;
+-}
+
+-static void audit_log_execve_info(struct audit_context *context,
+- struct audit_buffer **ab)
+-{
+- int i, len;
+- size_t len_sent = 0;
+- const char __user *p;
+- char *buf;
++ /* write as much as we can to the audit log */
++ if (len_buf > 0) {
++ /* NOTE: some magic numbers here - basically if we
++ * can't fit a reasonable amount of data into the
++ * existing audit buffer, flush it and start with
++ * a new buffer */
++ if ((sizeof(abuf) + 8) > len_rem) {
++ len_rem = len_max;
++ audit_log_end(*ab);
++ *ab = audit_log_start(context,
++ GFP_KERNEL, AUDIT_EXECVE);
++ if (!*ab)
++ goto out;
++ }
+
+- p = (const char __user *)current->mm->arg_start;
++ /* create the non-arg portion of the arg record */
++ len_tmp = 0;
++ if (require_data || (iter > 0) ||
++ ((len_abuf + sizeof(abuf)) > len_rem)) {
++ if (iter == 0) {
++ len_tmp += snprintf(&abuf[len_tmp],
++ sizeof(abuf) - len_tmp,
++ " a%d_len=%lu",
++ arg, len_full);
++ }
++ len_tmp += snprintf(&abuf[len_tmp],
++ sizeof(abuf) - len_tmp,
++ " a%d[%d]=", arg, iter++);
++ } else
++ len_tmp += snprintf(&abuf[len_tmp],
++ sizeof(abuf) - len_tmp,
++ " a%d=", arg);
++ WARN_ON(len_tmp >= sizeof(abuf));
++ abuf[sizeof(abuf) - 1] = '\0';
++
++ /* log the arg in the audit record */
++ audit_log_format(*ab, "%s", abuf);
++ len_rem -= len_tmp;
++ len_tmp = len_buf;
++ if (encode) {
++ if (len_abuf > len_rem)
++ len_tmp = len_rem / 2; /* encoding */
++ audit_log_n_hex(*ab, buf, len_tmp);
++ len_rem -= len_tmp * 2;
++ len_abuf -= len_tmp * 2;
++ } else {
++ if (len_abuf > len_rem)
++ len_tmp = len_rem - 2; /* quotes */
++ audit_log_n_string(*ab, buf, len_tmp);
++ len_rem -= len_tmp + 2;
++ /* don't subtract the "2" because we still need
++ * to add quotes to the remaining string */
++ len_abuf -= len_tmp;
++ }
++ len_buf -= len_tmp;
++ buf += len_tmp;
++ }
+
+- audit_log_format(*ab, "argc=%d", context->execve.argc);
++ /* ready to move to the next argument? */
++ if ((len_buf == 0) && !require_data) {
++ arg++;
++ iter = 0;
++ len_full = 0;
++ require_data = true;
++ encode = false;
++ }
++ } while (arg < context->execve.argc);
+
+- /*
+- * we need some kernel buffer to hold the userspace args. Just
+- * allocate one big one rather than allocating one of the right size
+- * for every single argument inside audit_log_single_execve_arg()
+- * should be <8k allocation so should be pretty safe.
+- */
+- buf = kmalloc(MAX_EXECVE_AUDIT_LEN + 1, GFP_KERNEL);
+- if (!buf) {
+- audit_panic("out of memory for argv string");
+- return;
+- }
++ /* NOTE: the caller handles the final audit_log_end() call */
+
+- for (i = 0; i < context->execve.argc; i++) {
+- len = audit_log_single_execve_arg(context, ab, i,
+- &len_sent, p, buf);
+- if (len <= 0)
+- break;
+- p += len;
+- }
+- kfree(buf);
++out:
++ kfree(buf_head);
+ }
+
+ static void show_special(struct audit_context *context, int *call_panic)
+diff --git a/kernel/cgroup.c b/kernel/cgroup.c
+index 75c0ff00aca6..e0be49fc382f 100644
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -2215,12 +2215,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
+ goto out_unlock;
+ }
+
+- /*
+- * We know this subsystem has not yet been bound. Users in a non-init
+- * user namespace may only mount hierarchies with no bound subsystems,
+- * i.e. 'none,name=user1'
+- */
+- if (!opts.none && !capable(CAP_SYS_ADMIN)) {
++ /* Hierarchies may only be created in the initial cgroup namespace. */
++ if (ns != &init_cgroup_ns) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+@@ -2962,6 +2958,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
+ int retval = 0;
+
+ mutex_lock(&cgroup_mutex);
++ percpu_down_write(&cgroup_threadgroup_rwsem);
+ for_each_root(root) {
+ struct cgroup *from_cgrp;
+
+@@ -2976,6 +2973,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
+ if (retval)
+ break;
+ }
++ percpu_up_write(&cgroup_threadgroup_rwsem);
+ mutex_unlock(&cgroup_mutex);
+
+ return retval;
+@@ -4343,6 +4341,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
+
+ mutex_lock(&cgroup_mutex);
+
++ percpu_down_write(&cgroup_threadgroup_rwsem);
++
+ /* all tasks in @from are being moved, all csets are source */
+ spin_lock_irq(&css_set_lock);
+ list_for_each_entry(link, &from->cset_links, cset_link)
+@@ -4371,6 +4371,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
+ } while (task && !ret);
+ out_err:
+ cgroup_migrate_finish(&preloaded_csets);
++ percpu_up_write(&cgroup_threadgroup_rwsem);
+ mutex_unlock(&cgroup_mutex);
+ return ret;
+ }
+@@ -6309,14 +6310,11 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
+ if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+- mutex_lock(&cgroup_mutex);
++ /* It is not safe to take cgroup_mutex here */
+ spin_lock_irq(&css_set_lock);
+-
+ cset = task_css_set(current);
+ get_css_set(cset);
+-
+ spin_unlock_irq(&css_set_lock);
+- mutex_unlock(&cgroup_mutex);
+
+ new_ns = alloc_cgroup_ns();
+ if (IS_ERR(new_ns)) {
+diff --git a/kernel/module.c b/kernel/module.c
+index 5f71aa63ed2a..6458a2f17d58 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -2687,13 +2687,18 @@ static inline void kmemleak_load_module(const struct module *mod,
+ #endif
+
+ #ifdef CONFIG_MODULE_SIG
+-static int module_sig_check(struct load_info *info)
++static int module_sig_check(struct load_info *info, int flags)
+ {
+ int err = -ENOKEY;
+ const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+ const void *mod = info->hdr;
+
+- if (info->len > markerlen &&
++ /*
++ * Require flags == 0, as a module with version information
++ * removed is no longer the module that was signed
++ */
++ if (flags == 0 &&
++ info->len > markerlen &&
+ memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
+ /* We truncate the module to discard the signature */
+ info->len -= markerlen;
+@@ -2712,7 +2717,7 @@ static int module_sig_check(struct load_info *info)
+ return err;
+ }
+ #else /* !CONFIG_MODULE_SIG */
+-static int module_sig_check(struct load_info *info)
++static int module_sig_check(struct load_info *info, int flags)
+ {
+ return 0;
+ }
+@@ -3498,7 +3503,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
+ long err;
+ char *after_dashes;
+
+- err = module_sig_check(info);
++ err = module_sig_check(info, flags);
+ if (err)
+ goto free_copy;
+
+diff --git a/mm/backing-dev.c b/mm/backing-dev.c
+index ed173b8ae8f2..9269911d10dd 100644
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -825,6 +825,20 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
+ }
+ EXPORT_SYMBOL(bdi_register_dev);
+
++int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
++{
++ int rc;
++
++ rc = bdi_register(bdi, NULL, "%u:%u", MAJOR(owner->devt),
++ MINOR(owner->devt));
++ if (rc)
++ return rc;
++ bdi->owner = owner;
++ get_device(owner);
++ return 0;
++}
++EXPORT_SYMBOL(bdi_register_owner);
++
+ /*
+ * Remove bdi from bdi_list, and ensure that it is no longer visible
+ */
+@@ -849,6 +863,11 @@ void bdi_unregister(struct backing_dev_info *bdi)
+ device_unregister(bdi->dev);
+ bdi->dev = NULL;
+ }
++
++ if (bdi->owner) {
++ put_device(bdi->owner);
++ bdi->owner = NULL;
++ }
+ }
+
+ void bdi_exit(struct backing_dev_info *bdi)
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index addfe4accc07..d9ec1a5b37ff 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -2214,6 +2214,10 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
+ * and reducing the surplus.
+ */
+ spin_unlock(&hugetlb_lock);
++
++ /* yield cpu to avoid soft lockup */
++ cond_resched();
++
+ if (hstate_is_gigantic(h))
+ ret = alloc_fresh_gigantic_page(h, nodes_allowed);
+ else
+diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
+index 388ee8b59145..1842141baedb 100644
+--- a/net/bluetooth/l2cap_sock.c
++++ b/net/bluetooth/l2cap_sock.c
+@@ -927,7 +927,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
+ break;
+ }
+
+- if (get_user(opt, (u32 __user *) optval)) {
++ if (get_user(opt, (u16 __user *) optval)) {
+ err = -EFAULT;
+ break;
+ }
+diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
+index 1325776daa27..bd007a9fd921 100644
+--- a/net/netlabel/netlabel_kapi.c
++++ b/net/netlabel/netlabel_kapi.c
+@@ -824,7 +824,11 @@ socket_setattr_return:
+ */
+ void netlbl_sock_delattr(struct sock *sk)
+ {
+- cipso_v4_sock_delattr(sk);
++ switch (sk->sk_family) {
++ case AF_INET:
++ cipso_v4_sock_delattr(sk);
++ break;
++ }
+ }
+
+ /**
+@@ -987,7 +991,11 @@ req_setattr_return:
+ */
+ void netlbl_req_delattr(struct request_sock *req)
+ {
+- cipso_v4_req_delattr(req);
++ switch (req->rsk_ops->family) {
++ case AF_INET:
++ cipso_v4_req_delattr(req);
++ break;
++ }
+ }
+
+ /**
+diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
+index e167592793a7..42396a74405d 100644
+--- a/scripts/recordmcount.c
++++ b/scripts/recordmcount.c
+@@ -33,10 +33,17 @@
+ #include <string.h>
+ #include <unistd.h>
+
++/*
++ * glibc synced up and added the metag number but didn't add the relocations.
++ * Work around this in a crude manner for now.
++ */
+ #ifndef EM_METAG
+-/* Remove this when these make it to the standard system elf.h. */
+ #define EM_METAG 174
++#endif
++#ifndef R_METAG_ADDR32
+ #define R_METAG_ADDR32 2
++#endif
++#ifndef R_METAG_NONE
+ #define R_METAG_NONE 3
+ #endif
+
+diff --git a/sound/hda/array.c b/sound/hda/array.c
+index 516795baa7db..5dfa610e4471 100644
+--- a/sound/hda/array.c
++++ b/sound/hda/array.c
+@@ -21,13 +21,15 @@ void *snd_array_new(struct snd_array *array)
+ return NULL;
+ if (array->used >= array->alloced) {
+ int num = array->alloced + array->alloc_align;
++ int oldsize = array->alloced * array->elem_size;
+ int size = (num + 1) * array->elem_size;
+ void *nlist;
+ if (snd_BUG_ON(num >= 4096))
+ return NULL;
+- nlist = krealloc(array->list, size, GFP_KERNEL | __GFP_ZERO);
++ nlist = krealloc(array->list, size, GFP_KERNEL);
+ if (!nlist)
+ return NULL;
++ memset(nlist + oldsize, 0, size - oldsize);
+ array->list = nlist;
+ array->alloced = num;
+ }
+diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
+index 6f8ea13323c1..89dacf9b4e6c 100644
+--- a/sound/pci/hda/hda_intel.c
++++ b/sound/pci/hda/hda_intel.c
+@@ -2265,6 +2265,8 @@ static const struct pci_device_id azx_ids[] = {
+ { PCI_DEVICE(0x1022, 0x780d),
+ .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB },
+ /* ATI HDMI */
++ { PCI_DEVICE(0x1002, 0x0002),
++ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
+ { PCI_DEVICE(0x1002, 0x1308),
+ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
+ { PCI_DEVICE(0x1002, 0x157a),
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index abcb5a6a1cd9..f25479ba3981 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -4674,6 +4674,22 @@ static void alc290_fixup_mono_speakers(struct hda_codec *codec,
+ }
+ }
+
++static void alc298_fixup_speaker_volume(struct hda_codec *codec,
++ const struct hda_fixup *fix, int action)
++{
++ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
++ /* The speaker is routed to the Node 0x06 by a mistake, as a result
++ we can't adjust the speaker's volume since this node does not has
++ Amp-out capability. we change the speaker's route to:
++ Node 0x02 (Audio Output) -> Node 0x0c (Audio Mixer) -> Node 0x17 (
++ Pin Complex), since Node 0x02 has Amp-out caps, we can adjust
++ speaker's volume now. */
++
++ hda_nid_t conn1[1] = { 0x0c };
++ snd_hda_override_conn_list(codec, 0x17, 1, conn1);
++ }
++}
++
+ /* Hook to update amp GPIO4 for automute */
+ static void alc280_hp_gpio4_automute_hook(struct hda_codec *codec,
+ struct hda_jack_callback *jack)
+@@ -4823,6 +4839,7 @@ enum {
+ ALC280_FIXUP_HP_HEADSET_MIC,
+ ALC221_FIXUP_HP_FRONT_MIC,
+ ALC292_FIXUP_TPT460,
++ ALC298_FIXUP_SPK_VOLUME,
+ };
+
+ static const struct hda_fixup alc269_fixups[] = {
+@@ -5478,6 +5495,12 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE,
+ },
++ [ALC298_FIXUP_SPK_VOLUME] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = alc298_fixup_speaker_volume,
++ .chained = true,
++ .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
++ },
+ };
+
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -5524,6 +5547,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
++ SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
+ SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
+@@ -5799,6 +5823,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+ {0x1b, 0x01014020},
+ {0x21, 0x0221103f}),
+ SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++ {0x14, 0x90170130},
++ {0x1b, 0x02011020},
++ {0x21, 0x0221103f}),
++ SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x14, 0x90170150},
+ {0x1b, 0x02011020},
+ {0x21, 0x0221105f}),
+diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
+index a0b3128bb31f..d3102c865a95 100644
+--- a/tools/objtool/.gitignore
++++ b/tools/objtool/.gitignore
+@@ -1,2 +1,3 @@
+ arch/x86/insn/inat-tables.c
+ objtool
++fixdep
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 48bd520fc702..dd25346ec356 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -148,6 +148,7 @@ int vcpu_load(struct kvm_vcpu *vcpu)
+ put_cpu();
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(vcpu_load);
+
+ void vcpu_put(struct kvm_vcpu *vcpu)
+ {
+@@ -157,6 +158,7 @@ void vcpu_put(struct kvm_vcpu *vcpu)
+ preempt_enable();
+ mutex_unlock(&vcpu->mutex);
+ }
++EXPORT_SYMBOL_GPL(vcpu_put);
+
+ static void ack_flush(void *_completed)
+ {
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-08-22 14:48 Mike Pagano
0 siblings, 0 replies; 20+ messages in thread
From: Mike Pagano @ 2016-08-22 14:48 UTC (permalink / raw
To: gentoo-commits
commit: b0e8d6a9cbc65c09037bd00a8ee549053007b79e
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Mon Aug 22 14:48:04 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Mon Aug 22 14:48:04 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=b0e8d6a9
Update BFQ patchset to v8r2. See bug #591828
0000_README | 2 +-
...-BFQ-v7r11-for-4.7.0-into-BFQ-v8r2-for-4.patch1 | 568 +++++++++++++--------
2 files changed, 350 insertions(+), 220 deletions(-)
diff --git a/0000_README b/0000_README
index 23c35b0..d857e6a 100644
--- a/0000_README
+++ b/0000_README
@@ -91,7 +91,7 @@ Patch: 5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch
From: http://algo.ing.unimo.it/people/paolo/disk_sched/
Desc: BFQ v7r11 patch 3 for 4.7: Early Queue Merge (EQM)
-Patch: 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch2
+Patch: 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r2-for-4.patch1
From: http://algo.ing.unimo.it/people/paolo/disk_sched/
Desc: BFQ v7r11 patch 4 for 4.7: Early Queue Merge (EQM)
diff --git a/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1 b/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r2-for-4.patch1
similarity index 94%
rename from 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1
rename to 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r2-for-4.patch1
index 372f093..cbc051f 100644
--- a/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1
+++ b/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r2-for-4.patch1
@@ -1,16 +1,16 @@
-From 21d90fdc7488cd7c28f47b5ba759e62c697c0382 Mon Sep 17 00:00:00 2001
+From 0061399c3c07fb8d119c0d581b613b870e63b165 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Tue, 17 May 2016 08:28:04 +0200
-Subject: [PATCH 4/4] block, bfq: turn BFQ-v7r11 for 4.7.0 into BFQ-v8 for
+Subject: [PATCH 4/4] block, bfq: turn BFQ-v7r11 for 4.7.0 into BFQ-v8r2 for
4.7.0
---
block/Kconfig.iosched | 2 +-
- block/bfq-cgroup.c | 448 +++++----
- block/bfq-iosched.c | 2581 +++++++++++++++++++++++++++++--------------------
- block/bfq-sched.c | 432 +++++++--
- block/bfq.h | 697 +++++++------
- 5 files changed, 2433 insertions(+), 1727 deletions(-)
+ block/bfq-cgroup.c | 480 +++++----
+ block/bfq-iosched.c | 2601 +++++++++++++++++++++++++++++--------------------
+ block/bfq-sched.c | 441 +++++++--
+ block/bfq.h | 708 +++++++-------
+ 5 files changed, 2483 insertions(+), 1749 deletions(-)
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index f78cd1a..6d92579 100644
@@ -26,7 +26,7 @@ index f78cd1a..6d92579 100644
---help---
Enable hierarchical scheduling in BFQ, using the blkio controller.
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
-index 5ee99ec..bc01663 100644
+index 5ee99ec..c83d90c 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -162,7 +162,6 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
@@ -228,7 +228,7 @@ index 5ee99ec..bc01663 100644
entity->orig_weight = entity->weight = entity->new_weight = d->weight;
entity->my_sched_data = &bfqg->sched_data;
-@@ -445,45 +426,28 @@ static void bfq_pd_free(struct blkg_policy_data *pd)
+@@ -445,70 +426,53 @@ static void bfq_pd_free(struct blkg_policy_data *pd)
struct bfq_group *bfqg = pd_to_bfqg(pd);
bfqg_stats_exit(&bfqg->stats);
@@ -262,30 +262,70 @@ index 5ee99ec..bc01663 100644
+ struct bfq_group *parent)
{
- struct blkg_rwstat a, b;
--
++ struct bfq_entity *entity;
++
++ BUG_ON(!parent);
++ BUG_ON(!bfqg);
++ BUG_ON(bfqg == parent);
+
- a = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
- b = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
- off + dead_stats_off_delta);
- blkg_rwstat_add_aux(&a, &b);
- return a;
--}
-+ struct bfq_entity *entity;
++ entity = &bfqg->entity;
++ entity->parent = parent->my_entity;
++ entity->sched_data = &parent->sched_data;
+ }
-static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
--{
++static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
++ struct blkcg *blkcg)
+ {
- struct bfq_group *bfqg = pd_to_bfqg(pd);
-+ BUG_ON(!parent);
-+ BUG_ON(!bfqg);
-+ BUG_ON(bfqg == parent);
++ struct blkcg_gq *blkg;
- bfqg_stats_reset(&bfqg->stats);
- bfqg_stats_reset(&bfqg->dead_stats);
-+ entity = &bfqg->entity;
-+ entity->parent = parent->my_entity;
-+ entity->sched_data = &parent->sched_data;
++ blkg = blkg_lookup(blkcg, bfqd->queue);
++ if (likely(blkg))
++ return blkg_to_bfqg(blkg);
++ return NULL;
}
- static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+-static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+- struct blkcg *blkcg)
++static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
++ struct blkcg *blkcg)
+ {
+- struct request_queue *q = bfqd->queue;
+- struct bfq_group *bfqg = NULL, *parent;
+- struct bfq_entity *entity = NULL;
++ struct bfq_group *bfqg, *parent;
++ struct bfq_entity *entity;
+
+ assert_spin_locked(bfqd->queue->queue_lock);
+
+- /* avoid lookup for the common case where there's no blkcg */
+- if (blkcg == &blkcg_root) {
+- bfqg = bfqd->root_group;
+- } else {
+- struct blkcg_gq *blkg;
+-
+- blkg = blkg_lookup_create(blkcg, q);
+- if (!IS_ERR(blkg))
+- bfqg = blkg_to_bfqg(blkg);
+- else /* fallback to root_group */
+- bfqg = bfqd->root_group;
+- }
++ bfqg = bfq_lookup_bfqg(bfqd, blkcg);
+
+- BUG_ON(!bfqg);
++ if (unlikely(!bfqg))
++ return NULL;
+
+ /*
+ * Update chain of bfq_groups as we might be handling a leaf group
@@ -531,13 +495,18 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
return bfqg;
}
@@ -315,11 +355,11 @@ index 5ee99ec..bc01663 100644
+ struct bfq_group *bfqg)
{
- int busy, resume;
-+ struct bfq_entity *entity = &bfqq->entity;
-
+-
- busy = bfq_bfqq_busy(bfqq);
- resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
--
++ struct bfq_entity *entity = &bfqq->entity;
+
- BUG_ON(resume && !entity->on_st);
- BUG_ON(busy && !resume && entity->on_st &&
+ BUG_ON(!bfq_bfqq_busy(bfqq) && !RB_EMPTY_ROOT(&bfqq->sort_list));
@@ -383,6 +423,15 @@ index 5ee99ec..bc01663 100644
}
/**
+@@ -613,7 +599,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+
+ lockdep_assert_held(bfqd->queue->queue_lock);
+
+- bfqg = bfq_find_alloc_group(bfqd, blkcg);
++ bfqg = bfq_find_set_group(bfqd, blkcg);
+ if (async_bfqq) {
+ entity = &async_bfqq->entity;
+
@@ -621,7 +607,8 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
bic_set_bfqq(bic, NULL, 0);
bfq_log_bfqq(bfqd, async_bfqq,
@@ -650,8 +699,9 @@ index 5ee99ec..bc01663 100644
return NULL;
- return blkg_to_bfqg(bfqd->queue->root_blkg);
--}
--
++ return blkg_to_bfqg(bfqd->queue->root_blkg);
+ }
+
-static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
-{
- struct bfq_group_data *bgd;
@@ -660,9 +710,8 @@ index 5ee99ec..bc01663 100644
- if (!bgd)
- return NULL;
- return &bgd->pd;
-+ return blkg_to_bfqg(bfqd->queue->root_blkg);
- }
-
+-}
+-
-static void bfq_cpd_free(struct blkcg_policy_data *cpd)
-{
- kfree(cpd_to_bfqgd(cpd));
@@ -798,7 +847,7 @@ index 5ee99ec..bc01663 100644
static void bfq_init_entity(struct bfq_entity *entity,
struct bfq_group *bfqg)
-@@ -1146,29 +1145,22 @@ bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+@@ -1146,27 +1145,20 @@ bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
return bfqd->root_group;
}
@@ -815,26 +864,24 @@ index 5ee99ec..bc01663 100644
}
-static void bfq_disconnect_groups(struct bfq_data *bfqd)
--{
-- bfq_put_async_queues(bfqd, bfqd->root_group);
--}
--
- static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
- struct blkcg *blkcg)
++static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
++ struct blkcg *blkcg)
{
- return bfqd->root_group;
+- bfq_put_async_queues(bfqd, bfqd->root_group);
++ return bfqd->root_group;
}
+-static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+- struct blkcg *blkcg)
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
-+{
+ {
+- return bfqd->root_group;
+ return bfqq->bfqd->root_group;
-+}
-+
+ }
+
static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
- {
- struct bfq_group *bfqg;
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
-index d1f648d..5469442 100644
+index d1f648d..5bff378 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7,25 +7,26 @@
@@ -1321,7 +1368,7 @@ index d1f648d..5469442 100644
}
/*
-@@ -856,25 +875,492 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -856,25 +875,497 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* queue. Then we add bfqq to the burst.
*/
bfq_add_to_burst(bfqd, bfqq);
@@ -1529,12 +1576,15 @@ index d1f648d..5469442 100644
+{
+ if (old_wr_coeff == 1 && wr_or_deserves_wr) {
+ /* start a weight-raising period */
-+ bfqq->wr_coeff = bfqd->bfq_wr_coeff;
-+ if (interactive) /* update wr duration */
++ if (interactive) {
++ bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
-+ else
++ } else {
++ bfqq->wr_coeff = bfqd->bfq_wr_coeff *
++ BFQ_SOFTRT_WEIGHT_FACTOR;
+ bfqq->wr_cur_max_time =
+ bfqd->bfq_wr_rt_max_time;
++ }
+ /*
+ * If needed, further reduce budget to make sure it is
+ * close to bfqq's backlog, so as to reduce the
@@ -1614,6 +1664,8 @@ index d1f648d..5469442 100644
+ bfqq->last_wr_start_finish = jiffies;
+ bfqq->wr_cur_max_time =
+ bfqd->bfq_wr_rt_max_time;
++ bfqq->wr_coeff = bfqd->bfq_wr_coeff *
++ BFQ_SOFTRT_WEIGHT_FACTOR;
+ bfq_log_bfqq(bfqd, bfqq,
+ "switching to soft_rt wr, or "
+ " just moving forward duration");
@@ -1773,7 +1825,7 @@ index d1f648d..5469442 100644
+ * function bfq_bfqq_update_budg_for_activation).
+ */
+ if (bfqd->in_service_queue && bfqq_wants_to_preempt &&
-+ bfqd->in_service_queue->wr_coeff == 1 &&
++ bfqd->in_service_queue->wr_coeff < bfqq->wr_coeff &&
+ next_queue_may_preempt(bfqd)) {
+ struct bfq_queue *in_serv =
+ bfqd->in_service_queue;
@@ -1818,7 +1870,7 @@ index d1f648d..5469442 100644
*/
prev = bfqq->next_rq;
next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
-@@ -887,160 +1373,10 @@ static void bfq_add_request(struct request *rq)
+@@ -887,160 +1378,10 @@ static void bfq_add_request(struct request *rq)
if (prev != bfqq->next_rq)
bfq_pos_tree_add_move(bfqd, bfqq);
@@ -1983,7 +2035,7 @@ index d1f648d..5469442 100644
if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
time_is_before_jiffies(
bfqq->last_wr_start_finish +
-@@ -1049,16 +1385,43 @@ add_bfqq_busy:
+@@ -1049,16 +1390,43 @@ add_bfqq_busy:
bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
bfqd->wr_busy_queues++;
@@ -2031,7 +2083,7 @@ index d1f648d..5469442 100644
if (bfqd->low_latency &&
(old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
bfqq->last_wr_start_finish = jiffies;
-@@ -1106,6 +1469,9 @@ static void bfq_remove_request(struct request *rq)
+@@ -1106,6 +1474,9 @@ static void bfq_remove_request(struct request *rq)
struct bfq_data *bfqd = bfqq->bfqd;
const int sync = rq_is_sync(rq);
@@ -2041,7 +2093,7 @@ index d1f648d..5469442 100644
if (bfqq->next_rq == rq) {
bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
bfq_updated_next_req(bfqd, bfqq);
-@@ -1119,8 +1485,25 @@ static void bfq_remove_request(struct request *rq)
+@@ -1119,8 +1490,25 @@ static void bfq_remove_request(struct request *rq)
elv_rb_del(&bfqq->sort_list, rq);
if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
@@ -2068,7 +2120,7 @@ index d1f648d..5469442 100644
/*
* Remove queue from request-position tree as it is empty.
*/
-@@ -1134,9 +1517,7 @@ static void bfq_remove_request(struct request *rq)
+@@ -1134,9 +1522,7 @@ static void bfq_remove_request(struct request *rq)
BUG_ON(bfqq->meta_pending == 0);
bfqq->meta_pending--;
}
@@ -2078,7 +2130,7 @@ index d1f648d..5469442 100644
}
static int bfq_merge(struct request_queue *q, struct request **req,
-@@ -1221,21 +1602,25 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq,
+@@ -1221,21 +1607,25 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq,
bfqq->next_rq = rq;
bfq_remove_request(next);
@@ -2107,7 +2159,7 @@ index d1f648d..5469442 100644
}
static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
-@@ -1278,7 +1663,7 @@ static int bfq_rq_close_to_sector(void *io_struct, bool request,
+@@ -1278,7 +1668,7 @@ static int bfq_rq_close_to_sector(void *io_struct, bool request,
sector_t sector)
{
return abs(bfq_io_struct_pos(io_struct, request) - sector) <=
@@ -2116,7 +2168,7 @@ index d1f648d..5469442 100644
}
static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
-@@ -1400,7 +1785,7 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+@@ -1400,7 +1790,7 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
* throughput.
*/
bfqq->new_bfqq = new_bfqq;
@@ -2125,7 +2177,7 @@ index d1f648d..5469442 100644
return new_bfqq;
}
-@@ -1431,9 +1816,23 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+@@ -1431,9 +1821,23 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
}
/*
@@ -2152,7 +2204,7 @@ index d1f648d..5469442 100644
* structure otherwise.
*
* The OOM queue is not allowed to participate to cooperation: in fact, since
-@@ -1442,6 +1841,18 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+@@ -1442,6 +1846,18 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
* handle merging with the OOM queue would be quite complex and expensive
* to maintain. Besides, in such a critical condition as an out of memory,
* the benefits of queue merging may be little relevant, or even negligible.
@@ -2171,7 +2223,7 @@ index d1f648d..5469442 100644
*/
static struct bfq_queue *
bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
-@@ -1451,16 +1862,32 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -1451,16 +1867,32 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (bfqq->new_bfqq)
return bfqq->new_bfqq;
@@ -2207,7 +2259,7 @@ index d1f648d..5469442 100644
unlikely(in_service_bfqq == &bfqd->oom_bfqq))
goto check_scheduled;
-@@ -1482,7 +1909,15 @@ check_scheduled:
+@@ -1482,7 +1914,15 @@ check_scheduled:
BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent);
@@ -2224,7 +2276,7 @@ index d1f648d..5469442 100644
bfq_may_be_close_cooperator(bfqq, new_bfqq))
return bfq_setup_merge(bfqq, new_bfqq);
-@@ -1498,46 +1933,11 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
+@@ -1498,46 +1938,11 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
*/
if (!bfqq->bic)
return;
@@ -2272,7 +2324,7 @@ index d1f648d..5469442 100644
}
static void bfq_get_bic_reference(struct bfq_queue *bfqq)
-@@ -1562,6 +1962,40 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+@@ -1562,6 +1967,40 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
if (bfq_bfqq_IO_bound(bfqq))
bfq_mark_bfqq_IO_bound(new_bfqq);
bfq_clear_bfqq_IO_bound(bfqq);
@@ -2313,7 +2365,7 @@ index d1f648d..5469442 100644
/*
* Grab a reference to the bic, to prevent it from being destroyed
* before being possibly touched by a bfq_split_bfqq().
-@@ -1588,18 +2022,6 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+@@ -1588,18 +2027,6 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
bfq_put_queue(bfqq);
}
@@ -2332,7 +2384,7 @@ index d1f648d..5469442 100644
static int bfq_allow_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
-@@ -1637,30 +2059,86 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+@@ -1637,30 +2064,86 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
* to decide whether bio and rq can be merged.
*/
bfqq = new_bfqq;
@@ -2425,7 +2477,7 @@ index d1f648d..5469442 100644
bfqd->in_service_queue = bfqq;
}
-@@ -1676,31 +2154,6 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
+@@ -1676,31 +2159,6 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
return bfqq;
}
@@ -2457,12 +2509,10 @@ index d1f648d..5469442 100644
static void bfq_arm_slice_timer(struct bfq_data *bfqd)
{
struct bfq_queue *bfqq = bfqd->in_service_queue;
-@@ -1723,64 +2176,36 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
- *
- * To prevent processes with (partly) seeky workloads from
+@@ -1725,62 +2183,34 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
* being too ill-treated, grant them a small fraction of the
-- * assigned budget before reducing the waiting time to
-- * BFQ_MIN_TT. This happened to help reduce latency.
+ * assigned budget before reducing the waiting time to
+ * BFQ_MIN_TT. This happened to help reduce latency.
- */
- sl = bfqd->bfq_slice_idle;
- /*
@@ -2507,8 +2557,6 @@ index d1f648d..5469442 100644
- bfq_clear_bfqq_budget_new(bfqq);
- bfqq->budget_timeout = jiffies +
- bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff;
-+ * assigned budget before reducing the waiting time to
-+ * BFQ_MIN_TT. This happened to help reduce latency.
+ */
+ sl = bfqd->bfq_slice_idle;
+ /*
@@ -2545,7 +2593,7 @@ index d1f648d..5469442 100644
struct bfq_queue *bfqq = RQ_BFQQ(rq);
/*
-@@ -1794,15 +2219,9 @@ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
+@@ -1794,15 +2224,9 @@ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
* incrementing bfqq->dispatched.
*/
bfqq->dispatched++;
@@ -2562,7 +2610,7 @@ index d1f648d..5469442 100644
}
/*
-@@ -1822,18 +2241,12 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
+@@ -1822,18 +2246,12 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
rq = rq_entry_fifo(bfqq->fifo.next);
@@ -2582,7 +2630,7 @@ index d1f648d..5469442 100644
static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
BUG_ON(bfqq != bfqd->in_service_queue);
-@@ -1850,12 +2263,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+@@ -1850,12 +2268,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_mark_bfqq_split_coop(bfqq);
if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
@@ -2604,7 +2652,7 @@ index d1f648d..5469442 100644
bfq_del_bfqq_busy(bfqd, bfqq, 1);
} else {
bfq_activate_bfqq(bfqd, bfqq);
-@@ -1882,10 +2298,19 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+@@ -1882,10 +2303,19 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
struct request *next_rq;
int budget, min_budget;
@@ -2626,7 +2674,7 @@ index d1f648d..5469442 100644
bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d",
bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
-@@ -1894,7 +2319,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+@@ -1894,7 +2324,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
@@ -2635,7 +2683,7 @@ index d1f648d..5469442 100644
switch (reason) {
/*
* Caveat: in all the following cases we trade latency
-@@ -1936,14 +2361,10 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+@@ -1936,14 +2366,10 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
break;
case BFQ_BFQQ_BUDGET_TIMEOUT:
/*
@@ -2654,7 +2702,7 @@ index d1f648d..5469442 100644
*/
budget = min(budget * 2, bfqd->bfq_max_budget);
break;
-@@ -1960,17 +2381,49 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+@@ -1960,17 +2386,49 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
budget = min(budget * 4, bfqd->bfq_max_budget);
break;
case BFQ_BFQQ_NO_MORE_REQUESTS:
@@ -2711,7 +2759,7 @@ index d1f648d..5469442 100644
*/
budget = bfqd->bfq_max_budget;
-@@ -1981,65 +2434,105 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+@@ -1981,65 +2439,105 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget);
/*
@@ -2846,7 +2894,7 @@ index d1f648d..5469442 100644
/*
* Calculate the bandwidth for the last slice. We use a 64 bit
-@@ -2048,32 +2541,51 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -2048,32 +2546,51 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* and to avoid overflows.
*/
bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
@@ -2914,7 +2962,7 @@ index d1f648d..5469442 100644
}
update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
-@@ -2086,9 +2598,8 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -2086,9 +2603,8 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
int dev_type = blk_queue_nonrot(bfqd->queue);
if (bfqd->bfq_user_max_budget == 0) {
bfqd->bfq_max_budget =
@@ -2926,7 +2974,7 @@ index d1f648d..5469442 100644
bfqd->bfq_max_budget);
}
if (bfqd->device_speed == BFQ_BFQD_FAST &&
-@@ -2102,38 +2613,35 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -2102,38 +2618,35 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqd->RT_prod = R_fast[dev_type] *
T_fast[dev_type];
}
@@ -2990,7 +3038,7 @@ index d1f648d..5469442 100644
}
/*
-@@ -2191,6 +2699,15 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -2191,6 +2704,15 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
@@ -3006,7 +3054,7 @@ index d1f648d..5469442 100644
return max(bfqq->last_idle_bklogged +
HZ * bfqq->service_from_backlogged /
bfqd->bfq_wr_max_softrt_rate,
-@@ -2198,13 +2715,21 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+@@ -2198,13 +2720,21 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
}
/*
@@ -3033,7 +3081,7 @@ index d1f648d..5469442 100644
}
/**
-@@ -2214,28 +2739,24 @@ static unsigned long bfq_infinity_from_now(unsigned long now)
+@@ -2214,28 +2744,24 @@ static unsigned long bfq_infinity_from_now(unsigned long now)
* @compensate: if true, compensate for the time spent idling.
* @reason: the reason causing the expiration.
*
@@ -3079,7 +3127,7 @@ index d1f648d..5469442 100644
*/
static void bfq_bfqq_expire(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
-@@ -2243,40 +2764,51 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+@@ -2243,40 +2769,53 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
enum bfqq_expiration reason)
{
bool slow;
@@ -3120,13 +3168,6 @@ index d1f648d..5469442 100644
+ bfqq->service_from_backlogged += entity->service;
- bfqq->service_from_backlogged += bfqq->entity.service;
--
-- if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
-- !bfq_bfqq_constantly_seeky(bfqq)) {
-- bfq_mark_bfqq_constantly_seeky(bfqq);
-- if (!blk_queue_nonrot(bfqd->queue))
-- bfqd->const_seeky_busy_in_flight_queues++;
-- }
+ /*
+ * As above explained, charge slow (typically seeky) and
+ * timed-out queues with the time and not the service
@@ -3148,13 +3189,21 @@ index d1f648d..5469442 100644
+ bfq_bfqq_budget_left(bfqq) >= entity->budget / 3)))
+ bfq_bfqq_charge_time(bfqd, bfqq, delta);
+- if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
+- !bfq_bfqq_constantly_seeky(bfqq)) {
+- bfq_mark_bfqq_constantly_seeky(bfqq);
+- if (!blk_queue_nonrot(bfqd->queue))
+- bfqd->const_seeky_busy_in_flight_queues++;
+- }
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
if (reason == BFQ_BFQQ_TOO_IDLE &&
- bfqq->entity.service <= 2 * bfqq->entity.budget / 10 )
+ entity->service <= 2 * entity->budget / 10 )
bfq_clear_bfqq_IO_bound(bfqq);
if (bfqd->low_latency && bfqq->wr_coeff == 1)
-@@ -2285,19 +2817,23 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+@@ -2285,19 +2824,23 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
RB_EMPTY_ROOT(&bfqq->sort_list)) {
/*
@@ -3186,7 +3235,7 @@ index d1f648d..5469442 100644
/*
* The application is still waiting for the
* completion of one or more requests:
-@@ -2314,7 +2850,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+@@ -2314,7 +2857,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
* happened to be in the past.
*/
bfqq->soft_rt_next_start =
@@ -3195,7 +3244,7 @@ index d1f648d..5469442 100644
/*
* Schedule an update of soft_rt_next_start to when
* the task may be discovered to be isochronous.
-@@ -2324,8 +2860,9 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+@@ -2324,15 +2867,27 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
}
bfq_log_bfqq(bfqd, bfqq,
@@ -3207,9 +3256,12 @@ index d1f648d..5469442 100644
/*
* Increase, decrease or leave budget unchanged according to
-@@ -2333,6 +2870,14 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ * reason.
*/
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
++ BUG_ON(bfqq->next_rq == NULL &&
++ bfqq->entity.budget < bfqq->entity.service);
__bfq_bfqq_expire(bfqd, bfqq);
+
+ BUG_ON(!bfq_bfqq_busy(bfqq) && reason == BFQ_BFQQ_BUDGET_EXHAUSTED &&
@@ -3222,7 +3274,7 @@ index d1f648d..5469442 100644
}
/*
-@@ -2342,20 +2887,17 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+@@ -2342,20 +2897,17 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
*/
static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
{
@@ -3251,7 +3303,7 @@ index d1f648d..5469442 100644
static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
{
bfq_log_bfqq(bfqq->bfqd, bfqq,
-@@ -2397,10 +2939,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2397,10 +2949,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
{
struct bfq_data *bfqd = bfqq->bfqd;
bool idling_boosts_thr, idling_boosts_thr_without_issues,
@@ -3265,7 +3317,7 @@ index d1f648d..5469442 100644
/*
* The next variable takes into account the cases where idling
* boosts the throughput.
-@@ -2422,7 +2966,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2422,7 +2976,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
*/
idling_boosts_thr = !bfqd->hw_tag ||
(!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
@@ -3274,7 +3326,7 @@ index d1f648d..5469442 100644
/*
* The value of the next variable,
-@@ -2463,74 +3007,27 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2463,74 +3017,27 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
bfqd->wr_busy_queues == 0;
/*
@@ -3299,7 +3351,8 @@ index d1f648d..5469442 100644
- * it. And, beneficially, this would imply that throughput
- * would always be boosted also with random I/O on NCQ-capable
- * HDDs.
-- *
++ * guarantees.
+ *
- * But we must be careful on this point, to avoid an unfair
- * treatment for bfqq. In fact, because of the same above
- * assignments, idling_boosts_thr_without_issues is, on the
@@ -3313,8 +3366,7 @@ index d1f648d..5469442 100644
- * latter might then get a low share of the device throughput,
- * simply because the former would get many requests served
- * after being set as in service, while the latter would not.
-+ * guarantees.
- *
+- *
- * To address this issue, we start by setting to true a
- * sentinel variable, on_hdd_and_not_all_queues_seeky, if the
- * device is rotational and not all queues with pending or
@@ -3368,7 +3420,7 @@ index d1f648d..5469442 100644
* (i) each of these processes must get the same throughput as
* the others;
* (ii) all these processes have the same I/O pattern
-@@ -2552,26 +3049,53 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2552,26 +3059,53 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* words, only if sub-condition (i) holds, then idling is
* allowed, and the device tends to be prevented from queueing
* many requests, possibly of several processes. The reason
@@ -3442,7 +3494,7 @@ index d1f648d..5469442 100644
*
* According to the above considerations, the next variable is
* true (only) if sub-condition (i) holds. To compute the
-@@ -2579,7 +3103,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2579,7 +3113,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* the function bfq_symmetric_scenario(), but also check
* whether bfqq is being weight-raised, because
* bfq_symmetric_scenario() does not take into account also
@@ -3451,7 +3503,7 @@ index d1f648d..5469442 100644
* bfq_weights_tree_add()).
*
* As a side note, it is worth considering that the above
-@@ -2601,17 +3125,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2601,17 +3135,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* bfqq. Such a case is when bfqq became active in a burst of
* queue activations. Queues that became active during a large
* burst benefit only from throughput, as discussed in the
@@ -3474,7 +3526,7 @@ index d1f648d..5469442 100644
/*
* We have now all the components we need to compute the return
-@@ -2621,6 +3144,14 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2621,6 +3154,14 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* 2) idling either boosts the throughput (without issues), or
* is necessary to preserve service guarantees.
*/
@@ -3489,7 +3541,7 @@ index d1f648d..5469442 100644
return bfq_bfqq_sync(bfqq) &&
(idling_boosts_thr_without_issues ||
idling_needed_for_service_guarantees);
-@@ -2632,7 +3163,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2632,7 +3173,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* 1) the queue must remain in service and cannot be expired, and
* 2) the device must be idled to wait for the possible arrival of a new
* request for the queue.
@@ -3498,7 +3550,7 @@ index d1f648d..5469442 100644
* why performing device idling is the best choice to boost the throughput
* and preserve service guarantees when bfq_bfqq_may_idle itself
* returns true.
-@@ -2698,9 +3229,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+@@ -2698,9 +3239,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
*/
bfq_clear_bfqq_wait_request(bfqq);
del_timer(&bfqd->idle_slice_timer);
@@ -3508,7 +3560,7 @@ index d1f648d..5469442 100644
}
goto keep_queue;
}
-@@ -2745,14 +3274,11 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+@@ -2745,14 +3284,11 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
/*
@@ -3526,7 +3578,11 @@ index d1f648d..5469442 100644
time_is_before_jiffies(bfqq->last_wr_start_finish +
bfqq->wr_cur_max_time)) {
bfqq->last_wr_start_finish = jiffies;
-@@ -2814,10 +3340,25 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
+@@ -2811,13 +3347,29 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
+ */
+ if (!bfqd->rq_in_driver)
+ bfq_schedule_dispatch(bfqd);
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
goto expire;
}
@@ -3552,7 +3608,7 @@ index d1f648d..5469442 100644
bfq_update_wr_data(bfqd, bfqq);
bfq_log_bfqq(bfqd, bfqq,
-@@ -2833,9 +3374,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
+@@ -2833,9 +3385,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
bfqd->in_service_bic = RQ_BIC(rq);
}
@@ -3563,7 +3619,7 @@ index d1f648d..5469442 100644
goto expire;
return dispatched;
-@@ -2881,8 +3420,8 @@ static int bfq_forced_dispatch(struct bfq_data *bfqd)
+@@ -2881,8 +3431,8 @@ static int bfq_forced_dispatch(struct bfq_data *bfqd)
st = bfq_entity_service_tree(&bfqq->entity);
dispatched += __bfq_forced_dispatch_bfqq(bfqq);
@@ -3573,7 +3629,7 @@ index d1f648d..5469442 100644
bfq_forget_idle(st);
}
-@@ -2895,9 +3434,9 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+@@ -2895,9 +3445,9 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
{
struct bfq_data *bfqd = q->elevator->elevator_data;
struct bfq_queue *bfqq;
@@ -3584,7 +3640,7 @@ index d1f648d..5469442 100644
if (bfqd->busy_queues == 0)
return 0;
-@@ -2908,21 +3447,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+@@ -2908,21 +3458,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
if (!bfqq)
return 0;
@@ -3607,15 +3663,21 @@ index d1f648d..5469442 100644
bfq_clear_bfqq_wait_request(bfqq);
BUG_ON(timer_pending(&bfqd->idle_slice_timer));
-@@ -2933,6 +3458,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+@@ -2933,6 +3469,8 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
bfq_log_bfqq(bfqd, bfqq, "dispatched %s request",
bfq_bfqq_sync(bfqq) ? "sync" : "async");
-+ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
++ BUG_ON(bfqq->next_rq == NULL &&
++ bfqq->entity.budget < bfqq->entity.service);
return 1;
}
-@@ -2949,11 +3475,11 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
+@@ -2944,23 +3482,22 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+ */
+ static void bfq_put_queue(struct bfq_queue *bfqq)
+ {
+- struct bfq_data *bfqd = bfqq->bfqd;
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
struct bfq_group *bfqg = bfqq_group(bfqq);
#endif
@@ -3625,13 +3687,30 @@ index d1f648d..5469442 100644
- bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq,
- atomic_read(&bfqq->ref));
- if (!atomic_dec_and_test(&bfqq->ref))
-+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, bfqq->ref);
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", bfqq, bfqq->ref);
+ bfqq->ref--;
+ if (bfqq->ref)
return;
BUG_ON(rb_first(&bfqq->sort_list));
-@@ -3007,8 +3533,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
+ BUG_ON(bfqq->entity.tree);
+ BUG_ON(bfq_bfqq_busy(bfqq));
+- BUG_ON(bfqd->in_service_queue == bfqq);
++ BUG_ON(bfqq->bfqd->in_service_queue == bfqq);
+
+ if (bfq_bfqq_sync(bfqq))
+ /*
+@@ -2973,7 +3510,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
+ */
+ hlist_del_init(&bfqq->burst_list_node);
+
+- bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq);
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq);
+
+ kmem_cache_free(bfq_pool, bfqq);
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+@@ -3007,8 +3544,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_schedule_dispatch(bfqd);
}
@@ -3641,7 +3720,7 @@ index d1f648d..5469442 100644
bfq_put_cooperator(bfqq);
-@@ -3019,26 +3544,7 @@ static void bfq_init_icq(struct io_cq *icq)
+@@ -3019,26 +3555,7 @@ static void bfq_init_icq(struct io_cq *icq)
{
struct bfq_io_cq *bic = icq_to_bic(icq);
@@ -3669,7 +3748,7 @@ index d1f648d..5469442 100644
}
static void bfq_exit_icq(struct io_cq *icq)
-@@ -3046,21 +3552,21 @@ static void bfq_exit_icq(struct io_cq *icq)
+@@ -3046,21 +3563,21 @@ static void bfq_exit_icq(struct io_cq *icq)
struct bfq_io_cq *bic = icq_to_bic(icq);
struct bfq_data *bfqd = bic_to_bfqd(bic);
@@ -3698,7 +3777,7 @@ index d1f648d..5469442 100644
}
}
-@@ -3068,7 +3574,8 @@ static void bfq_exit_icq(struct io_cq *icq)
+@@ -3068,7 +3585,8 @@ static void bfq_exit_icq(struct io_cq *icq)
* Update the entity prio values; note that the new values will not
* be used until the next (re)activation.
*/
@@ -3708,7 +3787,7 @@ index d1f648d..5469442 100644
{
struct task_struct *tsk = current;
int ioprio_class;
-@@ -3100,7 +3607,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
+@@ -3100,7 +3618,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
break;
}
@@ -3717,7 +3796,7 @@ index d1f648d..5469442 100644
printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n",
bfqq->new_ioprio);
BUG();
-@@ -3108,45 +3615,40 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
+@@ -3108,45 +3626,40 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
bfqq->entity.prio_changed = 1;
@@ -3777,7 +3856,7 @@ index d1f648d..5469442 100644
}
static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
-@@ -3155,8 +3657,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3155,8 +3668,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
RB_CLEAR_NODE(&bfqq->entity.rb_node);
INIT_LIST_HEAD(&bfqq->fifo);
INIT_HLIST_NODE(&bfqq->burst_list_node);
@@ -3788,7 +3867,7 @@ index d1f648d..5469442 100644
bfqq->bfqd = bfqd;
if (bic)
-@@ -3166,6 +3669,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3166,6 +3680,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfq_class_idle(bfqq))
bfq_mark_bfqq_idle_window(bfqq);
bfq_mark_bfqq_sync(bfqq);
@@ -3796,7 +3875,7 @@ index d1f648d..5469442 100644
} else
bfq_clear_bfqq_sync(bfqq);
bfq_mark_bfqq_IO_bound(bfqq);
-@@ -3175,72 +3679,17 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3175,72 +3690,17 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqq->pid = pid;
bfqq->wr_coeff = 1;
@@ -3865,17 +3944,17 @@ index d1f648d..5469442 100644
-
- if (new_bfqq)
- kmem_cache_free(bfq_pool, new_bfqq);
+-
+- rcu_read_unlock();
+ bfqq->soft_rt_next_start = bfq_greatest_from_now();
-- rcu_read_unlock();
--
- return bfqq;
+ /* first request is almost certainly seeky */
+ bfqq->seek_history = 1;
}
static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
-@@ -3263,44 +3712,56 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
+@@ -3263,44 +3723,60 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
}
static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
@@ -3895,13 +3974,17 @@ index d1f648d..5469442 100644
- struct blkcg *blkcg;
- struct bfq_group *bfqg;
+ rcu_read_lock();
++
++ bfqg = bfq_find_set_group(bfqd,bio_blkcg(bio));
++ if (!bfqg) {
++ bfqq = &bfqd->oom_bfqq;
++ goto out;
++ }
- rcu_read_lock();
- blkcg = bio_blkcg(bio);
- rcu_read_unlock();
- bfqg = bfq_find_alloc_group(bfqd, blkcg);
-+ bfqg = bfq_find_alloc_group(bfqd,bio_blkcg(bio));
-+
+ if (!is_sync) {
async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
ioprio);
@@ -3950,7 +4033,7 @@ index d1f648d..5469442 100644
return bfqq;
}
-@@ -3316,37 +3777,21 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd,
+@@ -3316,37 +3792,21 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd,
bic->ttime.ttime_samples;
}
@@ -4001,7 +4084,7 @@ index d1f648d..5469442 100644
}
/*
-@@ -3364,7 +3809,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
+@@ -3364,7 +3824,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
return;
/* Idle window just restored, statistics are meaningless. */
@@ -4011,7 +4094,7 @@ index d1f648d..5469442 100644
return;
enable_idle = bfq_bfqq_idle_window(bfqq);
-@@ -3404,22 +3850,13 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3404,22 +3865,13 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_update_io_thinktime(bfqd, bic);
bfq_update_io_seektime(bfqd, bfqq, rq);
@@ -4036,7 +4119,7 @@ index d1f648d..5469442 100644
bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
-@@ -3433,14 +3870,15 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3433,14 +3885,15 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* is small and the queue is not to be expired, then
* just exit.
*
@@ -4060,7 +4143,7 @@ index d1f648d..5469442 100644
*/
if (small_req && !budget_timeout)
return;
-@@ -3453,9 +3891,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3453,9 +3906,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
*/
bfq_clear_bfqq_wait_request(bfqq);
del_timer(&bfqd->idle_slice_timer);
@@ -4070,7 +4153,7 @@ index d1f648d..5469442 100644
/*
* The queue is not empty, because a new request just
-@@ -3499,27 +3935,19 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+@@ -3499,27 +3950,19 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
*/
new_bfqq->allocated[rq_data_dir(rq)]++;
bfqq->allocated[rq_data_dir(rq)]--;
@@ -4101,7 +4184,7 @@ index d1f648d..5469442 100644
rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
list_add_tail(&rq->queuelist, &bfqq->fifo);
-@@ -3528,8 +3956,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+@@ -3528,8 +3971,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
static void bfq_update_hw_tag(struct bfq_data *bfqd)
{
@@ -4112,9 +4195,16 @@ index d1f648d..5469442 100644
if (bfqd->hw_tag == 1)
return;
-@@ -3560,43 +3988,41 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
- bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)",
- blk_rq_sectors(rq), sync);
+@@ -3555,48 +3998,45 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
+ {
+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
+ struct bfq_data *bfqd = bfqq->bfqd;
+- bool sync = bfq_bfqq_sync(bfqq);
+
+- bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)",
+- blk_rq_sectors(rq), sync);
++ bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left",
++ blk_rq_sectors(rq));
+ assert_spin_locked(bfqd->queue->queue_lock);
bfq_update_hw_tag(bfqd);
@@ -4174,7 +4264,7 @@ index d1f648d..5469442 100644
*/
if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
RB_EMPTY_ROOT(&bfqq->sort_list))
-@@ -3608,10 +4034,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
+@@ -3608,10 +4048,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
* or if we want to idle in case it has no pending requests.
*/
if (bfqd->in_service_queue == bfqq) {
@@ -4186,7 +4276,7 @@ index d1f648d..5469442 100644
bfq_arm_slice_timer(bfqd);
goto out;
} else if (bfq_may_expire_for_budg_timeout(bfqq))
-@@ -3682,14 +4105,14 @@ static void bfq_put_request(struct request *rq)
+@@ -3682,14 +4119,14 @@ static void bfq_put_request(struct request *rq)
rq->elv.priv[1] = NULL;
bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d",
@@ -4203,7 +4293,7 @@ index d1f648d..5469442 100644
*/
static struct bfq_queue *
bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
-@@ -3727,11 +4150,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+@@ -3727,11 +4164,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
unsigned long flags;
bool split = false;
@@ -4216,7 +4306,7 @@ index d1f648d..5469442 100644
if (!bic)
goto queue_fail;
-@@ -3741,23 +4161,47 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+@@ -3741,23 +4175,47 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
new_queue:
bfqq = bic_to_bfqq(bic, is_sync);
if (!bfqq || bfqq == &bfqd->oom_bfqq) {
@@ -4271,7 +4361,7 @@ index d1f648d..5469442 100644
bfqq = bfq_split_bfqq(bic, bfqq);
split = true;
if (!bfqq)
-@@ -3766,9 +4210,8 @@ new_queue:
+@@ -3766,9 +4224,8 @@ new_queue:
}
bfqq->allocated[rw]++;
@@ -4283,7 +4373,7 @@ index d1f648d..5469442 100644
rq->elv.priv[0] = bic;
rq->elv.priv[1] = bfqq;
-@@ -3783,7 +4226,6 @@ new_queue:
+@@ -3783,7 +4240,6 @@ new_queue:
if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
bfqq->bic = bic;
if (split) {
@@ -4291,7 +4381,7 @@ index d1f648d..5469442 100644
/*
* If the queue has just been split from a shared
* queue, restore the idle window and the possible
-@@ -3793,6 +4235,9 @@ new_queue:
+@@ -3793,6 +4249,9 @@ new_queue:
}
}
@@ -4301,7 +4391,7 @@ index d1f648d..5469442 100644
spin_unlock_irqrestore(q->queue_lock, flags);
return 0;
-@@ -3872,6 +4317,7 @@ static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
+@@ -3872,6 +4331,7 @@ static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
cancel_work_sync(&bfqd->unplug_work);
}
@@ -4309,7 +4399,7 @@ index d1f648d..5469442 100644
static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
struct bfq_queue **bfqq_ptr)
{
-@@ -3880,9 +4326,9 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+@@ -3880,9 +4340,9 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
if (bfqq) {
@@ -4321,7 +4411,7 @@ index d1f648d..5469442 100644
bfq_put_queue(bfqq);
*bfqq_ptr = NULL;
}
-@@ -3904,6 +4350,7 @@ static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
+@@ -3904,6 +4364,7 @@ static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
}
@@ -4329,7 +4419,7 @@ index d1f648d..5469442 100644
static void bfq_exit_queue(struct elevator_queue *e)
{
-@@ -3923,8 +4370,6 @@ static void bfq_exit_queue(struct elevator_queue *e)
+@@ -3923,8 +4384,6 @@ static void bfq_exit_queue(struct elevator_queue *e)
bfq_shutdown_timer_wq(bfqd);
@@ -4338,7 +4428,7 @@ index d1f648d..5469442 100644
BUG_ON(timer_pending(&bfqd->idle_slice_timer));
#ifdef CONFIG_BFQ_GROUP_IOSCHED
-@@ -3973,11 +4418,14 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+@@ -3973,11 +4432,14 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
* will not attempt to free it.
*/
bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
@@ -4354,7 +4444,7 @@ index d1f648d..5469442 100644
/*
* Trigger weight initialization, according to ioprio, at the
* oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
-@@ -3996,9 +4444,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+@@ -3996,9 +4458,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
goto out_free;
bfq_init_root_group(bfqd->root_group, bfqd);
bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
@@ -4364,7 +4454,7 @@ index d1f648d..5469442 100644
init_timer(&bfqd->idle_slice_timer);
bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
-@@ -4023,20 +4468,19 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+@@ -4023,20 +4482,19 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfqd->bfq_back_penalty = bfq_back_penalty;
bfqd->bfq_slice_idle = bfq_slice_idle;
bfqd->bfq_class_idle_last_service = 0;
@@ -4392,7 +4482,7 @@ index d1f648d..5469442 100644
bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300);
bfqd->bfq_wr_max_time = 0;
bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
-@@ -4048,16 +4492,15 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+@@ -4048,16 +4506,15 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
* video.
*/
bfqd->wr_busy_queues = 0;
@@ -4413,7 +4503,7 @@ index d1f648d..5469442 100644
bfqd->device_speed = BFQ_BFQD_FAST;
return 0;
-@@ -4161,10 +4604,8 @@ SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
+@@ -4161,10 +4618,8 @@ SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
@@ -4426,7 +4516,7 @@ index d1f648d..5469442 100644
SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0);
SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1);
-@@ -4199,10 +4640,6 @@ STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
+@@ -4199,10 +4654,6 @@ STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
INT_MAX, 0);
STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
@@ -4437,7 +4527,7 @@ index d1f648d..5469442 100644
STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0);
STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1);
STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX,
-@@ -4224,10 +4661,8 @@ static ssize_t bfq_weights_store(struct elevator_queue *e,
+@@ -4224,10 +4675,8 @@ static ssize_t bfq_weights_store(struct elevator_queue *e,
static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
{
@@ -4449,7 +4539,7 @@ index d1f648d..5469442 100644
else
return bfq_default_max_budget;
}
-@@ -4252,6 +4687,10 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
+@@ -4252,6 +4701,10 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
return ret;
}
@@ -4460,7 +4550,7 @@ index d1f648d..5469442 100644
static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
const char *page, size_t count)
{
-@@ -4264,13 +4703,31 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
+@@ -4264,13 +4717,31 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
else if (__data > INT_MAX)
__data = INT_MAX;
@@ -4493,7 +4583,7 @@ index d1f648d..5469442 100644
static ssize_t bfq_low_latency_store(struct elevator_queue *e,
const char *page, size_t count)
{
-@@ -4297,9 +4754,8 @@ static struct elv_fs_entry bfq_attrs[] = {
+@@ -4297,9 +4768,8 @@ static struct elv_fs_entry bfq_attrs[] = {
BFQ_ATTR(back_seek_penalty),
BFQ_ATTR(slice_idle),
BFQ_ATTR(max_budget),
@@ -4504,7 +4594,7 @@ index d1f648d..5469442 100644
BFQ_ATTR(low_latency),
BFQ_ATTR(wr_coeff),
BFQ_ATTR(wr_max_time),
-@@ -4342,9 +4798,28 @@ static struct elevator_type iosched_bfq = {
+@@ -4342,9 +4812,28 @@ static struct elevator_type iosched_bfq = {
.elevator_owner = THIS_MODULE,
};
@@ -4529,11 +4619,11 @@ index d1f648d..5469442 100644
static int __init bfq_init(void)
{
int ret;
-+ char msg[50] = "BFQ I/O-scheduler: v8";
++ char msg[50] = "BFQ I/O-scheduler: v8r2";
/*
* Can be 0 on HZ < 1000 setups.
-@@ -4352,9 +4827,6 @@ static int __init bfq_init(void)
+@@ -4352,9 +4841,6 @@ static int __init bfq_init(void)
if (bfq_slice_idle == 0)
bfq_slice_idle = 1;
@@ -4543,7 +4633,7 @@ index d1f648d..5469442 100644
#ifdef CONFIG_BFQ_GROUP_IOSCHED
ret = blkcg_policy_register(&blkcg_policy_bfq);
if (ret)
-@@ -4370,23 +4842,34 @@ static int __init bfq_init(void)
+@@ -4370,23 +4856,34 @@ static int __init bfq_init(void)
* installed on the reference devices (see the comments before the
* definitions of the two arrays).
*/
@@ -4588,7 +4678,7 @@ index d1f648d..5469442 100644
return 0;
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
-index a64fec1..e54b149 100644
+index a64fec1..7d73b9d 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -7,9 +7,11 @@
@@ -4741,6 +4831,15 @@ index a64fec1..e54b149 100644
bfq_put_queue(bfqq);
}
}
+@@ -602,7 +627,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
+
+ if (entity->prio_changed) {
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+- unsigned short prev_weight, new_weight;
++ unsigned int prev_weight, new_weight;
+ struct bfq_data *bfqd = NULL;
+ struct rb_root *root;
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
@@ -628,12 +653,14 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
if (entity->new_weight != entity->orig_weight) {
if (entity->new_weight < BFQ_MIN_WEIGHT ||
@@ -4760,7 +4859,21 @@ index a64fec1..e54b149 100644
if (bfqq)
bfqq->ioprio =
bfq_weight_to_ioprio(entity->orig_weight);
-@@ -708,7 +735,7 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
+@@ -662,6 +689,13 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
+ * associated with its new weight.
+ */
+ if (prev_weight != new_weight) {
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "weight changed %d %d(%d %d)",
++ prev_weight, new_weight,
++ entity->orig_weight,
++ bfqq->wr_coeff);
++
+ root = bfqq ? &bfqd->queue_weights_tree :
+ &bfqd->group_weights_tree;
+ bfq_weights_tree_remove(bfqd, entity, root);
+@@ -708,7 +742,7 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
st = bfq_entity_service_tree(entity);
entity->service += served;
@@ -4769,7 +4882,7 @@ index a64fec1..e54b149 100644
BUG_ON(st->wsum == 0);
st->vtime += bfq_delta(served, st->wsum);
-@@ -717,31 +744,69 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
+@@ -717,31 +751,69 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
#ifdef CONFIG_BFQ_GROUP_IOSCHED
bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
#endif
@@ -4826,18 +4939,18 @@ index a64fec1..e54b149 100644
+
+ if (tot_serv_to_charge < entity->service)
+ tot_serv_to_charge = entity->service;
-+
+
+- bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "charge_time: %lu/%u ms, %d/%d/%d sectors",
+ time_ms, timeout_ms, entity->service,
+ tot_serv_to_charge, entity->budget);
-- bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
+- bfq_bfqq_served(bfqq, entity->budget - entity->service);
+ /* Increase budget to avoid inconsistencies */
+ if (tot_serv_to_charge > entity->budget)
+ entity->budget = tot_serv_to_charge;
-
-- bfq_bfqq_served(bfqq, entity->budget - entity->service);
++
+ bfq_bfqq_served(bfqq,
+ max_t(int, 0, tot_serv_to_charge - entity->service));
}
@@ -4849,7 +4962,7 @@ index a64fec1..e54b149 100644
*
* Called whenever an entity is activated, i.e., it is not active and one
* of its children receives a new request, or has to be reactivated due to
-@@ -749,11 +814,16 @@ static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
+@@ -749,11 +821,16 @@ static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
* service received if @entity is active) of the queue to calculate its
* timestamps.
*/
@@ -4867,7 +4980,7 @@ index a64fec1..e54b149 100644
if (entity == sd->in_service_entity) {
BUG_ON(entity->tree);
/*
-@@ -771,45 +841,133 @@ static void __bfq_activate_entity(struct bfq_entity *entity)
+@@ -771,45 +848,133 @@ static void __bfq_activate_entity(struct bfq_entity *entity)
* old start time.
*/
bfq_active_extract(st, entity);
@@ -4889,16 +5002,16 @@ index a64fec1..e54b149 100644
- st->wsum += entity->weight;
- bfq_get_entity(entity);
+ unsigned long long min_vstart;
-+
+
+- BUG_ON(entity->on_st);
+- entity->on_st = 1;
+ /* See comments on bfq_fqq_update_budg_for_activation */
+ if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) {
+ backshifted = true;
+ min_vstart = entity->finish;
+ } else
+ min_vstart = st->vtime;
-
-- BUG_ON(entity->on_st);
-- entity->on_st = 1;
++
+ if (entity->tree == &st->idle) {
+ /*
+ * Must be on the idle tree, bfq_idle_extract() will
@@ -5021,7 +5134,7 @@ index a64fec1..e54b149 100644
sd = entity->sched_data;
if (!bfq_update_next_in_service(sd))
-@@ -890,23 +1048,24 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+@@ -890,23 +1055,24 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
if (!__bfq_deactivate_entity(entity, requeue))
/*
@@ -5054,7 +5167,7 @@ index a64fec1..e54b149 100644
*/
requeue = 1;
}
-@@ -916,9 +1075,23 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+@@ -916,9 +1082,23 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
update:
entity = parent;
for_each_entity(entity) {
@@ -5079,7 +5192,7 @@ index a64fec1..e54b149 100644
if (!bfq_update_next_in_service(sd))
break;
}
-@@ -997,10 +1170,11 @@ left:
+@@ -997,10 +1177,11 @@ left:
* Update the virtual time in @st and return the first eligible entity
* it contains.
*/
@@ -5093,7 +5206,7 @@ index a64fec1..e54b149 100644
if (RB_EMPTY_ROOT(&st->active))
return NULL;
-@@ -1009,6 +1183,24 @@ static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
+@@ -1009,6 +1190,24 @@ static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
entity = bfq_first_active_entity(st);
BUG_ON(bfq_gt(entity->start, st->vtime));
@@ -5118,7 +5231,7 @@ index a64fec1..e54b149 100644
/*
* If the chosen entity does not match with the sched_data's
* next_in_service and we are forcedly serving the IDLE priority
-@@ -1045,10 +1237,28 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+@@ -1045,10 +1244,28 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
BUG_ON(sd->in_service_entity);
if (bfqd &&
@@ -5148,7 +5261,7 @@ index a64fec1..e54b149 100644
i = BFQ_IOPRIO_CLASSES - 1;
bfqd->bfq_class_idle_last_service = jiffies;
sd->next_in_service = entity;
-@@ -1057,6 +1267,24 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+@@ -1057,6 +1274,24 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
for (; i < BFQ_IOPRIO_CLASSES; i++) {
entity = __bfq_lookup_next_entity(st + i, false);
if (entity) {
@@ -5173,7 +5286,7 @@ index a64fec1..e54b149 100644
if (extract) {
bfq_check_next_in_service(sd, entity);
bfq_active_extract(st + i, entity);
-@@ -1070,6 +1298,13 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+@@ -1070,6 +1305,13 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
return entity;
}
@@ -5187,7 +5300,7 @@ index a64fec1..e54b149 100644
/*
* Get next queue for service.
*/
-@@ -1086,7 +1321,36 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+@@ -1086,7 +1328,36 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
sd = &bfqd->root_group->sched_data;
for (; sd ; sd = entity->my_sched_data) {
@@ -5224,7 +5337,7 @@ index a64fec1..e54b149 100644
BUG_ON(!entity);
entity->service = 0;
}
-@@ -1113,9 +1377,7 @@ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -1113,9 +1384,7 @@ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
{
struct bfq_entity *entity = &bfqq->entity;
@@ -5235,7 +5348,7 @@ index a64fec1..e54b149 100644
bfq_deactivate_entity(entity, requeue);
}
-@@ -1123,12 +1385,11 @@ static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+@@ -1123,12 +1392,11 @@ static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
struct bfq_entity *entity = &bfqq->entity;
@@ -5250,7 +5363,7 @@ index a64fec1..e54b149 100644
/*
* Called when the bfqq no longer has requests pending, remove it from
-@@ -1139,6 +1400,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -1139,6 +1407,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
{
BUG_ON(!bfq_bfqq_busy(bfqq));
BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
@@ -5258,7 +5371,7 @@ index a64fec1..e54b149 100644
bfq_log_bfqq(bfqd, bfqq, "del from busy");
-@@ -1147,27 +1409,20 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -1147,27 +1416,20 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
BUG_ON(bfqd->busy_queues == 0);
bfqd->busy_queues--;
@@ -5292,7 +5405,7 @@ index a64fec1..e54b149 100644
}
/*
-@@ -1185,16 +1440,11 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+@@ -1185,16 +1447,11 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_mark_bfqq_busy(bfqq);
bfqd->busy_queues++;
@@ -5312,17 +5425,17 @@ index a64fec1..e54b149 100644
bfqd->wr_busy_queues++;
}
diff --git a/block/bfq.h b/block/bfq.h
-index f73c942..b8ad02a 100644
+index f73c942..c6ba099 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
/*
- * BFQ-v7r11 for 4.5.0: data structures and common functions prototypes.
-+ * BFQ-v8 for 4.7.0: data structures and common functions prototypes.
++ * BFQ-v8r2 for 4.7.0: data structures and common functions prototypes.
*
* Based on ideas and code from CFQ:
* Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
-@@ -28,7 +28,7 @@
+@@ -28,20 +28,21 @@
#define BFQ_DEFAULT_QUEUE_IOPRIO 4
@@ -5331,7 +5444,14 @@ index f73c942..b8ad02a 100644
#define BFQ_DEFAULT_GRP_IOPRIO 0
#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
-@@ -36,12 +36,6 @@ struct bfq_entity;
++/*
++ * Soft real-time applications are extremely more latency sensitive
++ * than interactive ones. Over-raise the weight of the former to
++ * privilege them against the latter.
++ */
++#define BFQ_SOFTRT_WEIGHT_FACTOR 100
++
+ struct bfq_entity;
/**
* struct bfq_service_tree - per ioprio_class service tree.
@@ -5344,7 +5464,7 @@ index f73c942..b8ad02a 100644
*
* Each service tree represents a B-WF2Q+ scheduler on its own. Each
* ioprio_class has its own independent scheduler, and so its own
-@@ -49,27 +43,28 @@ struct bfq_entity;
+@@ -49,27 +50,28 @@ struct bfq_entity;
* of the containing bfqd.
*/
struct bfq_service_tree {
@@ -5383,7 +5503,7 @@ index f73c942..b8ad02a 100644
*
* The supported ioprio_classes are the same as in CFQ, in descending
* priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
-@@ -79,48 +74,29 @@ struct bfq_service_tree {
+@@ -79,48 +81,29 @@ struct bfq_service_tree {
* All the fields are protected by the queue lock of the containing bfqd.
*/
struct bfq_sched_data {
@@ -5406,7 +5526,7 @@ index f73c942..b8ad02a 100644
struct bfq_weight_counter {
- short int weight;
- unsigned int num_active;
-+ short int weight; /* weight of the entities this counter refers to */
++ unsigned int weight; /* weight of the entities this counter refers to */
+ unsigned int num_active; /* nr of active entities with this weight */
+ /*
+ * Weights tree member (see bfq_data's @queue_weights_tree and
@@ -5441,7 +5561,7 @@ index f73c942..b8ad02a 100644
*
* A bfq_entity is used to represent either a bfq_queue (leaf node in the
* cgroup hierarchy) or a bfq_group into the upper level scheduler. Each
-@@ -147,27 +123,52 @@ struct bfq_weight_counter {
+@@ -147,27 +130,52 @@ struct bfq_weight_counter {
* containing bfqd.
*/
struct bfq_entity {
@@ -5472,17 +5592,18 @@ index f73c942..b8ad02a 100644
- int service, budget;
- unsigned short weight, new_weight;
+- unsigned short orig_weight;
+ /* amount of service received during the last service slot */
+ int service;
+
+ /* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
+ int budget;
+
-+ unsigned short weight; /* weight of the queue */
-+ unsigned short new_weight; /* next weight if a change is in progress */
++ unsigned int weight; /* weight of the queue */
++ unsigned int new_weight; /* next weight if a change is in progress */
+
+ /* original weight, used to implement weight boosting */
- unsigned short orig_weight;
++ unsigned int orig_weight;
+ /* parent entity, for hierarchical scheduling */
struct bfq_entity *parent;
@@ -5499,7 +5620,7 @@ index f73c942..b8ad02a 100644
int prio_changed;
};
-@@ -175,56 +176,6 @@ struct bfq_group;
+@@ -175,56 +183,6 @@ struct bfq_group;
/**
* struct bfq_queue - leaf schedulable entity.
@@ -5556,7 +5677,7 @@ index f73c942..b8ad02a 100644
*
* A bfq_queue is a leaf request queue; it can be associated with an
* io_context or more, if it is async or shared between cooperating
-@@ -235,117 +186,163 @@ struct bfq_group;
+@@ -235,117 +193,163 @@ struct bfq_group;
* All the fields are protected by the queue lock of the containing bfqd.
*/
struct bfq_queue {
@@ -5767,7 +5888,7 @@ index f73c942..b8ad02a 100644
};
enum bfq_device_speed {
-@@ -354,224 +351,216 @@ enum bfq_device_speed {
+@@ -354,224 +358,216 @@ enum bfq_device_speed {
};
/**
@@ -6128,7 +6249,7 @@ index f73c942..b8ad02a 100644
BFQ_BFQQ_FLAG_IO_bound, /*
* bfqq has timed-out at least once
* having consumed at most 2/10 of
-@@ -581,17 +570,12 @@ enum bfqq_state_flags {
+@@ -581,17 +577,12 @@ enum bfqq_state_flags {
* bfqq activated in a large burst,
* see comments to bfq_handle_burst.
*/
@@ -6147,7 +6268,7 @@ index f73c942..b8ad02a 100644
};
#define BFQ_BFQQ_FNS(name) \
-@@ -608,25 +592,53 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
+@@ -608,25 +599,53 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \
}
@@ -6206,7 +6327,7 @@ index f73c942..b8ad02a 100644
#define bfq_log(bfqd, fmt, args...) \
blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
-@@ -640,15 +652,12 @@ enum bfqq_expiration {
+@@ -640,15 +659,12 @@ enum bfqq_expiration {
BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */
BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */
BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */
@@ -6224,7 +6345,7 @@ index f73c942..b8ad02a 100644
/* number of ios merged */
struct blkg_rwstat merged;
/* total time spent on device in ns, may not be accurate w/ queueing */
-@@ -657,12 +666,8 @@ struct bfqg_stats {
+@@ -657,12 +673,8 @@ struct bfqg_stats {
struct blkg_rwstat wait_time;
/* number of IOs queued up */
struct blkg_rwstat queued;
@@ -6237,7 +6358,7 @@ index f73c942..b8ad02a 100644
/* sum of number of ios queued across all samples */
struct blkg_stat avg_queue_size_sum;
/* count of samples taken for average */
-@@ -680,8 +685,10 @@ struct bfqg_stats {
+@@ -680,8 +692,10 @@ struct bfqg_stats {
uint64_t start_idle_time;
uint64_t start_empty_time;
uint16_t flags;
@@ -6248,7 +6369,16 @@ index f73c942..b8ad02a 100644
/*
* struct bfq_group_data - per-blkcg storage for the blkio subsystem.
*
-@@ -712,7 +719,7 @@ struct bfq_group_data {
+@@ -692,7 +706,7 @@ struct bfq_group_data {
+ /* must be the first member */
+ struct blkcg_policy_data pd;
+
+- unsigned short weight;
++ unsigned int weight;
+ };
+
+ /**
+@@ -712,7 +726,7 @@ struct bfq_group_data {
* unused for the root group. Used to know whether there
* are groups with more than one active @bfq_entity
* (see the comments to the function
@@ -6257,7 +6387,7 @@ index f73c942..b8ad02a 100644
* @rq_pos_tree: rbtree sorted by next_request position, used when
* determining if two or more queues have interleaving
* requests (see bfq_find_close_cooperator()).
-@@ -745,7 +752,6 @@ struct bfq_group {
+@@ -745,7 +759,6 @@ struct bfq_group {
struct rb_root rq_pos_tree;
struct bfqg_stats stats;
@@ -6265,7 +6395,7 @@ index f73c942..b8ad02a 100644
};
#else
-@@ -767,11 +773,25 @@ bfq_entity_service_tree(struct bfq_entity *entity)
+@@ -767,11 +780,25 @@ bfq_entity_service_tree(struct bfq_entity *entity)
struct bfq_sched_data *sched_data = entity->sched_data;
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
unsigned int idx = bfqq ? bfqq->ioprio_class - 1 :
@@ -6292,7 +6422,7 @@ index f73c942..b8ad02a 100644
return sched_data->service_tree + idx;
}
-@@ -791,47 +811,6 @@ static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
+@@ -791,47 +818,6 @@ static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
return bic->icq.q->elevator->elevator_data;
}
@@ -6340,7 +6470,7 @@ index f73c942..b8ad02a 100644
#ifdef CONFIG_BFQ_GROUP_IOSCHED
static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
-@@ -857,11 +836,13 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
+@@ -857,11 +843,13 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
static void bfq_put_queue(struct bfq_queue *bfqq);
static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-08-30 18:40 Mike Pagano
0 siblings, 0 replies; 20+ messages in thread
From: Mike Pagano @ 2016-08-30 18:40 UTC (permalink / raw
To: gentoo-commits
commit: 803475eaf1c8954eae2282923790e4b72efdcde9
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Tue Aug 30 18:40:34 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Tue Aug 30 18:40:34 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=803475ea
Update gentoo kconfig patch to remove DEVPTS_MULTIPLE_INSTANCES. See kernel upstream commit: eedf265aa003b4781de24cfed40a655a664457e6. Thanks to Ralf Ramsauer.
4567_distro-Gentoo-Kconfig.patch | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/4567_distro-Gentoo-Kconfig.patch b/4567_distro-Gentoo-Kconfig.patch
index 499b21f..cf5a20c 100644
--- a/4567_distro-Gentoo-Kconfig.patch
+++ b/4567_distro-Gentoo-Kconfig.patch
@@ -1,15 +1,14 @@
---- a/Kconfig 2016-07-01 19:22:17.117439707 -0400
-+++ b/Kconfig 2016-07-01 19:21:54.371440596 -0400
-@@ -8,4 +8,6 @@ config SRCARCH
- string
+--- a/Kconfig 2016-08-30 14:30:48.508361013 -0400
++++ b/Kconfig 2016-08-30 14:31:40.718683061 -0400
+@@ -9,3 +9,5 @@ config SRCARCH
option env="SRCARCH"
-+source "distro/Kconfig"
-+
source "arch/$SRCARCH/Kconfig"
---- /dev/null 2016-07-01 11:23:26.087932647 -0400
-+++ b/distro/Kconfig 2016-07-01 19:32:35.581415519 -0400
-@@ -0,0 +1,134 @@
++
++source "distro/Kconfig"
+--- /dev/null 2016-08-30 01:47:09.760073185 -0400
++++ b/distro/Kconfig 2016-08-30 14:32:21.378933599 -0400
+@@ -0,0 +1,133 @@
+menu "Gentoo Linux"
+
+config GENTOO_LINUX
@@ -112,7 +111,6 @@
+ select AUTOFS4_FS
+ select BLK_DEV_BSG
+ select CGROUPS
-+ select DEVPTS_MULTIPLE_INSTANCES
+ select EPOLL
+ select FANOTIFY
+ select FHANDLE
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-09-07 19:22 Tom Wijsman
0 siblings, 0 replies; 20+ messages in thread
From: Tom Wijsman @ 2016-09-07 19:22 UTC (permalink / raw
To: gentoo-commits
commit: 4f7ff0bebfab8a80780e0e4659ec867c01082a33
Author: Tom Wijsman (TomWij) <TomWij <AT> gentoo <DOT> org>
AuthorDate: Wed Sep 7 21:20:18 2016 +0000
Commit: Tom Wijsman <tomwij <AT> gentoo <DOT> org>
CommitDate: Wed Sep 7 21:20:18 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=4f7ff0be
Linux patch 4.7.3
0000_README | 4 +
1002_linux-4.7.3.patch | 4418 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 4422 insertions(+)
diff --git a/0000_README b/0000_README
index c4afba8..6a88eb0 100644
--- a/0000_README
+++ b/0000_README
@@ -51,6 +51,10 @@ Patch: 1001_linux-4.7.2.patch
From: http://www.kernel.org
Desc: Linux 4.7.2
+Patch: 1002_linux-4.7.3.patch
+From: http://www.kernel.org
+Desc: Linux 4.7.3
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1002_linux-4.7.3.patch b/1002_linux-4.7.3.patch
new file mode 100644
index 0000000..59f9b93
--- /dev/null
+++ b/1002_linux-4.7.3.patch
@@ -0,0 +1,4418 @@
+diff --git a/Makefile b/Makefile
+index bb98f1ce854e..4afff18fcb12 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 7
+-SUBLEVEL = 2
++SUBLEVEL = 3
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
+index ad7860c5ce15..51597f344a62 100644
+--- a/arch/arc/include/asm/entry.h
++++ b/arch/arc/include/asm/entry.h
+@@ -142,7 +142,7 @@
+
+ #ifdef CONFIG_ARC_CURR_IN_REG
+ ; Retrieve orig r25 and save it with rest of callee_regs
+- ld.as r12, [r12, PT_user_r25]
++ ld r12, [r12, PT_user_r25]
+ PUSH r12
+ #else
+ PUSH r25
+@@ -198,7 +198,7 @@
+
+ ; SP is back to start of pt_regs
+ #ifdef CONFIG_ARC_CURR_IN_REG
+- st.as r12, [sp, PT_user_r25]
++ st r12, [sp, PT_user_r25]
+ #endif
+ .endm
+
+diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
+index c1d36458bfb7..4c6eed80cd8b 100644
+--- a/arch/arc/include/asm/irqflags-compact.h
++++ b/arch/arc/include/asm/irqflags-compact.h
+@@ -188,10 +188,10 @@ static inline int arch_irqs_disabled(void)
+ .endm
+
+ .macro IRQ_ENABLE scratch
++ TRACE_ASM_IRQ_ENABLE
+ lr \scratch, [status32]
+ or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
+ flag \scratch
+- TRACE_ASM_IRQ_ENABLE
+ .endm
+
+ #endif /* __ASSEMBLY__ */
+diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
+index 0f92d97432a2..89eeb3720051 100644
+--- a/arch/arc/include/asm/pgtable.h
++++ b/arch/arc/include/asm/pgtable.h
+@@ -280,7 +280,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
+
+ #define pte_page(pte) pfn_to_page(pte_pfn(pte))
+ #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
+-#define pfn_pte(pfn, prot) (__pte(((pte_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
++#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+
+ /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
+ #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
+diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
+index 5a294b2c3cb3..0b10efe3a6a7 100644
+--- a/arch/arc/mm/cache.c
++++ b/arch/arc/mm/cache.c
+@@ -921,6 +921,15 @@ void arc_cache_init(void)
+
+ printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
+
++ /*
++ * Only master CPU needs to execute rest of function:
++ * - Assume SMP so all cores will have same cache config so
++ * any geomtry checks will be same for all
++ * - IOC setup / dma callbacks only need to be setup once
++ */
++ if (cpu)
++ return;
++
+ if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
+ struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+
+diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
+index 080203e3aa2f..dc7f3bcc9fa2 100644
+--- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi
++++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
+@@ -270,6 +270,8 @@
+ #io-channel-cells = <1>;
+ clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
+ clock-names = "saradc", "apb_pclk";
++ resets = <&cru SRST_SARADC>;
++ reset-names = "saradc-apb";
+ status = "disabled";
+ };
+
+diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
+index 579b6e654f2d..a55384f4a5d7 100644
+--- a/arch/arm64/include/asm/elf.h
++++ b/arch/arm64/include/asm/elf.h
+@@ -140,6 +140,7 @@ typedef struct user_fpsimd_state elf_fpregset_t;
+
+ #define SET_PERSONALITY(ex) clear_thread_flag(TIF_32BIT);
+
++/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
+ #define ARCH_DLINFO \
+ do { \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, \
+diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h
+index 22d6d8885854..4cf0c17787a8 100644
+--- a/arch/arm64/include/uapi/asm/auxvec.h
++++ b/arch/arm64/include/uapi/asm/auxvec.h
+@@ -19,4 +19,6 @@
+ /* vDSO location */
+ #define AT_SYSINFO_EHDR 33
+
++#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */
++
+ #endif
+diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
+index 2c6e598a94dc..aa68aadcdeea 100644
+--- a/arch/arm64/kernel/head.S
++++ b/arch/arm64/kernel/head.S
+@@ -757,6 +757,9 @@ ENTRY(__enable_mmu)
+ isb
+ bl __create_page_tables // recreate kernel mapping
+
++ tlbi vmalle1 // Remove any stale TLB entries
++ dsb nsh
++
+ msr sctlr_el1, x19 // re-enable the MMU
+ isb
+ ic iallu // flush instructions fetched
+diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
+index 9a3aec97ac09..ccf79d849e0a 100644
+--- a/arch/arm64/kernel/sleep.S
++++ b/arch/arm64/kernel/sleep.S
+@@ -101,12 +101,20 @@ ENTRY(cpu_resume)
+ bl el2_setup // if in EL2 drop to EL1 cleanly
+ /* enable the MMU early - so we can access sleep_save_stash by va */
+ adr_l lr, __enable_mmu /* __cpu_setup will return here */
+- ldr x27, =_cpu_resume /* __enable_mmu will branch here */
++ adr_l x27, _resume_switched /* __enable_mmu will branch here */
+ adrp x25, idmap_pg_dir
+ adrp x26, swapper_pg_dir
+ b __cpu_setup
+ ENDPROC(cpu_resume)
+
++ .pushsection ".idmap.text", "ax"
++_resume_switched:
++ ldr x8, =_cpu_resume
++ br x8
++ENDPROC(_resume_switched)
++ .ltorg
++ .popsection
++
+ ENTRY(_cpu_resume)
+ mrs x1, mpidr_el1
+ adrp x8, mpidr_hash
+diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
+index 5bb61de23201..9d37e967fa19 100644
+--- a/arch/arm64/mm/proc.S
++++ b/arch/arm64/mm/proc.S
+@@ -100,7 +100,16 @@ ENTRY(cpu_do_resume)
+
+ msr tcr_el1, x8
+ msr vbar_el1, x9
++
++ /*
++ * __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking
++ * debug exceptions. By restoring MDSCR_EL1 here, we may take a debug
++ * exception. Mask them until local_dbg_restore() in cpu_suspend()
++ * resets them.
++ */
++ disable_dbg
+ msr mdscr_el1, x10
++
+ msr sctlr_el1, x12
+ /*
+ * Restore oslsr_el1 by writing oslar_el1
+diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h
+index c0ae62520d15..274d5bc6ecce 100644
+--- a/arch/parisc/include/uapi/asm/errno.h
++++ b/arch/parisc/include/uapi/asm/errno.h
+@@ -97,10 +97,10 @@
+ #define ENOTCONN 235 /* Transport endpoint is not connected */
+ #define ESHUTDOWN 236 /* Cannot send after transport endpoint shutdown */
+ #define ETOOMANYREFS 237 /* Too many references: cannot splice */
+-#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */
+ #define ETIMEDOUT 238 /* Connection timed out */
+ #define ECONNREFUSED 239 /* Connection refused */
+-#define EREMOTERELEASE 240 /* Remote peer released connection */
++#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */
++#define EREMOTERELEASE 240 /* Remote peer released connection */
+ #define EHOSTDOWN 241 /* Host is down */
+ #define EHOSTUNREACH 242 /* No route to host */
+
+diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
+index 5adc339eb7c8..0c2a94a0f751 100644
+--- a/arch/parisc/kernel/processor.c
++++ b/arch/parisc/kernel/processor.c
+@@ -51,8 +51,6 @@ EXPORT_SYMBOL(_parisc_requires_coherency);
+
+ DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data);
+
+-extern int update_cr16_clocksource(void); /* from time.c */
+-
+ /*
+ ** PARISC CPU driver - claim "device" and initialize CPU data structures.
+ **
+@@ -228,12 +226,6 @@ static int processor_probe(struct parisc_device *dev)
+ }
+ #endif
+
+- /* If we've registered more than one cpu,
+- * we'll use the jiffies clocksource since cr16
+- * is not synchronized between CPUs.
+- */
+- update_cr16_clocksource();
+-
+ return 0;
+ }
+
+diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
+index 31ec99a5f119..5eea7dc01ba5 100644
+--- a/arch/parisc/kernel/time.c
++++ b/arch/parisc/kernel/time.c
+@@ -220,18 +220,6 @@ static struct clocksource clocksource_cr16 = {
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ };
+
+-int update_cr16_clocksource(void)
+-{
+- /* since the cr16 cycle counters are not synchronized across CPUs,
+- we'll check if we should switch to a safe clocksource: */
+- if (clocksource_cr16.rating != 0 && num_online_cpus() > 1) {
+- clocksource_change_rating(&clocksource_cr16, 0);
+- return 1;
+- }
+-
+- return 0;
+-}
+-
+ void __init start_cpu_itimer(void)
+ {
+ unsigned int cpu = smp_processor_id();
+diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
+index 1dd5bd8a8c59..133055311dce 100644
+--- a/arch/um/include/asm/common.lds.S
++++ b/arch/um/include/asm/common.lds.S
+@@ -81,7 +81,7 @@
+ .altinstr_replacement : { *(.altinstr_replacement) }
+ /* .exit.text is discard at runtime, not link time, to deal with references
+ from .altinstructions and .eh_frame */
+- .exit.text : { *(.exit.text) }
++ .exit.text : { EXIT_TEXT }
+ .exit.data : { *(.exit.data) }
+
+ .preinit_array : {
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 4e5be94e079a..6fa85944af83 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
+
+ static inline void __native_flush_tlb(void)
+ {
++ /*
++ * If current->mm == NULL then we borrow a mm which may change during a
++ * task switch and therefore we must not be preempted while we write CR3
++ * back:
++ */
++ preempt_disable();
+ native_write_cr3(native_read_cr3());
++ preempt_enable();
+ }
+
+ static inline void __native_flush_tlb_global_irq_disabled(void)
+diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
+index 6c1ff31d99ff..495c776de4b4 100644
+--- a/arch/x86/kernel/uprobes.c
++++ b/arch/x86/kernel/uprobes.c
+@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
+ *cursor &= 0xfe;
+ }
+ /*
+- * Similar treatment for VEX3 prefix.
+- * TODO: add XOP/EVEX treatment when insn decoder supports them
++ * Similar treatment for VEX3/EVEX prefix.
++ * TODO: add XOP treatment when insn decoder supports them
+ */
+- if (insn->vex_prefix.nbytes == 3) {
++ if (insn->vex_prefix.nbytes >= 3) {
+ /*
+ * vex2: c5 rvvvvLpp (has no b bit)
+ * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+ * evex: 62 rxbR00mm wvvvv1pp zllBVaaa
+- * (evex will need setting of both b and x since
+- * in non-sib encoding evex.x is 4th bit of MODRM.rm)
+- * Setting VEX3.b (setting because it has inverted meaning):
++ * Setting VEX3.b (setting because it has inverted meaning).
++ * Setting EVEX.x since (in non-SIB encoding) EVEX.x
++ * is the 4th bit of MODRM.rm, and needs the same treatment.
++ * For VEX3-encoded insns, VEX3.x value has no effect in
++ * non-SIB encoding, the change is superfluous but harmless.
+ */
+ cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+- *cursor |= 0x20;
++ *cursor |= 0x60;
+ }
+
+ /*
+@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
+
+ reg = MODRM_REG(insn); /* Fetch modrm.reg */
+ reg2 = 0xff; /* Fetch vex.vvvv */
+- if (insn->vex_prefix.nbytes == 2)
+- reg2 = insn->vex_prefix.bytes[1];
+- else if (insn->vex_prefix.nbytes == 3)
++ if (insn->vex_prefix.nbytes)
+ reg2 = insn->vex_prefix.bytes[2];
+ /*
+- * TODO: add XOP, EXEV vvvv reading.
++ * TODO: add XOP vvvv reading.
+ *
+ * vex.vvvv field is in bits 6-3, bits are inverted.
+ * But in 32-bit mode, high-order bit may be ignored.
+diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
+index 815fec6e05e2..17943a89d518 100644
+--- a/arch/x86/platform/uv/bios_uv.c
++++ b/arch/x86/platform/uv/bios_uv.c
+@@ -188,7 +188,8 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
+ void uv_bios_init(void)
+ {
+ uv_systab = NULL;
+- if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
++ if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
++ !efi.uv_systab || efi_runtime_disabled()) {
+ pr_crit("UV: UVsystab: missing\n");
+ return;
+ }
+diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
+index 8adac69dba3d..2e981732805b 100644
+--- a/drivers/acpi/cppc_acpi.c
++++ b/drivers/acpi/cppc_acpi.c
+@@ -299,8 +299,10 @@ int acpi_get_psd_map(struct cpudata **all_cpu_data)
+ continue;
+
+ cpc_ptr = per_cpu(cpc_desc_ptr, i);
+- if (!cpc_ptr)
+- continue;
++ if (!cpc_ptr) {
++ retval = -EFAULT;
++ goto err_ret;
++ }
+
+ pdomain = &(cpc_ptr->domain_info);
+ cpumask_set_cpu(i, pr->shared_cpu_map);
+@@ -322,8 +324,10 @@ int acpi_get_psd_map(struct cpudata **all_cpu_data)
+ continue;
+
+ match_cpc_ptr = per_cpu(cpc_desc_ptr, j);
+- if (!match_cpc_ptr)
+- continue;
++ if (!match_cpc_ptr) {
++ retval = -EFAULT;
++ goto err_ret;
++ }
+
+ match_pdomain = &(match_cpc_ptr->domain_info);
+ if (match_pdomain->domain != pdomain->domain)
+@@ -353,8 +357,10 @@ int acpi_get_psd_map(struct cpudata **all_cpu_data)
+ continue;
+
+ match_cpc_ptr = per_cpu(cpc_desc_ptr, j);
+- if (!match_cpc_ptr)
+- continue;
++ if (!match_cpc_ptr) {
++ retval = -EFAULT;
++ goto err_ret;
++ }
+
+ match_pdomain = &(match_cpc_ptr->domain_info);
+ if (match_pdomain->domain != pdomain->domain)
+@@ -595,9 +601,6 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
+ /* Store CPU Logical ID */
+ cpc_ptr->cpu_id = pr->id;
+
+- /* Plug it into this CPUs CPC descriptor. */
+- per_cpu(cpc_desc_ptr, pr->id) = cpc_ptr;
+-
+ /* Parse PSD data for this CPU */
+ ret = acpi_get_psd(cpc_ptr, handle);
+ if (ret)
+@@ -610,6 +613,9 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
+ goto out_free;
+ }
+
++ /* Plug PSD data into this CPUs CPC descriptor. */
++ per_cpu(cpc_desc_ptr, pr->id) = cpc_ptr;
++
+ /* Everything looks okay */
+ pr_debug("Parsed CPC struct for CPU: %d\n", pr->id);
+
+diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
+index 1f0e06065ae6..375c10f38c63 100644
+--- a/drivers/acpi/nfit.c
++++ b/drivers/acpi/nfit.c
+@@ -1396,11 +1396,12 @@ static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
+ {
+ struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
+ u64 offset = nfit_blk->stat_offset + mmio->size * bw;
++ const u32 STATUS_MASK = 0x80000037;
+
+ if (mmio->num_lines)
+ offset = to_interleave_offset(offset, mmio);
+
+- return readl(mmio->addr.base + offset);
++ return readl(mmio->addr.base + offset) & STATUS_MASK;
+ }
+
+ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
+diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
+index 5f28cf778349..f3c022292586 100644
+--- a/drivers/acpi/scan.c
++++ b/drivers/acpi/scan.c
+@@ -1967,7 +1967,7 @@ int __init acpi_scan_init(void)
+
+ static struct acpi_probe_entry *ape;
+ static int acpi_probe_count;
+-static DEFINE_SPINLOCK(acpi_probe_lock);
++static DEFINE_MUTEX(acpi_probe_mutex);
+
+ static int __init acpi_match_madt(struct acpi_subtable_header *header,
+ const unsigned long end)
+@@ -1986,7 +1986,7 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr)
+ if (acpi_disabled)
+ return 0;
+
+- spin_lock(&acpi_probe_lock);
++ mutex_lock(&acpi_probe_mutex);
+ for (ape = ap_head; nr; ape++, nr--) {
+ if (ACPI_COMPARE_NAME(ACPI_SIG_MADT, ape->id)) {
+ acpi_probe_count = 0;
+@@ -1999,7 +1999,7 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr)
+ count++;
+ }
+ }
+- spin_unlock(&acpi_probe_lock);
++ mutex_unlock(&acpi_probe_mutex);
+
+ return count;
+ }
+diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
+index a1dcf12d3dad..84708a5f8c52 100644
+--- a/drivers/block/floppy.c
++++ b/drivers/block/floppy.c
+@@ -3663,6 +3663,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
+
+ opened_bdev[drive] = bdev;
+
++ if (!(mode & (FMODE_READ|FMODE_WRITE))) {
++ res = -EINVAL;
++ goto out;
++ }
++
+ res = -ENXIO;
+
+ if (!floppy_track_buffer) {
+@@ -3706,15 +3711,13 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
+ if (UFDCS->rawcmd == 1)
+ UFDCS->rawcmd = 2;
+
+- if (mode & (FMODE_READ|FMODE_WRITE)) {
+- UDRS->last_checked = 0;
+- clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+- check_disk_change(bdev);
+- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+- goto out;
+- if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+- goto out;
+- }
++ UDRS->last_checked = 0;
++ clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
++ check_disk_change(bdev);
++ if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
++ goto out;
++ if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
++ goto out;
+
+ res = -EROFS;
+
+diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
+index ca5519c583d4..e7b98c4d49ad 100644
+--- a/drivers/clk/renesas/r8a7795-cpg-mssr.c
++++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
+@@ -91,6 +91,7 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
+ DEF_FIXED(".s1", CLK_S1, CLK_PLL1_DIV2, 3, 1),
+ DEF_FIXED(".s2", CLK_S2, CLK_PLL1_DIV2, 4, 1),
+ DEF_FIXED(".s3", CLK_S3, CLK_PLL1_DIV2, 6, 1),
++ DEF_FIXED(".sdsrc", CLK_SDSRC, CLK_PLL1_DIV2, 2, 1),
+
+ /* Core Clock Outputs */
+ DEF_FIXED("ztr", R8A7795_CLK_ZTR, CLK_PLL1_DIV2, 6, 1),
+@@ -109,10 +110,10 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
+ DEF_FIXED("s3d2", R8A7795_CLK_S3D2, CLK_S3, 2, 1),
+ DEF_FIXED("s3d4", R8A7795_CLK_S3D4, CLK_S3, 4, 1),
+
+- DEF_GEN3_SD("sd0", R8A7795_CLK_SD0, CLK_PLL1_DIV2, 0x0074),
+- DEF_GEN3_SD("sd1", R8A7795_CLK_SD1, CLK_PLL1_DIV2, 0x0078),
+- DEF_GEN3_SD("sd2", R8A7795_CLK_SD2, CLK_PLL1_DIV2, 0x0268),
+- DEF_GEN3_SD("sd3", R8A7795_CLK_SD3, CLK_PLL1_DIV2, 0x026c),
++ DEF_GEN3_SD("sd0", R8A7795_CLK_SD0, CLK_SDSRC, 0x0074),
++ DEF_GEN3_SD("sd1", R8A7795_CLK_SD1, CLK_SDSRC, 0x0078),
++ DEF_GEN3_SD("sd2", R8A7795_CLK_SD2, CLK_SDSRC, 0x0268),
++ DEF_GEN3_SD("sd3", R8A7795_CLK_SD3, CLK_SDSRC, 0x026c),
+
+ DEF_FIXED("cl", R8A7795_CLK_CL, CLK_PLL1_DIV2, 48, 1),
+ DEF_FIXED("cp", R8A7795_CLK_CP, CLK_EXTAL, 2, 1),
+diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
+index ea8189f4b021..6dc597126b79 100644
+--- a/drivers/crypto/caam/caamalg.c
++++ b/drivers/crypto/caam/caamalg.c
+@@ -441,6 +441,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
+ OP_ALG_AAI_CTR_MOD128);
+ const bool is_rfc3686 = alg->caam.rfc3686;
+
++ if (!ctx->authsize)
++ return 0;
++
+ /* NULL encryption / decryption */
+ if (!ctx->enckeylen)
+ return aead_null_set_sh_desc(aead);
+@@ -614,7 +617,7 @@ skip_enc:
+ keys_fit_inline = true;
+
+ /* aead_givencrypt shared descriptor */
+- desc = ctx->sh_desc_givenc;
++ desc = ctx->sh_desc_enc;
+
+ /* Note: Context registers are saved. */
+ init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
+@@ -645,13 +648,13 @@ copy_iv:
+ append_operation(desc, ctx->class2_alg_type |
+ OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
+
+- /* ivsize + cryptlen = seqoutlen - authsize */
+- append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
+-
+ /* Read and write assoclen bytes */
+ append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+ append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
++ /* ivsize + cryptlen = seqoutlen - authsize */
++ append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
++
+ /* Skip assoc data */
+ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+@@ -697,7 +700,7 @@ copy_iv:
+ ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
+ desc_bytes(desc),
+ DMA_TO_DEVICE);
+- if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) {
++ if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) {
+ dev_err(jrdev, "unable to map shared descriptor\n");
+ return -ENOMEM;
+ }
+diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
+index 5845d4a08797..e9703f9d15dd 100644
+--- a/drivers/crypto/caam/caamhash.c
++++ b/drivers/crypto/caam/caamhash.c
+@@ -1897,6 +1897,7 @@ caam_hash_alloc(struct caam_hash_template *template,
+ template->name);
+ snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+ template->driver_name);
++ t_alg->ahash_alg.setkey = NULL;
+ }
+ alg->cra_module = THIS_MODULE;
+ alg->cra_init = caam_hash_cra_init;
+diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
+index 0794f1cc0018..42f0f229f7f7 100644
+--- a/drivers/crypto/nx/nx.c
++++ b/drivers/crypto/nx/nx.c
+@@ -392,7 +392,7 @@ static void nx_of_update_msc(struct device *dev,
+ ((bytes_so_far + sizeof(struct msc_triplet)) <= lenp) &&
+ i < msc->triplets;
+ i++) {
+- if (msc->fc > NX_MAX_FC || msc->mode > NX_MAX_MODE) {
++ if (msc->fc >= NX_MAX_FC || msc->mode >= NX_MAX_MODE) {
+ dev_err(dev, "unknown function code/mode "
+ "combo: %d/%d (ignored)\n", msc->fc,
+ msc->mode);
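The nx change is a classic off-by-one bound check: for a table of NX_MAX_FC entries the valid indices are 0..NX_MAX_FC-1, so an index equal to the size must be rejected too. A standalone sketch (table contents invented):

    #include <stdio.h>

    #define NX_MAX_FC 3

    static const char *fc_names[NX_MAX_FC] = { "aes", "sha", "rng" };

    static const char *lookup(int fc)
    {
        if (fc >= NX_MAX_FC)  /* "> NX_MAX_FC" would let fc == 3 through */
            return "unknown";
        return fc_names[fc];
    }

    int main(void)
    {
        printf("%s %s\n", lookup(2), lookup(NX_MAX_FC));
        return 0;
    }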
+diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
+index 1e8852a8a057..4c9deef6a3e4 100644
+--- a/drivers/crypto/qat/qat_common/qat_algs.c
++++ b/drivers/crypto/qat/qat_common/qat_algs.c
+@@ -1260,8 +1260,8 @@ static struct crypto_alg qat_algs[] = { {
+ .setkey = qat_alg_ablkcipher_xts_setkey,
+ .decrypt = qat_alg_ablkcipher_decrypt,
+ .encrypt = qat_alg_ablkcipher_encrypt,
+- .min_keysize = AES_MIN_KEY_SIZE,
+- .max_keysize = AES_MAX_KEY_SIZE,
++ .min_keysize = 2 * AES_MIN_KEY_SIZE,
++ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ },
+ },
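The qat hunk doubles the advertised key sizes because an XTS key is two concatenated AES keys: one half encrypts the data, the other half encrypts the tweak. An illustrative split (the size constant mirrors AES_MIN_KEY_SIZE; nothing else here is real driver code):

    #include <stdio.h>

    #define AES_MIN_KEY_SIZE 16

    int main(void)
    {
        unsigned char xts_key[2 * AES_MIN_KEY_SIZE] = { 0 };
        const unsigned char *data_key  = xts_key;
        const unsigned char *tweak_key = xts_key + AES_MIN_KEY_SIZE;

        printf("data key at +%td, tweak key at +%td\n",
               data_key - xts_key, tweak_key - xts_key);
        return 0;
    }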
+diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
+index 55d510e36cd1..82e6743c48f8 100644
+--- a/drivers/dax/pmem.c
++++ b/drivers/dax/pmem.c
+@@ -118,6 +118,9 @@ static int dax_pmem_probe(struct device *dev)
+ return rc;
+ }
+
++ /* adjust the dax_region resource to the start of data */
++ res.start += le64_to_cpu(pfn_sb->dataoff);
++
+ nd_region = to_nd_region(dev->parent);
+ dax_region = alloc_dax_region(dev, nd_region->id, &res,
+ le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
+diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
+index 749f1bd5d65d..06ecdc38cee0 100644
+--- a/drivers/dma/sh/usb-dmac.c
++++ b/drivers/dma/sh/usb-dmac.c
+@@ -600,27 +600,30 @@ static irqreturn_t usb_dmac_isr_channel(int irq, void *dev)
+ {
+ struct usb_dmac_chan *chan = dev;
+ irqreturn_t ret = IRQ_NONE;
+- u32 mask = USB_DMACHCR_TE;
+- u32 check_bits = USB_DMACHCR_TE | USB_DMACHCR_SP;
++ u32 mask = 0;
+ u32 chcr;
++ bool xfer_end = false;
+
+ spin_lock(&chan->vc.lock);
+
+ chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+- if (chcr & check_bits)
+- mask |= USB_DMACHCR_DE | check_bits;
++ if (chcr & (USB_DMACHCR_TE | USB_DMACHCR_SP)) {
++ mask |= USB_DMACHCR_DE | USB_DMACHCR_TE | USB_DMACHCR_SP;
++ if (chcr & USB_DMACHCR_DE)
++ xfer_end = true;
++ ret |= IRQ_HANDLED;
++ }
+ if (chcr & USB_DMACHCR_NULL) {
+ /* An interruption of TE will happen after we set FTE */
+ mask |= USB_DMACHCR_NULL;
+ chcr |= USB_DMACHCR_FTE;
+ ret |= IRQ_HANDLED;
+ }
+- usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask);
++ if (mask)
++ usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask);
+
+- if (chcr & check_bits) {
++ if (xfer_end)
+ usb_dmac_isr_transfer_end(chan);
+- ret |= IRQ_HANDLED;
+- }
+
+ spin_unlock(&chan->vc.lock);
+
+diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
+index 4fb2eb7c800d..ce0067b7a2f6 100644
+--- a/drivers/edac/sb_edac.c
++++ b/drivers/edac/sb_edac.c
+@@ -552,9 +552,9 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = {
+ /* Knight's Landing Support */
+ /*
+ * KNL's memory channels are swizzled between memory controllers.
+- * MC0 is mapped to CH3,5,6 and MC1 is mapped to CH0,1,2
++ * MC0 is mapped to CH3,4,5 and MC1 is mapped to CH0,1,2
+ */
+-#define knl_channel_remap(channel) ((channel + 3) % 6)
++#define knl_channel_remap(mc, chan) ((mc) ? (chan) : (chan) + 3)
+
+ /* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */
+ #define PCI_DEVICE_ID_INTEL_KNL_IMC_MC 0x7840
+@@ -1286,7 +1286,7 @@ static u32 knl_get_mc_route(int entry, u32 reg)
+ mc = GET_BITFIELD(reg, entry*3, (entry*3)+2);
+ chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1);
+
+- return knl_channel_remap(mc*3 + chan);
++ return knl_channel_remap(mc, chan);
+ }
+
+ /*
+@@ -2997,8 +2997,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
+ } else {
+ char A = *("A");
+
+- channel = knl_channel_remap(channel);
++ /*
++ * Reported channel is in range 0-2, so we can't map it
++		 * back to mc. To figure out mc we check the machine check
++ * bank register that reported this error.
++ * bank15 means mc0 and bank16 means mc1.
++ */
++ channel = knl_channel_remap(m->bank == 16, channel);
+ channel_mask = 1 << channel;
++
+ snprintf(msg, sizeof(msg),
+ "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
+ overflow ? " OVERFLOW" : "",
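The corrected knl_channel_remap() keeps MC1's channels at 0-2 and shifts MC0's to 3-5, whereas the old `(channel + 3) % 6` lost track of which controller reported the error. Printing the new mapping (the macro is copied from the hunk):

    #include <stdio.h>

    #define knl_channel_remap(mc, chan) ((mc) ? (chan) : (chan) + 3)

    int main(void)
    {
        for (int mc = 0; mc < 2; mc++)
            for (int chan = 0; chan < 3; chan++)
                printf("mc%d chan%d -> CH%d\n",
                       mc, chan, knl_channel_remap(mc, chan));
        return 0;
    }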
+diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
+index c99c24bc79b0..9ae6c116c474 100644
+--- a/drivers/firmware/efi/capsule-loader.c
++++ b/drivers/firmware/efi/capsule-loader.c
+@@ -16,6 +16,7 @@
+ #include <linux/slab.h>
+ #include <linux/mutex.h>
+ #include <linux/efi.h>
++#include <linux/vmalloc.h>
+
+ #define NO_FURTHER_WRITE_ACTION -1
+
+@@ -108,14 +109,15 @@ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
+ int ret;
+ void *cap_hdr_temp;
+
+- cap_hdr_temp = kmap(cap_info->pages[0]);
++ cap_hdr_temp = vmap(cap_info->pages, cap_info->index,
++ VM_MAP, PAGE_KERNEL);
+ if (!cap_hdr_temp) {
+- pr_debug("%s: kmap() failed\n", __func__);
++ pr_debug("%s: vmap() failed\n", __func__);
+ return -EFAULT;
+ }
+
+ ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
+- kunmap(cap_info->pages[0]);
++ vunmap(cap_hdr_temp);
+ if (ret) {
+ pr_err("%s: efi_capsule_update() failed\n", __func__);
+ return ret;
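The capsule-loader switch from kmap() to vmap() matters because kmap() exposes a single page, so a capsule spanning several pages was only partially visible to efi_capsule_update(); vmap() presents the whole pages[] array as one contiguous range. A hedged userspace analogue (page size and contents invented for the demo):

    #include <stdio.h>
    #include <string.h>

    #define PG 8

    int main(void)
    {
        char pages[2][PG];       /* stands in for cap_info->pages */
        char whole[2 * PG + 1];  /* stands in for the vmap() view */

        memcpy(pages[0], "CAPSULE_", PG);
        memcpy(pages[1], "HEADER!", PG);

        printf("one page : %.*s\n", PG, pages[0]);  /* kmap-style view */

        memcpy(whole, pages[0], PG);                /* vmap-style view */
        memcpy(whole + PG, pages[1], PG);
        whole[2 * PG] = '\0';
        printf("all pages: %s\n", whole);
        return 0;
    }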
+diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
+index 53b9fd2293ee..6eedff45e6d7 100644
+--- a/drivers/firmware/efi/capsule.c
++++ b/drivers/firmware/efi/capsule.c
+@@ -190,9 +190,9 @@ efi_capsule_update_locked(efi_capsule_header_t *capsule,
+ * map the capsule described by @capsule with its data in @pages and
+ * send it to the firmware via the UpdateCapsule() runtime service.
+ *
+- * @capsule must be a virtual mapping of the first page in @pages
+- * (@pages[0]) in the kernel address space. That is, a
+- * capsule_header_t that describes the entire contents of the capsule
++ * @capsule must be a virtual mapping of the complete capsule update in the
++ * kernel address space, as the capsule can be consumed immediately.
++ * A capsule_header_t that describes the entire contents of the capsule
+ * must be at the start of the first data page.
+ *
+ * Even though this function will validate that the firmware supports
+diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
+index d7860614f87f..5d457ff61325 100644
+--- a/drivers/gpio/Kconfig
++++ b/drivers/gpio/Kconfig
+@@ -50,6 +50,7 @@ config GPIO_DEVRES
+ config OF_GPIO
+ def_bool y
+ depends on OF
++ depends on HAS_IOMEM
+
+ config GPIO_ACPI
+ def_bool y
+diff --git a/drivers/gpio/gpio-max730x.c b/drivers/gpio/gpio-max730x.c
+index 08807368f007..946d09195598 100644
+--- a/drivers/gpio/gpio-max730x.c
++++ b/drivers/gpio/gpio-max730x.c
+@@ -192,6 +192,10 @@ int __max730x_probe(struct max7301 *ts)
+ ts->chip.parent = dev;
+ ts->chip.owner = THIS_MODULE;
+
++ ret = gpiochip_add_data(&ts->chip, ts);
++ if (ret)
++ goto exit_destroy;
++
+ /*
+ * initialize pullups according to platform data and cache the
+ * register values for later use.
+@@ -213,10 +217,6 @@ int __max730x_probe(struct max7301 *ts)
+ }
+ }
+
+- ret = gpiochip_add_data(&ts->chip, ts);
+- if (ret)
+- goto exit_destroy;
+-
+ return ret;
+
+ exit_destroy:
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index e055d5be1c3c..56475b1f1581 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -415,6 +415,8 @@ struct amdgpu_mman {
+
+ /* custom LRU management */
+ struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE];
++ /* guard for log2_size array, don't add anything in between */
++ struct amdgpu_mman_lru guard;
+ };
+
+ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
+@@ -637,9 +639,9 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
+ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
+ int amdgpu_gart_init(struct amdgpu_device *adev);
+ void amdgpu_gart_fini(struct amdgpu_device *adev);
+-void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
++void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+ int pages);
+-int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
++int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+ int pages, struct page **pagelist,
+ dma_addr_t *dma_addr, uint32_t flags);
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+index 983175363b06..fe872b82e619 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+@@ -321,6 +321,19 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
+ (le16_to_cpu(path->usConnObjectId) &
+ OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
+
++ /* Skip TV/CV support */
++ if ((le16_to_cpu(path->usDeviceTag) ==
++ ATOM_DEVICE_TV1_SUPPORT) ||
++ (le16_to_cpu(path->usDeviceTag) ==
++ ATOM_DEVICE_CV_SUPPORT))
++ continue;
++
++ if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) {
++ DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n",
++ con_obj_id, le16_to_cpu(path->usDeviceTag));
++ continue;
++ }
++
+ connector_type =
+ object_connector_convert[con_obj_id];
+ connector_object_id = con_obj_id;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+index 921bce2df0b0..0feea347f680 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+@@ -221,7 +221,7 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
+ * Unbinds the requested pages from the gart page table and
+ * replaces them with the dummy page (all asics).
+ */
+-void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
++void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+ int pages)
+ {
+ unsigned t;
+@@ -268,7 +268,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+ * (all asics).
+ * Returns 0 for success, -EINVAL for failure.
+ */
+-int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
++int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+ int pages, struct page **pagelist, dma_addr_t *dma_addr,
+ uint32_t flags)
+ {
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+index 34e35423b78e..194cfc1a8cca 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+@@ -288,7 +288,7 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
+ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
+ {
+ unsigned i;
+- int r;
++ int r, ret = 0;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+@@ -309,10 +309,11 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
+ } else {
+ /* still not good, but we can live with it */
+ DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
++ ret = r;
+ }
+ }
+ }
+- return 0;
++ return ret;
+ }
+
+ /*
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+index 3b9053af4762..46c5297f6dfe 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+@@ -251,8 +251,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+
+ adev = amdgpu_get_adev(bo->bdev);
+ ring = adev->mman.buffer_funcs_ring;
+- old_start = old_mem->start << PAGE_SHIFT;
+- new_start = new_mem->start << PAGE_SHIFT;
++ old_start = (u64)old_mem->start << PAGE_SHIFT;
++ new_start = (u64)new_mem->start << PAGE_SHIFT;
+
+ switch (old_mem->mem_type) {
+ case TTM_PL_VRAM:
+@@ -943,6 +943,8 @@ static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo)
+ struct list_head *res = lru->lru[tbo->mem.mem_type];
+
+ lru->lru[tbo->mem.mem_type] = &tbo->lru;
++ while ((++lru)->lru[tbo->mem.mem_type] == res)
++ lru->lru[tbo->mem.mem_type] = &tbo->lru;
+
+ return res;
+ }
+@@ -953,6 +955,8 @@ static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
+ struct list_head *res = lru->swap_lru;
+
+ lru->swap_lru = &tbo->swap;
++ while ((++lru)->swap_lru == res)
++ lru->swap_lru = &tbo->swap;
+
+ return res;
+ }
+@@ -1004,6 +1008,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
+ lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
+ }
+
++ for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
++ adev->mman.guard.lru[j] = NULL;
++ adev->mman.guard.swap_lru = NULL;
++
+ adev->mman.initialized = true;
+ r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
+ adev->mc.real_vram_size >> PAGE_SHIFT);
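The amdgpu_move_blit() cast fixes a shift performed in the width of the left operand: a 32-bit page number shifted by PAGE_SHIFT wraps before it ever reaches the 64-bit destination. A standalone demonstration:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned int start = 0x00200000;  /* page number of an 8 GiB offset */
        uint64_t wrong = start << PAGE_SHIFT;            /* wraps in 32 bits */
        uint64_t right = (uint64_t)start << PAGE_SHIFT;  /* widened first */

        printf("wrong=0x%" PRIx64 " right=0x%" PRIx64 "\n", wrong, right);
        return 0;
    }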
+diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+index 9dc4e24e31e7..3a1bbe2d87ba 100644
+--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
++++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+@@ -52,6 +52,7 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev);
+ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
+ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
+ static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
++static int cik_sdma_soft_reset(void *handle);
+
+ MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
+ MODULE_FIRMWARE("radeon/bonaire_sdma1.bin");
+@@ -1051,6 +1052,8 @@ static int cik_sdma_resume(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ cik_sdma_soft_reset(handle);
++
+ return cik_sdma_hw_init(adev);
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+index fc8ff4d3ccf8..6b40809bbed5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+@@ -2777,8 +2777,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+ u64 wb_gpu_addr;
+ u32 *buf;
+ struct bonaire_mqd *mqd;
+-
+- gfx_v7_0_cp_compute_enable(adev, true);
++ struct amdgpu_ring *ring;
+
+ /* fix up chicken bits */
+ tmp = RREG32(mmCP_CPF_DEBUG);
+@@ -2813,7 +2812,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+
+ /* init the queues. Just two for now. */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+- struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
++ ring = &adev->gfx.compute_ring[i];
+
+ if (ring->mqd_obj == NULL) {
+ r = amdgpu_bo_create(adev,
+@@ -2992,6 +2991,13 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+ amdgpu_bo_unreserve(ring->mqd_obj);
+
+ ring->ready = true;
++ }
++
++ gfx_v7_0_cp_compute_enable(adev, true);
++
++ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
++ ring = &adev->gfx.compute_ring[i];
++
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ ring->ready = false;
+diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
+index 85c4debf47e0..fd3553bebab2 100644
+--- a/drivers/gpu/drm/i915/i915_drv.c
++++ b/drivers/gpu/drm/i915/i915_drv.c
+@@ -1578,6 +1578,9 @@ static int intel_runtime_suspend(struct device *device)
+
+ assert_forcewakes_inactive(dev_priv);
+
++ if (!IS_VALLEYVIEW(dev_priv) || !IS_CHERRYVIEW(dev_priv))
++ intel_hpd_poll_init(dev_priv);
++
+ DRM_DEBUG_KMS("Device suspended\n");
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
+index 227a63ee0067..0ed5fd3b1804 100644
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -281,6 +281,9 @@ struct i915_hotplug {
+ u32 short_port_mask;
+ struct work_struct dig_port_work;
+
++ struct work_struct poll_init_work;
++ bool poll_enabled;
++
+ /*
+ * if we get a HPD irq from DP and a HPD irq from non-DP
+ * the non-DP HPD could block the workqueue on a mode config
+@@ -2791,6 +2794,8 @@ void intel_hpd_init(struct drm_i915_private *dev_priv);
+ void intel_hpd_init_work(struct drm_i915_private *dev_priv);
+ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
+ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port);
++bool intel_hpd_disable(struct drm_i915_private *dev_priv, enum hpd_pin pin);
++void intel_hpd_enable(struct drm_i915_private *dev_priv, enum hpd_pin pin);
+
+ /* i915_irq.c */
+ void i915_queue_hangcheck(struct drm_device *dev);
+diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
+index 92acdff9dad3..e856f7906a48 100644
+--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
+@@ -2826,6 +2826,7 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev)
+ struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+
+ ppgtt->base.cleanup(&ppgtt->base);
++ kfree(ppgtt);
+ }
+
+ i915_gem_cleanup_stolen(dev);
+diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
+index bc3b6dde7b4b..5369a6d87fd8 100644
+--- a/drivers/gpu/drm/i915/i915_reg.h
++++ b/drivers/gpu/drm/i915/i915_reg.h
+@@ -1522,6 +1522,7 @@ enum skl_disp_power_wells {
+ #define BALANCE_LEG_MASK(port) (7<<(8+3*(port)))
+ /* Balance leg disable bits */
+ #define BALANCE_LEG_DISABLE_SHIFT 23
++#define BALANCE_LEG_DISABLE(port) (1 << (23 + (port)))
+
+ /*
+ * Fence registers
+diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
+index 02a7527ce7bb..74eca43b2818 100644
+--- a/drivers/gpu/drm/i915/intel_audio.c
++++ b/drivers/gpu/drm/i915/intel_audio.c
+@@ -600,6 +600,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev,
+ if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv))
+ return;
+
++ i915_audio_component_get_power(dev);
++
+ /*
+ * Enable/disable generating the codec wake signal, overriding the
+ * internal logic to generate the codec wake to controller.
+@@ -615,6 +617,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev,
+ I915_WRITE(HSW_AUD_CHICKENBIT, tmp);
+ usleep_range(1000, 1500);
+ }
++
++ i915_audio_component_put_power(dev);
+ }
+
+ /* Get CDCLK in kHz */
+@@ -654,6 +658,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev,
+ !IS_HASWELL(dev_priv))
+ return 0;
+
++ i915_audio_component_get_power(dev);
+ mutex_lock(&dev_priv->av_mutex);
+ /* 1. get the pipe */
+ intel_encoder = dev_priv->dig_port_map[port];
+@@ -704,6 +709,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev,
+
+ unlock:
+ mutex_unlock(&dev_priv->av_mutex);
++ i915_audio_component_put_power(dev);
+ return err;
+ }
+
+diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
+index 3fbb6fc66451..a3f87d66829d 100644
+--- a/drivers/gpu/drm/i915/intel_crt.c
++++ b/drivers/gpu/drm/i915/intel_crt.c
+@@ -327,10 +327,25 @@ static bool valleyview_crt_detect_hotplug(struct drm_connector *connector)
+ struct drm_device *dev = connector->dev;
+ struct intel_crt *crt = intel_attached_crt(connector);
+ struct drm_i915_private *dev_priv = dev->dev_private;
++ bool reenable_hpd;
+ u32 adpa;
+ bool ret;
+ u32 save_adpa;
+
++ /*
++	 * Doing a force trigger causes an hpd interrupt to get sent, which can
++ * get us stuck in a loop if we're polling:
++ * - We enable power wells and reset the ADPA
++ * - output_poll_exec does force probe on VGA, triggering a hpd
++ * - HPD handler waits for poll to unlock dev->mode_config.mutex
++ * - output_poll_exec shuts off the ADPA, unlocks
++ * dev->mode_config.mutex
++ * - HPD handler runs, resets ADPA and brings us back to the start
++ *
++ * Just disable HPD interrupts here to prevent this
++ */
++ reenable_hpd = intel_hpd_disable(dev_priv, crt->base.hpd_pin);
++
+ save_adpa = adpa = I915_READ(crt->adpa_reg);
+ DRM_DEBUG_KMS("trigger hotplug detect cycle: adpa=0x%x\n", adpa);
+
+@@ -353,6 +368,9 @@ static bool valleyview_crt_detect_hotplug(struct drm_connector *connector)
+
+ DRM_DEBUG_KMS("valleyview hotplug adpa=0x%x, result %d\n", adpa, ret);
+
++ if (reenable_hpd)
++ intel_hpd_enable(dev_priv, crt->base.hpd_pin);
++
+ return ret;
+ }
+
+@@ -713,11 +731,11 @@ static int intel_crt_set_property(struct drm_connector *connector,
+ return 0;
+ }
+
+-static void intel_crt_reset(struct drm_connector *connector)
++void intel_crt_reset(struct drm_encoder *encoder)
+ {
+- struct drm_device *dev = connector->dev;
++ struct drm_device *dev = encoder->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+- struct intel_crt *crt = intel_attached_crt(connector);
++ struct intel_crt *crt = intel_encoder_to_crt(to_intel_encoder(encoder));
+
+ if (INTEL_INFO(dev)->gen >= 5) {
+ u32 adpa;
+@@ -739,7 +757,6 @@ static void intel_crt_reset(struct drm_connector *connector)
+ */
+
+ static const struct drm_connector_funcs intel_crt_connector_funcs = {
+- .reset = intel_crt_reset,
+ .dpms = drm_atomic_helper_connector_dpms,
+ .detect = intel_crt_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+@@ -757,6 +774,7 @@ static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs
+ };
+
+ static const struct drm_encoder_funcs intel_crt_enc_funcs = {
++ .reset = intel_crt_reset,
+ .destroy = intel_encoder_destroy,
+ };
+
+@@ -902,5 +920,5 @@ void intel_crt_init(struct drm_device *dev)
+ dev_priv->fdi_rx_config = I915_READ(FDI_RX_CTL(PIPE_A)) & fdi_config;
+ }
+
+- intel_crt_reset(connector);
++ intel_crt_reset(&crt->base.base);
+ }
+diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
+index 01e523df363b..12c4f4356fd9 100644
+--- a/drivers/gpu/drm/i915/intel_ddi.c
++++ b/drivers/gpu/drm/i915/intel_ddi.c
+@@ -145,7 +145,7 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = {
+ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
+ { 0x0000201B, 0x000000A2, 0x0 },
+ { 0x00005012, 0x00000088, 0x0 },
+- { 0x80007011, 0x000000CD, 0x0 },
++ { 0x80007011, 0x000000CD, 0x1 },
+ { 0x80009010, 0x000000C0, 0x1 },
+ { 0x0000201B, 0x0000009D, 0x0 },
+ { 0x80005012, 0x000000C0, 0x1 },
+@@ -158,7 +158,7 @@ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
+ static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = {
+ { 0x00000018, 0x000000A2, 0x0 },
+ { 0x00005012, 0x00000088, 0x0 },
+- { 0x80007011, 0x000000CD, 0x0 },
++ { 0x80007011, 0x000000CD, 0x3 },
+ { 0x80009010, 0x000000C0, 0x3 },
+ { 0x00000018, 0x0000009D, 0x0 },
+ { 0x80005012, 0x000000C0, 0x3 },
+@@ -388,6 +388,40 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries)
+ }
+ }
+
++static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port)
++{
++ int n_hdmi_entries;
++ int hdmi_level;
++ int hdmi_default_entry;
++
++ hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
++
++ if (IS_BROXTON(dev_priv))
++ return hdmi_level;
++
++ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
++ skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
++ hdmi_default_entry = 8;
++ } else if (IS_BROADWELL(dev_priv)) {
++ n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
++ hdmi_default_entry = 7;
++ } else if (IS_HASWELL(dev_priv)) {
++ n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
++ hdmi_default_entry = 6;
++ } else {
++ WARN(1, "ddi translation table missing\n");
++ n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
++ hdmi_default_entry = 7;
++ }
++
++ /* Choose a good default if VBT is badly populated */
++ if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
++ hdmi_level >= n_hdmi_entries)
++ hdmi_level = hdmi_default_entry;
++
++ return hdmi_level;
++}
++
+ /*
+ * Starting with Haswell, DDI port buffers must be programmed with correct
+ * values in advance. The buffer values are different for FDI and DP modes,
+@@ -399,7 +433,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
+ {
+ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+ u32 iboost_bit = 0;
+- int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry,
++ int i, n_hdmi_entries, n_dp_entries, n_edp_entries,
+ size;
+ int hdmi_level;
+ enum port port;
+@@ -410,7 +444,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
+ const struct ddi_buf_trans *ddi_translations;
+
+ port = intel_ddi_get_encoder_port(encoder);
+- hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
++ hdmi_level = intel_ddi_hdmi_level(dev_priv, port);
+
+ if (IS_BROXTON(dev_priv)) {
+ if (encoder->type != INTEL_OUTPUT_HDMI)
+@@ -430,7 +464,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
+ skl_get_buf_trans_edp(dev_priv, &n_edp_entries);
+ ddi_translations_hdmi =
+ skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
+- hdmi_default_entry = 8;
+ /* If we're boosting the current, set bit 31 of trans1 */
+ if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level ||
+ dev_priv->vbt.ddi_port_info[port].dp_boost_level)
+@@ -456,7 +489,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
+
+ n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
+ n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
+- hdmi_default_entry = 7;
+ } else if (IS_HASWELL(dev_priv)) {
+ ddi_translations_fdi = hsw_ddi_translations_fdi;
+ ddi_translations_dp = hsw_ddi_translations_dp;
+@@ -464,7 +496,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
+ ddi_translations_hdmi = hsw_ddi_translations_hdmi;
+ n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp);
+ n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
+- hdmi_default_entry = 6;
+ } else {
+ WARN(1, "ddi translation table missing\n");
+ ddi_translations_edp = bdw_ddi_translations_dp;
+@@ -474,7 +505,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
+ n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
+ n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
+ n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
+- hdmi_default_entry = 7;
+ }
+
+ switch (encoder->type) {
+@@ -505,11 +535,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
+ if (encoder->type != INTEL_OUTPUT_HDMI)
+ return;
+
+- /* Choose a good default if VBT is badly populated */
+- if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
+- hdmi_level >= n_hdmi_entries)
+- hdmi_level = hdmi_default_entry;
+-
+ /* Entry 9 is for HDMI: */
+ I915_WRITE(DDI_BUF_TRANS_LO(port, i),
+ ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit);
+@@ -1371,14 +1396,30 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc)
+ TRANS_CLK_SEL_DISABLED);
+ }
+
+-static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
+- u32 level, enum port port, int type)
++static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
++ enum port port, uint8_t iboost)
+ {
++ u32 tmp;
++
++ tmp = I915_READ(DISPIO_CR_TX_BMU_CR0);
++ tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port));
++ if (iboost)
++ tmp |= iboost << BALANCE_LEG_SHIFT(port);
++ else
++ tmp |= BALANCE_LEG_DISABLE(port);
++ I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp);
++}
++
++static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level)
++{
++ struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
++ struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
++ enum port port = intel_dig_port->port;
++ int type = encoder->type;
+ const struct ddi_buf_trans *ddi_translations;
+ uint8_t iboost;
+ uint8_t dp_iboost, hdmi_iboost;
+ int n_entries;
+- u32 reg;
+
+ /* VBT may override standard boost values */
+ dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level;
+@@ -1420,16 +1461,10 @@ static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
+ return;
+ }
+
+- reg = I915_READ(DISPIO_CR_TX_BMU_CR0);
+- reg &= ~BALANCE_LEG_MASK(port);
+- reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port));
+-
+- if (iboost)
+- reg |= iboost << BALANCE_LEG_SHIFT(port);
+- else
+- reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port);
++ _skl_ddi_set_iboost(dev_priv, port, iboost);
+
+- I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg);
++ if (port == PORT_A && intel_dig_port->max_lanes == 4)
++ _skl_ddi_set_iboost(dev_priv, PORT_E, iboost);
+ }
+
+ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv,
+@@ -1560,7 +1595,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp)
+ level = translate_signal_level(signal_levels);
+
+ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+- skl_ddi_set_iboost(dev_priv, level, port, encoder->type);
++ skl_ddi_set_iboost(encoder, level);
+ else if (IS_BROXTON(dev_priv))
+ bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type);
+
+@@ -1629,6 +1664,10 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
+ intel_dp_stop_link_train(intel_dp);
+ } else if (type == INTEL_OUTPUT_HDMI) {
+ struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
++ int level = intel_ddi_hdmi_level(dev_priv, port);
++
++ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
++ skl_ddi_set_iboost(intel_encoder, level);
+
+ intel_hdmi->set_infoframes(encoder,
+ crtc->config->has_hdmi_sink,
+diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
+index f7f0f01814f6..94144a70b225 100644
+--- a/drivers/gpu/drm/i915/intel_drv.h
++++ b/drivers/gpu/drm/i915/intel_drv.h
+@@ -1052,7 +1052,7 @@ void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv,
+
+ /* intel_crt.c */
+ void intel_crt_init(struct drm_device *dev);
+-
++void intel_crt_reset(struct drm_encoder *encoder);
+
+ /* intel_ddi.c */
+ void intel_ddi_clk_select(struct intel_encoder *encoder,
+@@ -1346,6 +1346,8 @@ void intel_dsi_init(struct drm_device *dev);
+
+ /* intel_dvo.c */
+ void intel_dvo_init(struct drm_device *dev);
++/* intel_hotplug.c */
++void intel_hpd_poll_init(struct drm_i915_private *dev_priv);
+
+
+ /* legacy fbdev emulation in intel_fbdev.c */
+diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
+index bee673005d48..2c49458a9316 100644
+--- a/drivers/gpu/drm/i915/intel_hotplug.c
++++ b/drivers/gpu/drm/i915/intel_hotplug.c
+@@ -453,20 +453,47 @@ void intel_hpd_irq_handler(struct drm_device *dev,
+ *
+ * This is a separate step from interrupt enabling to simplify the locking rules
+ * in the driver load and resume code.
++ *
++ * Also see: intel_hpd_poll_init(), which enables connector polling
+ */
+ void intel_hpd_init(struct drm_i915_private *dev_priv)
+ {
+- struct drm_device *dev = dev_priv->dev;
+- struct drm_mode_config *mode_config = &dev->mode_config;
+- struct drm_connector *connector;
+ int i;
+
+ for_each_hpd_pin(i) {
+ dev_priv->hotplug.stats[i].count = 0;
+ dev_priv->hotplug.stats[i].state = HPD_ENABLED;
+ }
++
++ WRITE_ONCE(dev_priv->hotplug.poll_enabled, false);
++ schedule_work(&dev_priv->hotplug.poll_init_work);
++
++ /*
++	 * Interrupt setup is already guaranteed to be single-threaded; this is
++ * just to make the assert_spin_locked checks happy.
++ */
++ spin_lock_irq(&dev_priv->irq_lock);
++ if (dev_priv->display.hpd_irq_setup)
++ dev_priv->display.hpd_irq_setup(dev_priv->dev);
++ spin_unlock_irq(&dev_priv->irq_lock);
++}
++
++void i915_hpd_poll_init_work(struct work_struct *work) {
++ struct drm_i915_private *dev_priv =
++ container_of(work, struct drm_i915_private,
++ hotplug.poll_init_work);
++ struct drm_device *dev = dev_priv->dev;
++ struct drm_mode_config *mode_config = &dev->mode_config;
++ struct drm_connector *connector;
++ bool enabled;
++
++ mutex_lock(&dev->mode_config.mutex);
++
++ enabled = READ_ONCE(dev_priv->hotplug.poll_enabled);
++
+ list_for_each_entry(connector, &mode_config->connector_list, head) {
+- struct intel_connector *intel_connector = to_intel_connector(connector);
++ struct intel_connector *intel_connector =
++ to_intel_connector(connector);
+ connector->polled = intel_connector->polled;
+
+ /* MST has a dynamic intel_connector->encoder and it's reprobing
+@@ -475,24 +502,62 @@ void intel_hpd_init(struct drm_i915_private *dev_priv)
+ continue;
+
+ if (!connector->polled && I915_HAS_HOTPLUG(dev) &&
+- intel_connector->encoder->hpd_pin > HPD_NONE)
+- connector->polled = DRM_CONNECTOR_POLL_HPD;
++ intel_connector->encoder->hpd_pin > HPD_NONE) {
++ connector->polled = enabled ?
++ DRM_CONNECTOR_POLL_CONNECT |
++ DRM_CONNECTOR_POLL_DISCONNECT :
++ DRM_CONNECTOR_POLL_HPD;
++ }
+ }
+
++ if (enabled)
++ drm_kms_helper_poll_enable_locked(dev);
++
++ mutex_unlock(&dev->mode_config.mutex);
++
+ /*
+- * Interrupt setup is already guaranteed to be single-threaded, this is
+- * just to make the assert_spin_locked checks happy.
++ * We might have missed any hotplugs that happened while we were
++ * in the middle of disabling polling
+ */
+- spin_lock_irq(&dev_priv->irq_lock);
+- if (dev_priv->display.hpd_irq_setup)
+- dev_priv->display.hpd_irq_setup(dev);
+- spin_unlock_irq(&dev_priv->irq_lock);
++ if (!enabled)
++ drm_helper_hpd_irq_event(dev);
++}
++
++/**
++ * intel_hpd_poll_init - enables/disables polling for connectors with hpd
++ * @dev_priv: i915 device instance
++ * @enabled: Whether to enable or disable polling
++ *
++ * This function enables polling for all connectors, regardless of whether or
++ * not they support hotplug detection. Under certain conditions HPD may not be
++ * functional. On most Intel GPUs, this happens when we enter runtime suspend.
++ * On Valleyview and Cherryview systems, this also happens when we shut off all
++ * of the powerwells.
++ *
++ * Since this function can get called in contexts where we're already holding
++ * dev->mode_config.mutex, we do the actual hotplug enabling in a separate
++ * worker.
++ *
++ * Also see: intel_hpd_init(), which restores hpd handling.
++ */
++void intel_hpd_poll_init(struct drm_i915_private *dev_priv)
++{
++ WRITE_ONCE(dev_priv->hotplug.poll_enabled, true);
++
++ /*
++ * We might already be holding dev->mode_config.mutex, so do this in a
++	 * separate worker.
++ * As well, there's no issue if we race here since we always reschedule
++ * this worker anyway
++ */
++ schedule_work(&dev_priv->hotplug.poll_init_work);
+ }
+
+ void intel_hpd_init_work(struct drm_i915_private *dev_priv)
+ {
+ INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func);
+ INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
++ INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
+ INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
+ intel_hpd_irq_storm_reenable_work);
+ }
+@@ -509,5 +574,33 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv)
+
+ cancel_work_sync(&dev_priv->hotplug.dig_port_work);
+ cancel_work_sync(&dev_priv->hotplug.hotplug_work);
++ cancel_work_sync(&dev_priv->hotplug.poll_init_work);
+ cancel_delayed_work_sync(&dev_priv->hotplug.reenable_work);
+ }
++
++bool intel_hpd_disable(struct drm_i915_private *dev_priv, enum hpd_pin pin)
++{
++ bool ret = false;
++
++ if (pin == HPD_NONE)
++ return false;
++
++ spin_lock_irq(&dev_priv->irq_lock);
++ if (dev_priv->hotplug.stats[pin].state == HPD_ENABLED) {
++ dev_priv->hotplug.stats[pin].state = HPD_DISABLED;
++ ret = true;
++ }
++ spin_unlock_irq(&dev_priv->irq_lock);
++
++ return ret;
++}
++
++void intel_hpd_enable(struct drm_i915_private *dev_priv, enum hpd_pin pin)
++{
++ if (pin == HPD_NONE)
++ return;
++
++ spin_lock_irq(&dev_priv->irq_lock);
++ dev_priv->hotplug.stats[pin].state = HPD_ENABLED;
++ spin_unlock_irq(&dev_priv->irq_lock);
++}
+diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
+index 7fb1da4e7fc3..2592b39ff43b 100644
+--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
++++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
+@@ -952,6 +952,7 @@ static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv)
+
+ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv)
+ {
++ struct intel_encoder *encoder;
+ enum pipe pipe;
+
+ /*
+@@ -987,6 +988,12 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv)
+
+ intel_hpd_init(dev_priv);
+
++ /* Re-enable the ADPA, if we have one */
++ for_each_intel_encoder(dev_priv->dev, encoder) {
++ if (encoder->type == INTEL_OUTPUT_ANALOG)
++ intel_crt_reset(&encoder->base);
++ }
++
+ i915_redisable_vga_power_on(dev_priv->dev);
+ }
+
+@@ -1000,6 +1007,8 @@ static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv)
+ synchronize_irq(dev_priv->dev->irq);
+
+ vlv_power_sequencer_reset(dev_priv);
++
++ intel_hpd_poll_init(dev_priv);
+ }
+
+ static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv,
+diff --git a/drivers/hwmon/iio_hwmon.c b/drivers/hwmon/iio_hwmon.c
+index b550ba5fa58a..89449871bca7 100644
+--- a/drivers/hwmon/iio_hwmon.c
++++ b/drivers/hwmon/iio_hwmon.c
+@@ -110,24 +110,24 @@ static int iio_hwmon_probe(struct platform_device *pdev)
+
+ switch (type) {
+ case IIO_VOLTAGE:
+- a->dev_attr.attr.name = kasprintf(GFP_KERNEL,
+- "in%d_input",
+- in_i++);
++ a->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL,
++ "in%d_input",
++ in_i++);
+ break;
+ case IIO_TEMP:
+- a->dev_attr.attr.name = kasprintf(GFP_KERNEL,
+- "temp%d_input",
+- temp_i++);
++ a->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL,
++ "temp%d_input",
++ temp_i++);
+ break;
+ case IIO_CURRENT:
+- a->dev_attr.attr.name = kasprintf(GFP_KERNEL,
+- "curr%d_input",
+- curr_i++);
++ a->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL,
++ "curr%d_input",
++ curr_i++);
+ break;
+ case IIO_HUMIDITYRELATIVE:
+- a->dev_attr.attr.name = kasprintf(GFP_KERNEL,
+- "humidity%d_input",
+- humidity_i++);
++ a->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL,
++ "humidity%d_input",
++ humidity_i++);
+ break;
+ default:
+ ret = -EINVAL;
+diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
+index 730d84028260..55bf479349ff 100644
+--- a/drivers/hwmon/it87.c
++++ b/drivers/hwmon/it87.c
+@@ -2015,6 +2015,7 @@ static struct attribute *it87_attributes_in[] = {
+ &sensor_dev_attr_in10_input.dev_attr.attr, /* 41 */
+ &sensor_dev_attr_in11_input.dev_attr.attr, /* 41 */
+ &sensor_dev_attr_in12_input.dev_attr.attr, /* 41 */
++ NULL
+ };
+
+ static const struct attribute_group it87_group_in = {
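The it87 table fix adds the NULL entry that sysfs attribute arrays are terminated with; without the sentinel, the group walker runs past the end of the array. The general pattern:

    #include <stdio.h>

    static const char *attrs[] = {
        "in10_input",
        "in11_input",
        "in12_input",
        NULL  /* the terminator the hunk adds */
    };

    int main(void)
    {
        for (const char **a = attrs; *a; a++)  /* stops at the sentinel */
            printf("%s\n", *a);
        return 0;
    }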
+diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
+index a0d95ff682ae..2d5ff86398d0 100644
+--- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c
++++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
+@@ -215,7 +215,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
+ msg->outsize = request_len;
+ msg->insize = response_len;
+
+- result = cros_ec_cmd_xfer(bus->ec, msg);
++ result = cros_ec_cmd_xfer_status(bus->ec, msg);
+ if (result < 0) {
+ dev_err(dev, "Error transferring EC i2c message %d\n", result);
+ goto exit;
+diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c
+index 8de073aed001..215ac87f606d 100644
+--- a/drivers/i2c/muxes/i2c-demux-pinctrl.c
++++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c
+@@ -68,7 +68,7 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne
+ adap = of_find_i2c_adapter_by_node(priv->chan[new_chan].parent_np);
+ if (!adap) {
+ ret = -ENODEV;
+- goto err;
++ goto err_with_revert;
+ }
+
+ p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name);
+@@ -103,6 +103,8 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne
+
+ err_with_put:
+ i2c_put_adapter(adap);
++ err_with_revert:
++ of_changeset_revert(&priv->chan[new_chan].chgset);
+ err:
+ dev_err(priv->dev, "failed to setup demux-adapter %d (%d)\n", new_chan, ret);
+ return ret;
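The i2c-demux fix extends a goto unwind ladder: each label releases one more resource, and failures fall through so everything acquired before the failing step is undone. A compilable sketch of the ladder shape (resources simulated with printf):

    #include <stdio.h>

    static int demo(int fail_at)
    {
        int ret = 0;

        printf("apply changeset\n");
        if (fail_at == 1) { ret = -1; goto err_with_revert; }

        printf("get adapter\n");
        if (fail_at == 2) { ret = -2; goto err_with_put; }

        return 0;

    err_with_put:
        printf("put adapter\n");
    err_with_revert:
        printf("revert changeset\n");  /* the step the fix adds */
        printf("report failure %d\n", ret);
        return ret;
    }

    int main(void)
    {
        return demo(2) ? 1 : 0;
    }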
+diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
+index 90462fcf5436..49bf9c59f117 100644
+--- a/drivers/iio/industrialio-buffer.c
++++ b/drivers/iio/industrialio-buffer.c
+@@ -107,6 +107,7 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf,
+ {
+ struct iio_dev *indio_dev = filp->private_data;
+ struct iio_buffer *rb = indio_dev->buffer;
++ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ size_t datum_size;
+ size_t to_wait;
+ int ret;
+@@ -131,19 +132,29 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf,
+ else
+ to_wait = min_t(size_t, n / datum_size, rb->watermark);
+
++ add_wait_queue(&rb->pollq, &wait);
+ do {
+- ret = wait_event_interruptible(rb->pollq,
+- iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size));
+- if (ret)
+- return ret;
++ if (!indio_dev->info) {
++ ret = -ENODEV;
++ break;
++ }
+
+- if (!indio_dev->info)
+- return -ENODEV;
++ if (!iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size)) {
++ if (signal_pending(current)) {
++ ret = -ERESTARTSYS;
++ break;
++ }
++
++ wait_woken(&wait, TASK_INTERRUPTIBLE,
++ MAX_SCHEDULE_TIMEOUT);
++ continue;
++ }
+
+ ret = rb->access->read_first_n(rb, n, buf);
+ if (ret == 0 && (filp->f_flags & O_NONBLOCK))
+ ret = -EAGAIN;
+ } while (ret == 0);
++ remove_wait_queue(&rb->pollq, &wait);
+
+ return ret;
+ }
+diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
+index acc5394afb03..29485bc4221c 100644
+--- a/drivers/input/keyboard/tegra-kbc.c
++++ b/drivers/input/keyboard/tegra-kbc.c
+@@ -376,7 +376,7 @@ static int tegra_kbc_start(struct tegra_kbc *kbc)
+ /* Reset the KBC controller to clear all previous status.*/
+ reset_control_assert(kbc->rst);
+ udelay(100);
+- reset_control_assert(kbc->rst);
++ reset_control_deassert(kbc->rst);
+ udelay(100);
+
+ tegra_kbc_config_pins(kbc);
+diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
+index faa295ec4f31..c83bce89028b 100644
+--- a/drivers/input/rmi4/rmi_driver.c
++++ b/drivers/input/rmi4/rmi_driver.c
+@@ -553,7 +553,6 @@ int rmi_read_register_desc(struct rmi_device *d, u16 addr,
+ goto free_struct_buff;
+
+ reg = find_first_bit(rdesc->presense_map, RMI_REG_DESC_PRESENSE_BITS);
+- map_offset = 0;
+ for (i = 0; i < rdesc->num_registers; i++) {
+ struct rmi_register_desc_item *item = &rdesc->registers[i];
+ int reg_size = struct_buf[offset];
+@@ -576,6 +575,8 @@ int rmi_read_register_desc(struct rmi_device *d, u16 addr,
+ item->reg = reg;
+ item->reg_size = reg_size;
+
++ map_offset = 0;
++
+ do {
+ for (b = 0; b < 7; b++) {
+ if (struct_buf[offset] & (0x1 << b))
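The rmi_driver fix moves the map_offset reset inside the per-register loop; initialized only once outside, every item after the first resumed counting from its predecessor's offset. The bug class in miniature (data invented):

    #include <stdio.h>

    int main(void)
    {
        int widths[3][2] = { {1, 2}, {3, 4}, {5, 6} };

        for (int i = 0; i < 3; i++) {
            int map_offset = 0;  /* reset per item, as the fix does */
            for (int b = 0; b < 2; b++)
                map_offset += widths[i][b];
            printf("item %d size %d\n", i, map_offset);
        }
        return 0;
    }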
+diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
+index 454195709a82..405252a884dd 100644
+--- a/drivers/input/serio/i8042.c
++++ b/drivers/input/serio/i8042.c
+@@ -1277,6 +1277,7 @@ static int __init i8042_create_kbd_port(void)
+ serio->start = i8042_start;
+ serio->stop = i8042_stop;
+ serio->close = i8042_port_close;
++ serio->ps2_cmd_mutex = &i8042_mutex;
+ serio->port_data = port;
+ serio->dev.parent = &i8042_platform_device->dev;
+ strlcpy(serio->name, "i8042 KBD port", sizeof(serio->name));
+@@ -1304,6 +1305,7 @@ static int __init i8042_create_aux_port(int idx)
+ serio->write = i8042_aux_write;
+ serio->start = i8042_start;
+ serio->stop = i8042_stop;
++ serio->ps2_cmd_mutex = &i8042_mutex;
+ serio->port_data = port;
+ serio->dev.parent = &i8042_platform_device->dev;
+ if (idx < 0) {
+@@ -1373,21 +1375,6 @@ static void i8042_unregister_ports(void)
+ }
+ }
+
+-/*
+- * Checks whether port belongs to i8042 controller.
+- */
+-bool i8042_check_port_owner(const struct serio *port)
+-{
+- int i;
+-
+- for (i = 0; i < I8042_NUM_PORTS; i++)
+- if (i8042_ports[i].serio == port)
+- return true;
+-
+- return false;
+-}
+-EXPORT_SYMBOL(i8042_check_port_owner);
+-
+ static void i8042_free_irqs(void)
+ {
+ if (i8042_aux_irq_registered)
+diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
+index 316f2c897101..83e9c663aa67 100644
+--- a/drivers/input/serio/libps2.c
++++ b/drivers/input/serio/libps2.c
+@@ -56,19 +56,17 @@ EXPORT_SYMBOL(ps2_sendbyte);
+
+ void ps2_begin_command(struct ps2dev *ps2dev)
+ {
+- mutex_lock(&ps2dev->cmd_mutex);
++ struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex;
+
+- if (i8042_check_port_owner(ps2dev->serio))
+- i8042_lock_chip();
++ mutex_lock(m);
+ }
+ EXPORT_SYMBOL(ps2_begin_command);
+
+ void ps2_end_command(struct ps2dev *ps2dev)
+ {
+- if (i8042_check_port_owner(ps2dev->serio))
+- i8042_unlock_chip();
++ struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex;
+
+- mutex_unlock(&ps2dev->cmd_mutex);
++ mutex_unlock(m);
+ }
+ EXPORT_SYMBOL(ps2_end_command);
+
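libps2 now picks the controller-wide mutex when the serio port supplies one and falls back to the per-device mutex otherwise, using GCC's `a ?: b` shorthand (a, evaluated once, if non-zero, else b). A portable sketch of the fallback (pthread types stand in for the kernel's):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t shared_mutex = PTHREAD_MUTEX_INITIALIZER;

    struct dev {
        pthread_mutex_t *shared;  /* optional controller-wide lock */
        pthread_mutex_t own;      /* per-device fallback */
    };

    static pthread_mutex_t *cmd_mutex(struct dev *d)
    {
        return d->shared ? d->shared : &d->own;  /* same as d->shared ?: &d->own */
    }

    int main(void)
    {
        struct dev a = { &shared_mutex, PTHREAD_MUTEX_INITIALIZER };
        struct dev b = { NULL, PTHREAD_MUTEX_INITIALIZER };

        printf("a: %s, b: %s\n",
               cmd_mutex(&a) == &shared_mutex ? "shared" : "own",
               cmd_mutex(&b) == &shared_mutex ? "shared" : "own");
        return 0;
    }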
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index 5f6b3bcab078..46ba2b64b6b3 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -879,7 +879,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
+ * We may have concurrent producers, so we need to be careful
+ * not to touch any of the shadow cmdq state.
+ */
+- queue_read(cmd, Q_ENT(q, idx), q->ent_dwords);
++ queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
+ dev_err(smmu->dev, "skipping command in error state:\n");
+ for (i = 0; i < ARRAY_SIZE(cmd); ++i)
+ dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
+@@ -890,7 +890,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
+ return;
+ }
+
+- queue_write(cmd, Q_ENT(q, idx), q->ent_dwords);
++ queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
+ }
+
+ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
+@@ -1034,6 +1034,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
+ case STRTAB_STE_0_CFG_S2_TRANS:
+ ste_live = true;
+ break;
++ case STRTAB_STE_0_CFG_ABORT:
++ if (disable_bypass)
++ break;
+ default:
+ BUG(); /* STE corruption */
+ }
+diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
+index 9345a3fcb706..31422d440d07 100644
+--- a/drivers/iommu/arm-smmu.c
++++ b/drivers/iommu/arm-smmu.c
+@@ -686,8 +686,7 @@ static struct iommu_gather_ops arm_smmu_gather_ops = {
+
+ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
+ {
+- int flags, ret;
+- u32 fsr, fsynr, resume;
++ u32 fsr, fsynr;
+ unsigned long iova;
+ struct iommu_domain *domain = dev;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+@@ -701,34 +700,15 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
+ if (!(fsr & FSR_FAULT))
+ return IRQ_NONE;
+
+- if (fsr & FSR_IGN)
+- dev_err_ratelimited(smmu->dev,
+- "Unexpected context fault (fsr 0x%x)\n",
+- fsr);
+-
+ fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
+- flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
+-
+ iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
+- if (!report_iommu_fault(domain, smmu->dev, iova, flags)) {
+- ret = IRQ_HANDLED;
+- resume = RESUME_RETRY;
+- } else {
+- dev_err_ratelimited(smmu->dev,
+- "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n",
+- iova, fsynr, cfg->cbndx);
+- ret = IRQ_NONE;
+- resume = RESUME_TERMINATE;
+- }
+-
+- /* Clear the faulting FSR */
+- writel(fsr, cb_base + ARM_SMMU_CB_FSR);
+
+- /* Retry or terminate any stalled transactions */
+- if (fsr & FSR_SS)
+- writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME);
++ dev_err_ratelimited(smmu->dev,
++ "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
++ fsr, iova, fsynr, cfg->cbndx);
+
+- return ret;
++ writel(fsr, cb_base + ARM_SMMU_CB_FSR);
++ return IRQ_HANDLED;
+ }
+
+ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
+@@ -837,7 +817,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
+ }
+
+ /* SCTLR */
+- reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
++ reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
+ if (stage1)
+ reg |= SCTLR_S1_ASIDPNE;
+ #ifdef __BIG_ENDIAN
+diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
+index ea5a9ebf0f78..97a23082e18a 100644
+--- a/drivers/iommu/dma-iommu.c
++++ b/drivers/iommu/dma-iommu.c
+@@ -68,7 +68,8 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
+ if (!iovad)
+ return;
+
+- put_iova_domain(iovad);
++ if (iovad->granule)
++ put_iova_domain(iovad);
+ kfree(iovad);
+ domain->iova_cookie = NULL;
+ }
+diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
+index 8c6139986d7d..def8ca1c982d 100644
+--- a/drivers/iommu/io-pgtable-arm-v7s.c
++++ b/drivers/iommu/io-pgtable-arm-v7s.c
+@@ -286,12 +286,14 @@ static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
+ int prot = IOMMU_READ;
+ arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);
+
+- if (attr & ARM_V7S_PTE_AP_RDONLY)
++ if (!(attr & ARM_V7S_PTE_AP_RDONLY))
+ prot |= IOMMU_WRITE;
+ if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0)
+ prot |= IOMMU_MMIO;
+ else if (pte & ARM_V7S_ATTR_C)
+ prot |= IOMMU_CACHE;
++ if (pte & ARM_V7S_ATTR_XN(lvl))
++ prot |= IOMMU_NOEXEC;
+
+ return prot;
+ }
+diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c
+index 4ace1da17db8..6c25213ab38c 100644
+--- a/drivers/md/dm-round-robin.c
++++ b/drivers/md/dm-round-robin.c
+@@ -210,14 +210,17 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes)
+ struct path_info *pi = NULL;
+ struct dm_path *current_path = NULL;
+
++ local_irq_save(flags);
+ current_path = *this_cpu_ptr(s->current_path);
+ if (current_path) {
+ percpu_counter_dec(&s->repeat_count);
+- if (percpu_counter_read_positive(&s->repeat_count) > 0)
++ if (percpu_counter_read_positive(&s->repeat_count) > 0) {
++ local_irq_restore(flags);
+ return current_path;
++ }
+ }
+
+- spin_lock_irqsave(&s->lock, flags);
++ spin_lock(&s->lock);
+ if (!list_empty(&s->valid_paths)) {
+ pi = list_entry(s->valid_paths.next, struct path_info, list);
+ list_move_tail(&pi->list, &s->valid_paths);
+diff --git a/drivers/of/base.c b/drivers/of/base.c
+index 8bb3d1adf1b0..c6a8f47f505e 100644
+--- a/drivers/of/base.c
++++ b/drivers/of/base.c
+@@ -2318,20 +2318,13 @@ struct device_node *of_graph_get_endpoint_by_regs(
+ const struct device_node *parent, int port_reg, int reg)
+ {
+ struct of_endpoint endpoint;
+- struct device_node *node, *prev_node = NULL;
+-
+- while (1) {
+- node = of_graph_get_next_endpoint(parent, prev_node);
+- of_node_put(prev_node);
+- if (!node)
+- break;
++ struct device_node *node = NULL;
+
++ for_each_endpoint_of_node(parent, node) {
+ of_graph_parse_endpoint(node, &endpoint);
+ if (((port_reg == -1) || (endpoint.port == port_reg)) &&
+ ((reg == -1) || (endpoint.id == reg)))
+ return node;
+-
+- prev_node = node;
+ }
+
+ return NULL;
+diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
+index a080f4496fe2..565e2a4e6afa 100644
+--- a/drivers/pci/msi.c
++++ b/drivers/pci/msi.c
+@@ -1277,6 +1277,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
+ if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
+ pci_msi_domain_update_chip_ops(info);
+
++ info->flags |= MSI_FLAG_ACTIVATE_EARLY;
++
+ domain = msi_create_irq_domain(fwnode, info, parent);
+ if (!domain)
+ return NULL;
+diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c
+index 11623c6b0cb3..44e69c963f5d 100644
+--- a/drivers/pinctrl/meson/pinctrl-meson.c
++++ b/drivers/pinctrl/meson/pinctrl-meson.c
+@@ -727,13 +727,7 @@ static int meson_pinctrl_probe(struct platform_device *pdev)
+ return PTR_ERR(pc->pcdev);
+ }
+
+- ret = meson_gpiolib_register(pc);
+- if (ret) {
+- pinctrl_unregister(pc->pcdev);
+- return ret;
+- }
+-
+- return 0;
++ return meson_gpiolib_register(pc);
+ }
+
+ static struct platform_driver meson_pinctrl_driver = {
+diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
+index 634b4d30eefb..b3e772390ab6 100644
+--- a/drivers/pinctrl/pinctrl-amd.c
++++ b/drivers/pinctrl/pinctrl-amd.c
+@@ -43,17 +43,6 @@ static int amd_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
+
+ spin_lock_irqsave(&gpio_dev->lock, flags);
+ pin_reg = readl(gpio_dev->base + offset * 4);
+- /*
+- * Suppose BIOS or Bootloader sets specific debounce for the
+- * GPIO. if not, set debounce to be 2.75ms and remove glitch.
+- */
+- if ((pin_reg & DB_TMR_OUT_MASK) == 0) {
+- pin_reg |= 0xf;
+- pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
+- pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF;
+- pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
+- }
+-
+ pin_reg &= ~BIT(OUTPUT_ENABLE_OFF);
+ writel(pin_reg, gpio_dev->base + offset * 4);
+ spin_unlock_irqrestore(&gpio_dev->lock, flags);
+@@ -326,15 +315,6 @@ static void amd_gpio_irq_enable(struct irq_data *d)
+
+ spin_lock_irqsave(&gpio_dev->lock, flags);
+ pin_reg = readl(gpio_dev->base + (d->hwirq)*4);
+- /*
+- Suppose BIOS or Bootloader sets specific debounce for the
+- GPIO. if not, set debounce to be 2.75ms.
+- */
+- if ((pin_reg & DB_TMR_OUT_MASK) == 0) {
+- pin_reg |= 0xf;
+- pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
+- pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
+- }
+ pin_reg |= BIT(INTERRUPT_ENABLE_OFF);
+ pin_reg |= BIT(INTERRUPT_MASK_OFF);
+ writel(pin_reg, gpio_dev->base + (d->hwirq)*4);
+diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
+index b6e161f71b26..6c084b266651 100644
+--- a/drivers/platform/chrome/cros_ec_proto.c
++++ b/drivers/platform/chrome/cros_ec_proto.c
+@@ -380,3 +380,20 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
+ return ret;
+ }
+ EXPORT_SYMBOL(cros_ec_cmd_xfer);
++
++int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
++ struct cros_ec_command *msg)
++{
++ int ret;
++
++ ret = cros_ec_cmd_xfer(ec_dev, msg);
++ if (ret < 0) {
++ dev_err(ec_dev->dev, "Command xfer error (err:%d)\n", ret);
++ } else if (msg->result != EC_RES_SUCCESS) {
++ dev_dbg(ec_dev->dev, "Command result (err: %d)\n", msg->result);
++ return -EPROTO;
++ }
++
++ return ret;
++}
++EXPORT_SYMBOL(cros_ec_cmd_xfer_status);
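cros_ec_cmd_xfer_status() folds the EC's in-band result code into the return value, so callers such as the i2c tunnel above see -EPROTO for command-level failures instead of a "successful" transfer that silently carries an error. A userspace sketch of the wrapper's logic (types simulated; -EPROTO mirrors the hunk):

    #include <stdio.h>

    #define EPROTO 71
    #define EC_RES_SUCCESS 0

    struct msg { int result; };

    static int xfer(struct msg *m)  /* transport layer: always "works" */
    {
        m->result = 3;              /* but the EC reports an in-band error */
        return 0;
    }

    static int xfer_status(struct msg *m)
    {
        int ret = xfer(m);

        if (ret < 0)
            return ret;             /* transport error */
        if (m->result != EC_RES_SUCCESS)
            return -EPROTO;         /* command error, now visible */
        return ret;
    }

    int main(void)
    {
        struct msg m;
        printf("xfer_status() = %d\n", xfer_status(&m));
        return 0;
    }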
+diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
+index 8973d34ce5ba..fb1b56a71475 100644
+--- a/drivers/s390/block/dasd.c
++++ b/drivers/s390/block/dasd.c
+@@ -1643,9 +1643,18 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
+ u8 *sense = NULL;
+ int expires;
+
++ cqr = (struct dasd_ccw_req *) intparm;
+ if (IS_ERR(irb)) {
+ switch (PTR_ERR(irb)) {
+ case -EIO:
++ if (cqr && cqr->status == DASD_CQR_CLEAR_PENDING) {
++ device = (struct dasd_device *) cqr->startdev;
++ cqr->status = DASD_CQR_CLEARED;
++ dasd_device_clear_timer(device);
++ wake_up(&dasd_flush_wq);
++ dasd_schedule_device_bh(device);
++ return;
++ }
+ break;
+ case -ETIMEDOUT:
+ DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: "
+@@ -1661,7 +1670,6 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
+ }
+
+ now = get_tod_clock();
+- cqr = (struct dasd_ccw_req *) intparm;
+ /* check for conditions that should be handled immediately */
+ if (!cqr ||
+ !(scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) &&
+diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c
+index 4b3bb52b5108..c424e8bc2e31 100644
+--- a/drivers/scsi/aacraid/commctrl.c
++++ b/drivers/scsi/aacraid/commctrl.c
+@@ -63,7 +63,7 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
+ struct fib *fibptr;
+ struct hw_fib * hw_fib = (struct hw_fib *)0;
+ dma_addr_t hw_fib_pa = (dma_addr_t)0LL;
+- unsigned size;
++ unsigned int size, osize;
+ int retval;
+
+ if (dev->in_reset) {
+@@ -87,7 +87,8 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
+ * will not overrun the buffer when we copy the memory. Return
+ * an error if we would.
+ */
+- size = le16_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr);
++ osize = size = le16_to_cpu(kfib->header.Size) +
++ sizeof(struct aac_fibhdr);
+ if (size < le16_to_cpu(kfib->header.SenderSize))
+ size = le16_to_cpu(kfib->header.SenderSize);
+ if (size > dev->max_fib_size) {
+@@ -118,6 +119,14 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
+ goto cleanup;
+ }
+
++ /* Sanity check the second copy */
++ if ((osize != le16_to_cpu(kfib->header.Size) +
++ sizeof(struct aac_fibhdr))
++ || (size < le16_to_cpu(kfib->header.SenderSize))) {
++ retval = -EINVAL;
++ goto cleanup;
++ }
++
+ if (kfib->header.Command == cpu_to_le16(TakeABreakPt)) {
+ aac_adapter_interrupt(dev);
+ /*
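
The sanity check added above closes a double-fetch race: the FIB header is copied from user memory twice, and userspace can change the size fields between the two copies. A generic sketch of the guard, with hypothetical names rather than the aacraid structures:

        static int fetch_msg(struct msg_hdr *k, void __user *u, size_t max)
        {
                size_t size;

                if (copy_from_user(k, u, sizeof(*k)))
                        return -EFAULT;
                size = k->size;                         /* first fetch */
                if (size < sizeof(*k) || size > max)
                        return -EINVAL;
                if (copy_from_user(k, u, size))         /* second fetch */
                        return -EFAULT;
                if (k->size != size)                    /* re-validate */
                        return -EINVAL;
                return 0;
        }
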
+diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
+index 2dab3dc2aa69..c1ed25adb17e 100644
+--- a/drivers/scsi/megaraid/megaraid_sas_base.c
++++ b/drivers/scsi/megaraid/megaraid_sas_base.c
+@@ -5037,7 +5037,7 @@ static int megasas_init_fw(struct megasas_instance *instance)
+ /* Find first memory bar */
+ bar_list = pci_select_bars(instance->pdev, IORESOURCE_MEM);
+ instance->bar = find_first_bit(&bar_list, sizeof(unsigned long));
+- if (pci_request_selected_regions(instance->pdev, instance->bar,
++ if (pci_request_selected_regions(instance->pdev, 1<<instance->bar,
+ "megasas: LSI")) {
+ dev_printk(KERN_DEBUG, &instance->pdev->dev, "IO memory region busy!\n");
+ return -EBUSY;
+@@ -5339,7 +5339,7 @@ fail_ready_state:
+ iounmap(instance->reg_set);
+
+ fail_ioremap:
+- pci_release_selected_regions(instance->pdev, instance->bar);
++ pci_release_selected_regions(instance->pdev, 1<<instance->bar);
+
+ return -EINVAL;
+ }
+@@ -5360,7 +5360,7 @@ static void megasas_release_mfi(struct megasas_instance *instance)
+
+ iounmap(instance->reg_set);
+
+- pci_release_selected_regions(instance->pdev, instance->bar);
++ pci_release_selected_regions(instance->pdev, 1<<instance->bar);
+ }
+
+ /**
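
The megasas changes turn a BAR index into a bitmask because pci_request_selected_regions() and pci_release_selected_regions() take a mask of BARs, not an index. A standalone sketch of the difference:

        #include <stdio.h>

        int main(void)
        {
                int bar = 3;                    /* index, e.g. from find_first_bit() */
                unsigned int mask = 1u << bar;  /* what the PCI helpers expect */

                /* Passing the raw index 3 (binary 011) would request BARs 0
                 * and 1; the mask 1<<3 (binary 1000) selects only BAR 3. */
                printf("index=%d wrong-as-mask=0x%x correct-mask=0x%x\n",
                       bar, bar, mask);
                return 0;
        }
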
+diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
+index ec837544f784..52d8bbf7feb5 100644
+--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
+@@ -2603,7 +2603,7 @@ megasas_release_fusion(struct megasas_instance *instance)
+
+ iounmap(instance->reg_set);
+
+- pci_release_selected_regions(instance->pdev, instance->bar);
++ pci_release_selected_regions(instance->pdev, 1<<instance->bar);
+ }
+
+ /**
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
+index 751f13edece0..750f82c339d4 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
+@@ -2188,6 +2188,17 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc)
+ } else
+ ioc->msix96_vector = 0;
+
++ if (ioc->is_warpdrive) {
++ ioc->reply_post_host_index[0] = (resource_size_t __iomem *)
++ &ioc->chip->ReplyPostHostIndex;
++
++ for (i = 1; i < ioc->cpu_msix_table_sz; i++)
++ ioc->reply_post_host_index[i] =
++ (resource_size_t __iomem *)
++ ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1)
++ * 4)));
++ }
++
+ list_for_each_entry(reply_q, &ioc->reply_queue_list, list)
+ pr_info(MPT3SAS_FMT "%s: IRQ %d\n",
+ reply_q->name, ((ioc->msix_enable) ? "PCI-MSI-X enabled" :
+@@ -5280,17 +5291,6 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
+ if (r)
+ goto out_free_resources;
+
+- if (ioc->is_warpdrive) {
+- ioc->reply_post_host_index[0] = (resource_size_t __iomem *)
+- &ioc->chip->ReplyPostHostIndex;
+-
+- for (i = 1; i < ioc->cpu_msix_table_sz; i++)
+- ioc->reply_post_host_index[i] =
+- (resource_size_t __iomem *)
+- ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1)
+- * 4)));
+- }
+-
+ pci_set_drvdata(ioc->pdev, ioc->shost);
+ r = _base_get_ioc_facts(ioc, CAN_SLEEP);
+ if (r)
+diff --git a/drivers/staging/comedi/drivers/comedi_test.c b/drivers/staging/comedi/drivers/comedi_test.c
+index 4ab186669f0c..ec5b9a23494d 100644
+--- a/drivers/staging/comedi/drivers/comedi_test.c
++++ b/drivers/staging/comedi/drivers/comedi_test.c
+@@ -56,11 +56,6 @@
+
+ #define N_CHANS 8
+
+-enum waveform_state_bits {
+- WAVEFORM_AI_RUNNING,
+- WAVEFORM_AO_RUNNING
+-};
+-
+ /* Data unique to this driver */
+ struct waveform_private {
+ struct timer_list ai_timer; /* timer for AI commands */
+@@ -68,7 +63,6 @@ struct waveform_private {
+ unsigned int wf_amplitude; /* waveform amplitude in microvolts */
+ unsigned int wf_period; /* waveform period in microseconds */
+ unsigned int wf_current; /* current time in waveform period */
+- unsigned long state_bits;
+ unsigned int ai_scan_period; /* AI scan period in usec */
+ unsigned int ai_convert_period; /* AI conversion period in usec */
+ struct timer_list ao_timer; /* timer for AO commands */
+@@ -191,10 +185,6 @@ static void waveform_ai_timer(unsigned long arg)
+ unsigned int nsamples;
+ unsigned int time_increment;
+
+- /* check command is still active */
+- if (!test_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits))
+- return;
+-
+ now = ktime_to_us(ktime_get());
+ nsamples = comedi_nsamples_left(s, UINT_MAX);
+
+@@ -386,11 +376,6 @@ static int waveform_ai_cmd(struct comedi_device *dev,
+ */
+ devpriv->ai_timer.expires =
+ jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1;
+-
+- /* mark command as active */
+- smp_mb__before_atomic();
+- set_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits);
+- smp_mb__after_atomic();
+ add_timer(&devpriv->ai_timer);
+ return 0;
+ }
+@@ -400,11 +385,12 @@ static int waveform_ai_cancel(struct comedi_device *dev,
+ {
+ struct waveform_private *devpriv = dev->private;
+
+- /* mark command as no longer active */
+- clear_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits);
+- smp_mb__after_atomic();
+- /* cannot call del_timer_sync() as may be called from timer routine */
+- del_timer(&devpriv->ai_timer);
++ if (in_softirq()) {
++ /* Assume we were called from the timer routine itself. */
++ del_timer(&devpriv->ai_timer);
++ } else {
++ del_timer_sync(&devpriv->ai_timer);
++ }
+ return 0;
+ }
+
+@@ -436,10 +422,6 @@ static void waveform_ao_timer(unsigned long arg)
+ u64 scans_since;
+ unsigned int scans_avail = 0;
+
+- /* check command is still active */
+- if (!test_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits))
+- return;
+-
+ /* determine number of scan periods since last time */
+ now = ktime_to_us(ktime_get());
+ scans_since = now - devpriv->ao_last_scan_time;
+@@ -518,11 +500,6 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev,
+ devpriv->ao_last_scan_time = ktime_to_us(ktime_get());
+ devpriv->ao_timer.expires =
+ jiffies + usecs_to_jiffies(devpriv->ao_scan_period);
+-
+- /* mark command as active */
+- smp_mb__before_atomic();
+- set_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits);
+- smp_mb__after_atomic();
+ add_timer(&devpriv->ao_timer);
+
+ return 1;
+@@ -608,11 +585,12 @@ static int waveform_ao_cancel(struct comedi_device *dev,
+ struct waveform_private *devpriv = dev->private;
+
+ s->async->inttrig = NULL;
+- /* mark command as no longer active */
+- clear_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits);
+- smp_mb__after_atomic();
+- /* cannot call del_timer_sync() as may be called from timer routine */
+- del_timer(&devpriv->ao_timer);
++ if (in_softirq()) {
++ /* Assume we were called from the timer routine itself. */
++ del_timer(&devpriv->ao_timer);
++ } else {
++ del_timer_sync(&devpriv->ao_timer);
++ }
+ return 0;
+ }
+
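
Both cancel paths above rely on the same rule: del_timer_sync() waits for a running handler to finish, so calling it from the handler itself would deadlock, and in_softirq() is used as a heuristic for that case. The idiom in isolation (kernel-style sketch, not driver code):

        static void cancel_pending_timer(struct timer_list *t)
        {
                if (in_softirq()) {
                        /* We may be the running handler; waiting would deadlock. */
                        del_timer(t);
                } else {
                        /* Safe to wait until any running handler has returned. */
                        del_timer_sync(t);
                }
        }
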
+diff --git a/drivers/staging/comedi/drivers/daqboard2000.c b/drivers/staging/comedi/drivers/daqboard2000.c
+index a536a15c1d30..a91ea0235191 100644
+--- a/drivers/staging/comedi/drivers/daqboard2000.c
++++ b/drivers/staging/comedi/drivers/daqboard2000.c
+@@ -636,7 +636,7 @@ static const void *daqboard2000_find_boardinfo(struct comedi_device *dev,
+ const struct daq200_boardtype *board;
+ int i;
+
+- if (pcidev->subsystem_device != PCI_VENDOR_ID_IOTECH)
++ if (pcidev->subsystem_vendor != PCI_VENDOR_ID_IOTECH)
+ return NULL;
+
+ for (i = 0; i < ARRAY_SIZE(boardtypes); i++) {
+diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c
+index 8dabb19519a5..0f97d7b611d7 100644
+--- a/drivers/staging/comedi/drivers/ni_mio_common.c
++++ b/drivers/staging/comedi/drivers/ni_mio_common.c
+@@ -2772,7 +2772,15 @@ static int ni_ao_inttrig(struct comedi_device *dev,
+ int i;
+ static const int timeout = 1000;
+
+- if (trig_num != cmd->start_arg)
++ /*
++ * Require trig_num == cmd->start_arg when cmd->start_src == TRIG_INT.
++ * For backwards compatibility, also allow trig_num == 0 when
++ * cmd->start_src != TRIG_INT (i.e. when cmd->start_src == TRIG_EXT);
++ * in that case, the internal trigger is being used as a pre-trigger
++ * before the external trigger.
++ */
++ if (!(trig_num == cmd->start_arg ||
++ (trig_num == 0 && cmd->start_src != TRIG_INT)))
+ return -EINVAL;
+
+ /*
+@@ -5480,7 +5488,7 @@ static int ni_E_init(struct comedi_device *dev,
+ s->maxdata = (devpriv->is_m_series) ? 0xffffffff
+ : 0x00ffffff;
+ s->insn_read = ni_tio_insn_read;
+- s->insn_write = ni_tio_insn_read;
++ s->insn_write = ni_tio_insn_write;
+ s->insn_config = ni_tio_insn_config;
+ #ifdef PCIDMA
+ if (dev->irq && devpriv->mite) {
+diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
+index 5eba0ebae10f..86e40ce39632 100644
+--- a/drivers/staging/lustre/lustre/llite/namei.c
++++ b/drivers/staging/lustre/lustre/llite/namei.c
+@@ -391,6 +391,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
+ struct inode *inode = NULL;
+ __u64 bits = 0;
+ int rc = 0;
++ struct dentry *alias;
+
+ /* NB 1 request reference will be taken away by ll_intent_lock()
+ * when I return
+@@ -415,26 +416,12 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
+ */
+ }
+
+- /* Only hash *de if it is unhashed (new dentry).
+- * Atoimc_open may passing hashed dentries for open.
+- */
+- if (d_unhashed(*de)) {
+- struct dentry *alias;
+-
+- alias = ll_splice_alias(inode, *de);
+- if (IS_ERR(alias)) {
+- rc = PTR_ERR(alias);
+- goto out;
+- }
+- *de = alias;
+- } else if (!it_disposition(it, DISP_LOOKUP_NEG) &&
+- !it_disposition(it, DISP_OPEN_CREATE)) {
+- /* With DISP_OPEN_CREATE dentry will be
+- * instantiated in ll_create_it.
+- */
+- LASSERT(!d_inode(*de));
+- d_instantiate(*de, inode);
++ alias = ll_splice_alias(inode, *de);
++ if (IS_ERR(alias)) {
++ rc = PTR_ERR(alias);
++ goto out;
+ }
++ *de = alias;
+
+ if (!it_disposition(it, DISP_LOOKUP_NEG)) {
+ /* we have lookup look - unhide dentry */
+@@ -590,6 +577,24 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
+ dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
+ *opened);
+
++ /* Only negative dentries enter here */
++ LASSERT(!d_inode(dentry));
++
++ if (!d_in_lookup(dentry)) {
++ /* A valid negative dentry that just passed revalidation,
++ * there's little point in trying to open it server-side,
++ * even though there's a minuscule chance it might succeed.
++ * Either way it's a valid race to just return -ENOENT here.
++ */
++ if (!(open_flags & O_CREAT))
++ return -ENOENT;
++
++ /* Otherwise we just unhash it to be rehashed afresh via
++ * lookup if necessary
++ */
++ d_drop(dentry);
++ }
++
+ it = kzalloc(sizeof(*it), GFP_NOFS);
+ if (!it)
+ return -ENOMEM;
+diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
+index 065f5d97aa67..dfec5a176315 100644
+--- a/drivers/usb/chipidea/udc.c
++++ b/drivers/usb/chipidea/udc.c
+@@ -1596,8 +1596,11 @@ static int ci_udc_pullup(struct usb_gadget *_gadget, int is_on)
+ {
+ struct ci_hdrc *ci = container_of(_gadget, struct ci_hdrc, gadget);
+
+- /* Data+ pullup controlled by OTG state machine in OTG fsm mode */
+- if (ci_otg_is_fsm_mode(ci))
++ /*
++ * The Data+ pullup is controlled by the OTG state machine in OTG fsm
++ * mode; don't touch Data+ in host mode for a dual-role config.
++ */
++ if (ci_otg_is_fsm_mode(ci) || ci->role == CI_ROLE_HOST)
+ return 0;
+
+ pm_runtime_get_sync(&ci->gadget.dev);
+diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
+index 94a14f5dc4d4..0a4d54a87f7c 100644
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -1405,7 +1405,6 @@ made_compressed_probe:
+ spin_lock_init(&acm->write_lock);
+ spin_lock_init(&acm->read_lock);
+ mutex_init(&acm->mutex);
+- acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
+ acm->is_int_ep = usb_endpoint_xfer_int(epread);
+ if (acm->is_int_ep)
+ acm->bInterval = epread->bInterval;
+@@ -1445,14 +1444,14 @@ made_compressed_probe:
+ urb->transfer_dma = rb->dma;
+ if (acm->is_int_ep) {
+ usb_fill_int_urb(urb, acm->dev,
+- acm->rx_endpoint,
++ usb_rcvintpipe(usb_dev, epread->bEndpointAddress),
+ rb->base,
+ acm->readsize,
+ acm_read_bulk_callback, rb,
+ acm->bInterval);
+ } else {
+ usb_fill_bulk_urb(urb, acm->dev,
+- acm->rx_endpoint,
++ usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress),
+ rb->base,
+ acm->readsize,
+ acm_read_bulk_callback, rb);
+diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
+index 05ce308d5d2a..1f1eabfd8462 100644
+--- a/drivers/usb/class/cdc-acm.h
++++ b/drivers/usb/class/cdc-acm.h
+@@ -96,7 +96,6 @@ struct acm {
+ struct acm_rb read_buffers[ACM_NR];
+ struct acm_wb *putbuffer; /* for acm_tty_put_char() */
+ int rx_buflimit;
+- int rx_endpoint;
+ spinlock_t read_lock;
+ int write_used; /* number of non-empty write buffers */
+ int transmitting;
+diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
+index 31ccdccd7a04..15ce4ab11688 100644
+--- a/drivers/usb/core/config.c
++++ b/drivers/usb/core/config.c
+@@ -171,6 +171,31 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
+ ep, buffer, size);
+ }
+
++static const unsigned short low_speed_maxpacket_maxes[4] = {
++ [USB_ENDPOINT_XFER_CONTROL] = 8,
++ [USB_ENDPOINT_XFER_ISOC] = 0,
++ [USB_ENDPOINT_XFER_BULK] = 0,
++ [USB_ENDPOINT_XFER_INT] = 8,
++};
++static const unsigned short full_speed_maxpacket_maxes[4] = {
++ [USB_ENDPOINT_XFER_CONTROL] = 64,
++ [USB_ENDPOINT_XFER_ISOC] = 1023,
++ [USB_ENDPOINT_XFER_BULK] = 64,
++ [USB_ENDPOINT_XFER_INT] = 64,
++};
++static const unsigned short high_speed_maxpacket_maxes[4] = {
++ [USB_ENDPOINT_XFER_CONTROL] = 64,
++ [USB_ENDPOINT_XFER_ISOC] = 1024,
++ [USB_ENDPOINT_XFER_BULK] = 512,
++ [USB_ENDPOINT_XFER_INT] = 1024,
++};
++static const unsigned short super_speed_maxpacket_maxes[4] = {
++ [USB_ENDPOINT_XFER_CONTROL] = 512,
++ [USB_ENDPOINT_XFER_ISOC] = 1024,
++ [USB_ENDPOINT_XFER_BULK] = 1024,
++ [USB_ENDPOINT_XFER_INT] = 1024,
++};
++
+ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
+ int asnum, struct usb_host_interface *ifp, int num_ep,
+ unsigned char *buffer, int size)
+@@ -179,6 +204,8 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
+ struct usb_endpoint_descriptor *d;
+ struct usb_host_endpoint *endpoint;
+ int n, i, j, retval;
++ unsigned int maxp;
++ const unsigned short *maxpacket_maxes;
+
+ d = (struct usb_endpoint_descriptor *) buffer;
+ buffer += d->bLength;
+@@ -286,6 +313,42 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
+ endpoint->desc.wMaxPacketSize = cpu_to_le16(8);
+ }
+
++ /* Validate the wMaxPacketSize field */
++ maxp = usb_endpoint_maxp(&endpoint->desc);
++
++ /* Find the highest legal maxpacket size for this endpoint */
++ i = 0; /* additional transactions per microframe */
++ switch (to_usb_device(ddev)->speed) {
++ case USB_SPEED_LOW:
++ maxpacket_maxes = low_speed_maxpacket_maxes;
++ break;
++ case USB_SPEED_FULL:
++ maxpacket_maxes = full_speed_maxpacket_maxes;
++ break;
++ case USB_SPEED_HIGH:
++ /* Bits 12..11 are allowed only for HS periodic endpoints */
++ if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) {
++ i = maxp & (BIT(12) | BIT(11));
++ maxp &= ~i;
++ }
++ /* fallthrough */
++ default:
++ maxpacket_maxes = high_speed_maxpacket_maxes;
++ break;
++ case USB_SPEED_SUPER:
++ case USB_SPEED_SUPER_PLUS:
++ maxpacket_maxes = super_speed_maxpacket_maxes;
++ break;
++ }
++ j = maxpacket_maxes[usb_endpoint_type(&endpoint->desc)];
++
++ if (maxp > j) {
++ dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n",
++ cfgno, inum, asnum, d->bEndpointAddress, maxp, j);
++ maxp = j;
++ endpoint->desc.wMaxPacketSize = cpu_to_le16(i | maxp);
++ }
++
+ /*
+ * Some buggy high speed devices have bulk endpoints using
+ * maxpacket sizes other than 512. High speed HCDs may not
+@@ -293,9 +356,6 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
+ */
+ if (to_usb_device(ddev)->speed == USB_SPEED_HIGH
+ && usb_endpoint_xfer_bulk(d)) {
+- unsigned maxp;
+-
+- maxp = usb_endpoint_maxp(&endpoint->desc) & 0x07ff;
+ if (maxp != 512)
+ dev_warn(ddev, "config %d interface %d altsetting %d "
+ "bulk endpoint 0x%X has invalid maxpacket %d\n",
+diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
+index e9f5043a2167..50b6baa50e11 100644
+--- a/drivers/usb/core/devio.c
++++ b/drivers/usb/core/devio.c
+@@ -241,7 +241,8 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
+ goto error_decrease_mem;
+ }
+
+- mem = usb_alloc_coherent(ps->dev, size, GFP_USER, &dma_handle);
++ mem = usb_alloc_coherent(ps->dev, size, GFP_USER | __GFP_NOWARN,
++ &dma_handle);
+ if (!mem) {
+ ret = -ENOMEM;
+ goto error_free_usbm;
+@@ -1708,11 +1709,17 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
+ as->urb->start_frame = uurb->start_frame;
+ as->urb->number_of_packets = number_of_packets;
+ as->urb->stream_id = stream_id;
+- if (uurb->type == USBDEVFS_URB_TYPE_ISO ||
+- ps->dev->speed == USB_SPEED_HIGH)
+- as->urb->interval = 1 << min(15, ep->desc.bInterval - 1);
+- else
+- as->urb->interval = ep->desc.bInterval;
++
++ if (ep->desc.bInterval) {
++ if (uurb->type == USBDEVFS_URB_TYPE_ISO ||
++ ps->dev->speed == USB_SPEED_HIGH ||
++ ps->dev->speed >= USB_SPEED_SUPER)
++ as->urb->interval = 1 <<
++ min(15, ep->desc.bInterval - 1);
++ else
++ as->urb->interval = ep->desc.bInterval;
++ }
++
+ as->urb->context = as;
+ as->urb->complete = async_completed;
+ for (totlen = u = 0; u < number_of_packets; u++) {
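
The interval fix applies the USB encoding rules: a zero bInterval means no periodic polling, and for isochronous transfers or high/super-speed devices bInterval is an exponent, interval = 2^(bInterval-1) (micro)frames with the shift clamped to 15, rather than a plain frame count. A standalone sketch (names illustrative):

        #include <stdio.h>

        static unsigned int urb_interval(int iso, int fast, unsigned char bInterval)
        {
                int shift;

                if (!bInterval)
                        return 0;               /* no periodic polling */
                if (!(iso || fast))
                        return bInterval;       /* low/full speed: plain count */
                shift = bInterval - 1;
                if (shift > 15)
                        shift = 15;             /* same clamp as min(15, ...) */
                return 1u << shift;
        }

        int main(void)
        {
                printf("%u\n", urb_interval(1, 1, 4));  /* 2^3 = 8 (micro)frames */
                printf("%u\n", urb_interval(0, 0, 4));  /* full speed: 4 frames */
                return 0;
        }
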
+diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
+index bee13517676f..1d5fc32d06d0 100644
+--- a/drivers/usb/core/hub.c
++++ b/drivers/usb/core/hub.c
+@@ -1052,14 +1052,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
+
+ /* Continue a partial initialization */
+ if (type == HUB_INIT2 || type == HUB_INIT3) {
+- device_lock(hub->intfdev);
++ device_lock(&hdev->dev);
+
+ /* Was the hub disconnected while we were waiting? */
+- if (hub->disconnected) {
+- device_unlock(hub->intfdev);
+- kref_put(&hub->kref, hub_release);
+- return;
+- }
++ if (hub->disconnected)
++ goto disconnected;
+ if (type == HUB_INIT2)
+ goto init2;
+ goto init3;
+@@ -1262,7 +1259,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
+ queue_delayed_work(system_power_efficient_wq,
+ &hub->init_work,
+ msecs_to_jiffies(delay));
+- device_unlock(hub->intfdev);
++ device_unlock(&hdev->dev);
+ return; /* Continues at init3: below */
+ } else {
+ msleep(delay);
+@@ -1281,12 +1278,12 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
+ /* Scan all ports that need attention */
+ kick_hub_wq(hub);
+
+- /* Allow autosuspend if it was suppressed */
+- if (type <= HUB_INIT3)
++ if (type == HUB_INIT2 || type == HUB_INIT3) {
++ /* Allow autosuspend if it was suppressed */
++ disconnected:
+ usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
+-
+- if (type == HUB_INIT2 || type == HUB_INIT3)
+- device_unlock(hub->intfdev);
++ device_unlock(&hdev->dev);
++ }
+
+ kref_put(&hub->kref, hub_release);
+ }
+@@ -1315,8 +1312,6 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type)
+ struct usb_device *hdev = hub->hdev;
+ int i;
+
+- cancel_delayed_work_sync(&hub->init_work);
+-
+ /* hub_wq and related activity won't re-trigger */
+ hub->quiescing = 1;
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index 14196cd416b3..2fd50578b41d 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -37,6 +37,7 @@
+ #define PCI_DEVICE_ID_INTEL_BXT 0x0aaa
+ #define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa
+ #define PCI_DEVICE_ID_INTEL_APL 0x5aaa
++#define PCI_DEVICE_ID_INTEL_KBP 0xa2b0
+
+ static const struct acpi_gpio_params reset_gpios = { 0, 0, false };
+ static const struct acpi_gpio_params cs_gpios = { 1, 0, false };
+@@ -214,6 +215,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT), },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT_M), },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), },
++ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), },
+ { } /* Terminating Entry */
+ };
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index 716f4f051a0a..05a5300aa1ab 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -1897,7 +1897,8 @@ static void dwc3_gadget_free_endpoints(struct dwc3 *dwc)
+
+ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
+ struct dwc3_request *req, struct dwc3_trb *trb,
+- const struct dwc3_event_depevt *event, int status)
++ const struct dwc3_event_depevt *event, int status,
++ int chain)
+ {
+ unsigned int count;
+ unsigned int s_pkt = 0;
+@@ -1905,6 +1906,19 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
+
+ trace_dwc3_complete_trb(dep, trb);
+
++ /*
++ * If we're in the middle of series of chained TRBs and we
++ * receive a short transfer along the way, DWC3 will skip
++ * through all TRBs including the last TRB in the chain (the
++ * where CHN bit is zero. DWC3 will also avoid clearing HWO
++ * bit and SW has to do it manually.
++ *
++ * We're going to do that here to avoid problems of HW trying
++ * to use bogus TRBs for transfers.
++ */
++ if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO))
++ trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
++
+ if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN)
+ /*
+ * We continue despite the error. There is not much we
+@@ -1916,6 +1930,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
+ */
+ dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n",
+ dep->name, trb);
++
+ count = trb->size & DWC3_TRB_SIZE_MASK;
+
+ if (dep->direction) {
+@@ -1954,15 +1969,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
+ s_pkt = 1;
+ }
+
+- /*
+- * We assume here we will always receive the entire data block
+- * which we should receive. Meaning, if we program RX to
+- * receive 4K but we receive only 2K, we assume that's all we
+- * should receive and we simply bounce the request back to the
+- * gadget driver for further processing.
+- */
+- req->request.actual += req->request.length - count;
+- if (s_pkt)
++ if (s_pkt && !chain)
+ return 1;
+ if ((event->status & DEPEVT_STATUS_LST) &&
+ (trb->ctrl & (DWC3_TRB_CTRL_LST |
+@@ -1981,13 +1988,17 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
+ struct dwc3_trb *trb;
+ unsigned int slot;
+ unsigned int i;
++ int count = 0;
+ int ret;
+
+ do {
++ int chain;
++
+ req = next_request(&dep->started_list);
+ if (WARN_ON_ONCE(!req))
+ return 1;
+
++ chain = req->request.num_mapped_sgs > 0;
+ i = 0;
+ do {
+ slot = req->first_trb_index + i;
+@@ -1995,13 +2006,22 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
+ slot++;
+ slot %= DWC3_TRB_NUM;
+ trb = &dep->trb_pool[slot];
++ count += trb->size & DWC3_TRB_SIZE_MASK;
+
+ ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
+- event, status);
++ event, status, chain);
+ if (ret)
+ break;
+ } while (++i < req->request.num_mapped_sgs);
+
++ /*
++ * We assume here we will always receive the entire data block
++ * which we should receive. Meaning, if we program RX to
++ * receive 4K but we receive only 2K, we assume that's all we
++ * should receive and we simply bounce the request back to the
++ * gadget driver for further processing.
++ */
++ req->request.actual += req->request.length - count;
+ dwc3_gadget_giveback(dep, req, status);
+
+ if (ret)
+diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
+index aa3707bdebb4..be6479830221 100644
+--- a/drivers/usb/gadget/legacy/inode.c
++++ b/drivers/usb/gadget/legacy/inode.c
+@@ -542,7 +542,7 @@ static ssize_t ep_aio(struct kiocb *iocb,
+ */
+ spin_lock_irq(&epdata->dev->lock);
+ value = -ENODEV;
+- if (unlikely(epdata->ep))
++ if (unlikely(epdata->ep == NULL))
+ goto fail;
+
+ req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC);
+diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c
+index 93d28cb00b76..cf8819a5c5b2 100644
+--- a/drivers/usb/gadget/udc/fsl_qe_udc.c
++++ b/drivers/usb/gadget/udc/fsl_qe_udc.c
+@@ -2053,7 +2053,7 @@ static void setup_received_handle(struct qe_udc *udc,
+ struct qe_ep *ep;
+
+ if (wValue != 0 || wLength != 0
+- || pipe > USB_MAX_ENDPOINTS)
++ || pipe >= USB_MAX_ENDPOINTS)
+ break;
+ ep = &udc->eps[pipe];
+
+diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
+index e1b2dcebdc2e..bd4695075bac 100644
+--- a/drivers/usb/gadget/udc/udc-core.c
++++ b/drivers/usb/gadget/udc/udc-core.c
+@@ -106,7 +106,7 @@ void usb_gadget_unmap_request_by_dev(struct device *dev,
+ return;
+
+ if (req->num_mapped_sgs) {
+- dma_unmap_sg(dev, req->sg, req->num_mapped_sgs,
++ dma_unmap_sg(dev, req->sg, req->num_sgs,
+ is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+
+ req->num_mapped_sgs = 0;
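
The one-line change enforces a DMA API rule: dma_unmap_sg() must be passed the same nents that was given to dma_map_sg(), not the possibly smaller count dma_map_sg() returned after merging entries. Sketch of the correct pairing (kernel-style, illustrative):

        int mapped = dma_map_sg(dev, req->sg, req->num_sgs, dir);

        if (mapped <= 0)
                return -EFAULT;
        /* ... program the hardware with the 'mapped' entries ... */
        dma_unmap_sg(dev, req->sg, req->num_sgs, dir);  /* original count */
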
+diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
+index a962b89b65a6..1e5f529d51a2 100644
+--- a/drivers/usb/host/ehci-hcd.c
++++ b/drivers/usb/host/ehci-hcd.c
+@@ -332,11 +332,11 @@ static void ehci_turn_off_all_ports(struct ehci_hcd *ehci)
+ int port = HCS_N_PORTS(ehci->hcs_params);
+
+ while (port--) {
+- ehci_writel(ehci, PORT_RWC_BITS,
+- &ehci->regs->port_status[port]);
+ spin_unlock_irq(&ehci->lock);
+ ehci_port_power(ehci, port, false);
+ spin_lock_irq(&ehci->lock);
++ ehci_writel(ehci, PORT_RWC_BITS,
++ &ehci->regs->port_status[port]);
+ }
+ }
+
+diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
+index d61fcc48099e..730b9fd26685 100644
+--- a/drivers/usb/host/xhci-hub.c
++++ b/drivers/usb/host/xhci-hub.c
+@@ -386,6 +386,9 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend)
+
+ ret = 0;
+ virt_dev = xhci->devs[slot_id];
++ if (!virt_dev)
++ return -ENODEV;
++
+ cmd = xhci_alloc_command(xhci, false, true, GFP_NOIO);
+ if (!cmd) {
+ xhci_dbg(xhci, "Couldn't allocate command structure.\n");
+diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
+index c10972fcc8e4..69f7fabdbb53 100644
+--- a/drivers/usb/host/xhci-pci.c
++++ b/drivers/usb/host/xhci-pci.c
+@@ -314,11 +314,12 @@ static void xhci_pci_remove(struct pci_dev *dev)
+ usb_remove_hcd(xhci->shared_hcd);
+ usb_put_hcd(xhci->shared_hcd);
+ }
+- usb_hcd_pci_remove(dev);
+
+ /* Workaround for spurious wakeups at shutdown with HSW */
+ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
+ pci_set_power_state(dev, PCI_D3hot);
++
++ usb_hcd_pci_remove(dev);
+ }
+
+ #ifdef CONFIG_PM
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index d7d502578d79..bc17bcf57bbd 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -1325,12 +1325,6 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
+
+ cmd = list_entry(xhci->cmd_list.next, struct xhci_command, cmd_list);
+
+- if (cmd->command_trb != xhci->cmd_ring->dequeue) {
+- xhci_err(xhci,
+- "Command completion event does not match command\n");
+- return;
+- }
+-
+ del_timer(&xhci->cmd_timer);
+
+ trace_xhci_cmd_completion(cmd_trb, (struct xhci_generic_trb *) event);
+@@ -1342,6 +1336,13 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
+ xhci_handle_stopped_cmd_ring(xhci, cmd);
+ return;
+ }
++
++ if (cmd->command_trb != xhci->cmd_ring->dequeue) {
++ xhci_err(xhci,
++ "Command completion event does not match command\n");
++ return;
++ }
++
+ /*
+ * Host aborted the command ring, check if the current command was
+ * supposed to be aborted, otherwise continue normally.
+diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
+index 6b978f04b8d7..5c8210dc6fd9 100644
+--- a/drivers/usb/misc/usbtest.c
++++ b/drivers/usb/misc/usbtest.c
+@@ -585,7 +585,6 @@ static void sg_timeout(unsigned long _req)
+ {
+ struct usb_sg_request *req = (struct usb_sg_request *) _req;
+
+- req->status = -ETIMEDOUT;
+ usb_sg_cancel(req);
+ }
+
+@@ -616,8 +615,10 @@ static int perform_sglist(
+ mod_timer(&sg_timer, jiffies +
+ msecs_to_jiffies(SIMPLE_IO_TIMEOUT));
+ usb_sg_wait(req);
+- del_timer_sync(&sg_timer);
+- retval = req->status;
++ if (!del_timer_sync(&sg_timer))
++ retval = -ETIMEDOUT;
++ else
++ retval = req->status;
+
+ /* FIXME check resulting data pattern */
+
+@@ -2602,7 +2603,7 @@ usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf)
+ ktime_get_ts64(&start);
+
+ retval = usbtest_do_ioctl(intf, param_32);
+- if (retval)
++ if (retval < 0)
+ goto free_mutex;
+
+ ktime_get_ts64(&end);
+diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c
+index baeb7d23bf24..8c81aac09d71 100644
+--- a/drivers/usb/renesas_usbhs/common.c
++++ b/drivers/usb/renesas_usbhs/common.c
+@@ -514,7 +514,8 @@ static struct renesas_usbhs_platform_info *usbhs_parse_dt(struct device *dev)
+ if (gpio > 0)
+ dparam->enable_gpio = gpio;
+
+- if (dparam->type == USBHS_TYPE_RCAR_GEN2)
++ if (dparam->type == USBHS_TYPE_RCAR_GEN2 ||
++ dparam->type == USBHS_TYPE_RCAR_GEN3)
+ dparam->has_usb_dmac = 1;
+
+ return info;
+diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
+index 280ed5ff021b..857e78337324 100644
+--- a/drivers/usb/renesas_usbhs/fifo.c
++++ b/drivers/usb/renesas_usbhs/fifo.c
+@@ -871,7 +871,7 @@ static int usbhsf_dma_prepare_push(struct usbhs_pkt *pkt, int *is_done)
+
+ /* use PIO if packet is less than pio_dma_border or pipe is DCP */
+ if ((len < usbhs_get_dparam(priv, pio_dma_border)) ||
+- usbhs_pipe_is_dcp(pipe))
++ usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC))
+ goto usbhsf_pio_prepare_push;
+
+ /* check data length if this driver don't use USB-DMAC */
+@@ -976,7 +976,7 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt,
+
+ /* use PIO if packet is less than pio_dma_border or pipe is DCP */
+ if ((pkt->length < usbhs_get_dparam(priv, pio_dma_border)) ||
+- usbhs_pipe_is_dcp(pipe))
++ usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC))
+ goto usbhsf_pio_prepare_pop;
+
+ fifo = usbhsf_get_dma_fifo(priv, pkt);
+diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
+index 50f3363cc382..c4c64740a3e7 100644
+--- a/drivers/usb/renesas_usbhs/mod_gadget.c
++++ b/drivers/usb/renesas_usbhs/mod_gadget.c
+@@ -617,10 +617,13 @@ static int usbhsg_ep_enable(struct usb_ep *ep,
+ * use dmaengine if possible.
+ * It will use pio handler if impossible.
+ */
+- if (usb_endpoint_dir_in(desc))
++ if (usb_endpoint_dir_in(desc)) {
+ pipe->handler = &usbhs_fifo_dma_push_handler;
+- else
++ } else {
+ pipe->handler = &usbhs_fifo_dma_pop_handler;
++ usbhs_xxxsts_clear(priv, BRDYSTS,
++ usbhs_pipe_number(pipe));
++ }
+
+ ret = 0;
+ }
+@@ -1073,7 +1076,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv)
+
+ gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED);
+ dev_info(dev, "%stransceiver found\n",
+- gpriv->transceiver ? "" : "no ");
++ !IS_ERR(gpriv->transceiver) ? "" : "no ");
+
+ /*
+ * CAUTION
+diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
+index 00820809139a..b2d767e743fc 100644
+--- a/drivers/usb/serial/ftdi_sio.c
++++ b/drivers/usb/serial/ftdi_sio.c
+@@ -648,6 +648,8 @@ static const struct usb_device_id id_table_combined[] = {
+ { USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) },
+ { USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) },
+ { USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_PALMSENS_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_IVIUM_XSTAT_PID) },
+ { USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) },
+ { USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) },
+ { USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) },
+@@ -1008,6 +1010,7 @@ static const struct usb_device_id id_table_combined[] = {
+ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) },
+ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
+ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) },
++ { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) },
+ { } /* Terminating entry */
+ };
+
+diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
+index c5d6c1e73e8e..f87a938cf005 100644
+--- a/drivers/usb/serial/ftdi_sio_ids.h
++++ b/drivers/usb/serial/ftdi_sio_ids.h
+@@ -406,6 +406,12 @@
+ #define FTDI_4N_GALAXY_DE_3_PID 0xF3C2
+
+ /*
++ * Ivium Technologies product IDs
++ */
++#define FTDI_PALMSENS_PID 0xf440
++#define FTDI_IVIUM_XSTAT_PID 0xf441
++
++/*
+ * Linx Technologies product ids
+ */
+ #define LINX_SDMUSBQSS_PID 0xF448 /* Linx SDM-USB-QS-S */
+@@ -673,6 +679,12 @@
+ #define INTREPID_NEOVI_PID 0x0701
+
+ /*
++ * WICED USB UART
++ */
++#define WICED_VID 0x0A5C
++#define WICED_USB20706V2_PID 0x6422
++
++/*
+ * Definitions for ID TECH (www.idt-net.com) devices
+ */
+ #define IDTECH_VID 0x0ACD /* ID TECH Vendor ID */
+diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
+index 5608af4a369d..de9992b492b0 100644
+--- a/drivers/usb/serial/mos7720.c
++++ b/drivers/usb/serial/mos7720.c
+@@ -1252,7 +1252,7 @@ static int mos7720_write(struct tty_struct *tty, struct usb_serial_port *port,
+
+ if (urb->transfer_buffer == NULL) {
+ urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE,
+- GFP_KERNEL);
++ GFP_ATOMIC);
+ if (!urb->transfer_buffer)
+ goto exit;
+ }
+diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
+index ed378fb232e7..57426d703a09 100644
+--- a/drivers/usb/serial/mos7840.c
++++ b/drivers/usb/serial/mos7840.c
+@@ -1340,8 +1340,8 @@ static int mos7840_write(struct tty_struct *tty, struct usb_serial_port *port,
+ }
+
+ if (urb->transfer_buffer == NULL) {
+- urb->transfer_buffer =
+- kmalloc(URB_TRANSFER_BUFFER_SIZE, GFP_KERNEL);
++ urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE,
++ GFP_ATOMIC);
+ if (!urb->transfer_buffer)
+ goto exit;
+ }
+diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
+index 8e07536c233a..9894e341c6ac 100644
+--- a/drivers/usb/serial/option.c
++++ b/drivers/usb/serial/option.c
+@@ -274,6 +274,12 @@ static void option_instat_callback(struct urb *urb);
+ #define TELIT_PRODUCT_LE920 0x1200
+ #define TELIT_PRODUCT_LE910 0x1201
+ #define TELIT_PRODUCT_LE910_USBCFG4 0x1206
++#define TELIT_PRODUCT_LE920A4_1207 0x1207
++#define TELIT_PRODUCT_LE920A4_1208 0x1208
++#define TELIT_PRODUCT_LE920A4_1211 0x1211
++#define TELIT_PRODUCT_LE920A4_1212 0x1212
++#define TELIT_PRODUCT_LE920A4_1213 0x1213
++#define TELIT_PRODUCT_LE920A4_1214 0x1214
+
+ /* ZTE PRODUCTS */
+ #define ZTE_VENDOR_ID 0x19d2
+@@ -519,6 +525,12 @@ static void option_instat_callback(struct urb *urb);
+ #define VIATELECOM_VENDOR_ID 0x15eb
+ #define VIATELECOM_PRODUCT_CDS7 0x0001
+
++/* WeTelecom products */
++#define WETELECOM_VENDOR_ID 0x22de
++#define WETELECOM_PRODUCT_WMD200 0x6801
++#define WETELECOM_PRODUCT_6802 0x6802
++#define WETELECOM_PRODUCT_WMD300 0x6803
++
+ struct option_blacklist_info {
+ /* bitmask of interface numbers blacklisted for send_setup */
+ const unsigned long sendsetup;
+@@ -628,6 +640,11 @@ static const struct option_blacklist_info telit_le920_blacklist = {
+ .reserved = BIT(1) | BIT(5),
+ };
+
++static const struct option_blacklist_info telit_le920a4_blacklist_1 = {
++ .sendsetup = BIT(0),
++ .reserved = BIT(1),
++};
++
+ static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
+ .sendsetup = BIT(2),
+ .reserved = BIT(0) | BIT(1) | BIT(3),
+@@ -1203,6 +1220,16 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
+ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
+ .driver_info = (kernel_ulong_t)&telit_le920_blacklist },
++ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1207) },
++ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1208),
++ .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 },
++ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1211),
++ .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
++ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1212),
++ .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 },
++ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) },
++ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214),
++ .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&net_intf1_blacklist },
+@@ -1966,9 +1993,13 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
++ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */
+ { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */
+ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) },
+ { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) },
++ { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) },
+ { } /* Terminating entry */
+ };
+ MODULE_DEVICE_TABLE(usb, option_ids);
+diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
+index b1b9bac44016..d213cf44a7e4 100644
+--- a/drivers/usb/serial/usb-serial.c
++++ b/drivers/usb/serial/usb-serial.c
+@@ -1433,7 +1433,7 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[]
+
+ rc = usb_register(udriver);
+ if (rc)
+- return rc;
++ goto failed_usb_register;
+
+ for (sd = serial_drivers; *sd; ++sd) {
+ (*sd)->usb_driver = udriver;
+@@ -1451,6 +1451,8 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[]
+ while (sd-- > serial_drivers)
+ usb_serial_deregister(*sd);
+ usb_deregister(udriver);
++failed_usb_register:
++ kfree(udriver);
+ return rc;
+ }
+ EXPORT_SYMBOL_GPL(usb_serial_register_drivers);
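
The new label completes the usual goto-unwind error handling: the early usb_register() failure previously leaked the udriver allocation made earlier in the function. The general shape of the pattern (hypothetical names, not the usb-serial code):

        static int register_all(void)
        {
                struct thing *t = kzalloc(sizeof(*t), GFP_KERNEL);
                int rc;

                if (!t)
                        return -ENOMEM;
                rc = do_register(t);
                if (rc)
                        goto failed_register;   /* undo the allocation */
                return 0;

        failed_register:
                kfree(t);
                return rc;
        }
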
+diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
+index 15ecfc9c5f6c..152b43822ef1 100644
+--- a/drivers/vfio/pci/vfio_pci_intrs.c
++++ b/drivers/vfio/pci/vfio_pci_intrs.c
+@@ -564,67 +564,80 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
+ }
+
+ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
+- uint32_t flags, void *data)
++ unsigned int count, uint32_t flags,
++ void *data)
+ {
+- int32_t fd = *(int32_t *)data;
+-
+- if (!(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
+- return -EINVAL;
+-
+ /* DATA_NONE/DATA_BOOL enables loopback testing */
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+- if (*ctx)
+- eventfd_signal(*ctx, 1);
+- return 0;
++ if (*ctx) {
++ if (count) {
++ eventfd_signal(*ctx, 1);
++ } else {
++ eventfd_ctx_put(*ctx);
++ *ctx = NULL;
++ }
++ return 0;
++ }
+ } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+- uint8_t trigger = *(uint8_t *)data;
++ uint8_t trigger;
++
++ if (!count)
++ return -EINVAL;
++
++ trigger = *(uint8_t *)data;
+ if (trigger && *ctx)
+ eventfd_signal(*ctx, 1);
+- return 0;
+- }
+
+- /* Handle SET_DATA_EVENTFD */
+- if (fd == -1) {
+- if (*ctx)
+- eventfd_ctx_put(*ctx);
+- *ctx = NULL;
+ return 0;
+- } else if (fd >= 0) {
+- struct eventfd_ctx *efdctx;
+- efdctx = eventfd_ctx_fdget(fd);
+- if (IS_ERR(efdctx))
+- return PTR_ERR(efdctx);
+- if (*ctx)
+- eventfd_ctx_put(*ctx);
+- *ctx = efdctx;
++ } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
++ int32_t fd;
++
++ if (!count)
++ return -EINVAL;
++
++ fd = *(int32_t *)data;
++ if (fd == -1) {
++ if (*ctx)
++ eventfd_ctx_put(*ctx);
++ *ctx = NULL;
++ } else if (fd >= 0) {
++ struct eventfd_ctx *efdctx;
++
++ efdctx = eventfd_ctx_fdget(fd);
++ if (IS_ERR(efdctx))
++ return PTR_ERR(efdctx);
++
++ if (*ctx)
++ eventfd_ctx_put(*ctx);
++
++ *ctx = efdctx;
++ }
+ return 0;
+- } else
+- return -EINVAL;
++ }
++
++ return -EINVAL;
+ }
+
+ static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
+ unsigned index, unsigned start,
+ unsigned count, uint32_t flags, void *data)
+ {
+- if (index != VFIO_PCI_ERR_IRQ_INDEX)
++ if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
+ return -EINVAL;
+
+- /*
+- * We should sanitize start & count, but that wasn't caught
+- * originally, so this IRQ index must forever ignore them :-(
+- */
+-
+- return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, flags, data);
++ return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
++ count, flags, data);
+ }
+
+ static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
+ unsigned index, unsigned start,
+ unsigned count, uint32_t flags, void *data)
+ {
+- if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count != 1)
++ if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
+ return -EINVAL;
+
+- return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, flags, data);
++ return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
++ count, flags, data);
+ }
+
+ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
+diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
+index ca6bfddaacad..2ebf30e57eec 100644
+--- a/drivers/virtio/virtio_ring.c
++++ b/drivers/virtio/virtio_ring.c
+@@ -316,6 +316,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
+ * host should service the ring ASAP. */
+ if (out_sgs)
+ vq->notify(&vq->vq);
++ if (indirect)
++ kfree(desc);
+ END_USE(vq);
+ return -ENOSPC;
+ }
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 4274a7bfdaed..72f50480eee7 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1040,6 +1040,7 @@ struct btrfs_fs_info {
+ struct btrfs_workqueue *qgroup_rescan_workers;
+ struct completion qgroup_rescan_completion;
+ struct btrfs_work qgroup_rescan_work;
++ bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */
+
+ /* filesystem state */
+ unsigned long fs_state;
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 60ce1190307b..864cf3be0251 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1626,8 +1626,8 @@ fail:
+ return ret;
+ }
+
+-static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+- u64 root_id)
++struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
++ u64 root_id)
+ {
+ struct btrfs_root *root;
+
+@@ -2306,6 +2306,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
+ fs_info->quota_enabled = 0;
+ fs_info->pending_quota_state = 0;
+ fs_info->qgroup_ulist = NULL;
++ fs_info->qgroup_rescan_running = false;
+ mutex_init(&fs_info->qgroup_rescan_lock);
+ }
+
+@@ -3849,7 +3850,7 @@ void close_ctree(struct btrfs_root *root)
+ smp_mb();
+
+ /* wait for the qgroup rescan worker to stop */
+- btrfs_qgroup_wait_for_completion(fs_info);
++ btrfs_qgroup_wait_for_completion(fs_info, false);
+
+ /* wait for the uuid_scan task to finish */
+ down(&fs_info->uuid_tree_rescan_sem);
+diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
+index acba821499a9..355e31f90106 100644
+--- a/fs/btrfs/disk-io.h
++++ b/fs/btrfs/disk-io.h
+@@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
+ struct btrfs_key *location);
+ int btrfs_init_fs_root(struct btrfs_root *root);
++struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
++ u64 root_id);
+ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *root);
+ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index 05173563e4a6..3722a1f65069 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -5088,7 +5088,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+- return btrfs_qgroup_wait_for_completion(root->fs_info);
++ return btrfs_qgroup_wait_for_completion(root->fs_info, true);
+ }
+
+ static long _btrfs_ioctl_set_received_subvol(struct file *file,
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
+index 9d4c05b14f6e..4904ebee449b 100644
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
+ goto out;
+ fs_info->quota_enabled = 0;
+ fs_info->pending_quota_state = 0;
+- btrfs_qgroup_wait_for_completion(fs_info);
++ btrfs_qgroup_wait_for_completion(fs_info, false);
+ spin_lock(&fs_info->qgroup_lock);
+ quota_root = fs_info->quota_root;
+ fs_info->quota_root = NULL;
+@@ -2302,6 +2302,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+ int err = -ENOMEM;
+ int ret = 0;
+
++ mutex_lock(&fs_info->qgroup_rescan_lock);
++ fs_info->qgroup_rescan_running = true;
++ mutex_unlock(&fs_info->qgroup_rescan_lock);
++
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out;
+@@ -2368,6 +2372,9 @@ out:
+ }
+
+ done:
++ mutex_lock(&fs_info->qgroup_rescan_lock);
++ fs_info->qgroup_rescan_running = false;
++ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ complete_all(&fs_info->qgroup_rescan_completion);
+ }
+
+@@ -2486,20 +2493,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
+ return 0;
+ }
+
+-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
++int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
++ bool interruptible)
+ {
+ int running;
+ int ret = 0;
+
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ spin_lock(&fs_info->qgroup_lock);
+- running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
++ running = fs_info->qgroup_rescan_running;
+ spin_unlock(&fs_info->qgroup_lock);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+- if (running)
++ if (!running)
++ return 0;
++
++ if (interruptible)
+ ret = wait_for_completion_interruptible(
+ &fs_info->qgroup_rescan_completion);
++ else
++ wait_for_completion(&fs_info->qgroup_rescan_completion);
+
+ return ret;
+ }
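
The new flag keeps two distinct wait semantics: ioctl callers should be interruptible by signals, while disable/unmount paths must block until the rescan worker has really finished. Minimal sketch of the pattern (illustrative names):

        static int wait_done(struct completion *done, bool interruptible)
        {
                if (interruptible)
                        /* may return -ERESTARTSYS if a signal arrives */
                        return wait_for_completion_interruptible(done);
                wait_for_completion(done);      /* unconditional, cannot fail */
                return 0;
        }
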
+diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
+index ecb2c143ef75..3d73e4c9c7df 100644
+--- a/fs/btrfs/qgroup.h
++++ b/fs/btrfs/qgroup.h
+@@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
+ int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
+ void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
+-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
++int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
++ bool interruptible);
+ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 src, u64 dst);
+ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
+index f1c30861d062..3454aa4faecb 100644
+--- a/fs/btrfs/root-tree.c
++++ b/fs/btrfs/root-tree.c
+@@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
+ root_key.objectid = key.offset;
+ key.offset++;
+
++ /*
++ * The root might have been inserted already, as before we look
++ * for orphan roots, log replay might have happened, which
++ * triggers a transaction commit and qgroup accounting, which
++ * in turn reads and inserts fs roots while doing backref
++ * walking.
++ */
++ root = btrfs_lookup_fs_root(tree_root->fs_info,
++ root_key.objectid);
++ if (root) {
++ WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
++ &root->state));
++ if (btrfs_root_refs(&root->root_item) == 0)
++ btrfs_add_dead_root(root);
++ continue;
++ }
++
+ root = btrfs_read_fs_root(tree_root, &root_key);
+ err = PTR_ERR_OR_ZERO(root);
+ if (err && err != -ENOENT) {
+@@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
+ set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
+
+ err = btrfs_insert_fs_root(root->fs_info, root);
+- /*
+- * The root might have been inserted already, as before we look
+- * for orphan roots, log replay might have happened, which
+- * triggers a transaction commit and qgroup accounting, which
+- * in turn reads and inserts fs roots while doing backref
+- * walking.
+- */
+- if (err == -EEXIST)
+- err = 0;
+ if (err) {
++ BUG_ON(err == -EEXIST);
+ btrfs_free_fs_root(root);
+ break;
+ }
+diff --git a/fs/seq_file.c b/fs/seq_file.c
+index 19f532e7d35e..6dc4296eed62 100644
+--- a/fs/seq_file.c
++++ b/fs/seq_file.c
+@@ -223,8 +223,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
+ size -= n;
+ buf += n;
+ copied += n;
+- if (!m->count)
++ if (!m->count) {
++ m->from = 0;
+ m->index++;
++ }
+ if (!size)
+ goto Done;
+ }
+diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
+index f35523d4fa3a..b803213d1307 100644
+--- a/fs/sysfs/file.c
++++ b/fs/sysfs/file.c
+@@ -114,9 +114,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
+ * If buf != of->prealloc_buf, we don't know how
+ * large it is, so cannot safely pass it to ->show
+ */
+- if (pos || WARN_ON_ONCE(buf != of->prealloc_buf))
++ if (WARN_ON_ONCE(buf != of->prealloc_buf))
+ return 0;
+ len = ops->show(kobj, of->kn->priv, buf);
++ if (pos) {
++ if (len <= pos)
++ return 0;
++ len -= pos;
++ memmove(buf, buf + pos, len);
++ }
+ return min(count, len);
+ }
+
+diff --git a/include/linux/acpi.h b/include/linux/acpi.h
+index 288fac5294f5..47f950856346 100644
+--- a/include/linux/acpi.h
++++ b/include/linux/acpi.h
+@@ -985,7 +985,7 @@ static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev,
+ return NULL;
+ }
+
+-#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, validate, data, fn) \
++#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \
+ static const void * __acpi_table_##name[] \
+ __attribute__((unused)) \
+ = { (void *) table_id, \
+diff --git a/include/linux/i8042.h b/include/linux/i8042.h
+index 0f9bafa17a02..d98780ca9604 100644
+--- a/include/linux/i8042.h
++++ b/include/linux/i8042.h
+@@ -62,7 +62,6 @@ struct serio;
+ void i8042_lock_chip(void);
+ void i8042_unlock_chip(void);
+ int i8042_command(unsigned char *param, int command);
+-bool i8042_check_port_owner(const struct serio *);
+ int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
+ struct serio *serio));
+ int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
+@@ -83,11 +82,6 @@ static inline int i8042_command(unsigned char *param, int command)
+ return -ENODEV;
+ }
+
+-static inline bool i8042_check_port_owner(const struct serio *serio)
+-{
+- return false;
+-}
+-
+ static inline int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
+ struct serio *serio))
+ {
+diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
+index 64184d27e3cd..d641a18abacb 100644
+--- a/include/linux/mfd/cros_ec.h
++++ b/include/linux/mfd/cros_ec.h
+@@ -226,6 +226,21 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
+ struct cros_ec_command *msg);
+
+ /**
++ * cros_ec_cmd_xfer_status - Send a command to the ChromeOS EC
++ *
++ * This function is identical to cros_ec_cmd_xfer, except it returns success
++ * status only if both the command was transmitted successfully and the EC
++ * replied with success status. It's not necessary to check msg->result when
++ * using this function.
++ *
++ * @ec_dev: EC device
++ * @msg: Message to write
++ * @return: Num. of bytes transferred on success, <0 on failure
++ */
++int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
++ struct cros_ec_command *msg);
++
++/**
+ * cros_ec_remove - Remove a ChromeOS EC
+ *
+ * Call this to deregister a ChromeOS EC, then clean up any private data.
+diff --git a/include/linux/msi.h b/include/linux/msi.h
+index 8b425c66305a..ec39a086f1f6 100644
+--- a/include/linux/msi.h
++++ b/include/linux/msi.h
+@@ -264,12 +264,12 @@ enum {
+ * callbacks.
+ */
+ MSI_FLAG_USE_DEF_CHIP_OPS = (1 << 1),
+- /* Build identity map between hwirq and irq */
+- MSI_FLAG_IDENTITY_MAP = (1 << 2),
+ /* Support multiple PCI MSI interrupts */
+- MSI_FLAG_MULTI_PCI_MSI = (1 << 3),
++ MSI_FLAG_MULTI_PCI_MSI = (1 << 2),
+ /* Support PCI MSIX interrupts */
+- MSI_FLAG_PCI_MSIX = (1 << 4),
++ MSI_FLAG_PCI_MSIX = (1 << 3),
++	/* Needs early activation, required for PCI */
++ MSI_FLAG_ACTIVATE_EARLY = (1 << 4),
+ };
+
+ int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
+diff --git a/include/linux/serio.h b/include/linux/serio.h
+index df4ab5de1586..c733cff44e18 100644
+--- a/include/linux/serio.h
++++ b/include/linux/serio.h
+@@ -31,7 +31,8 @@ struct serio {
+
+ struct serio_device_id id;
+
+- spinlock_t lock; /* protects critical sections from port's interrupt handler */
++ /* Protects critical sections from port's interrupt handler */
++ spinlock_t lock;
+
+ int (*write)(struct serio *, unsigned char);
+ int (*open)(struct serio *);
+@@ -40,16 +41,29 @@ struct serio {
+ void (*stop)(struct serio *);
+
+ struct serio *parent;
+- struct list_head child_node; /* Entry in parent->children list */
++ /* Entry in parent->children list */
++ struct list_head child_node;
+ struct list_head children;
+- unsigned int depth; /* level of nesting in serio hierarchy */
++ /* Level of nesting in serio hierarchy */
++ unsigned int depth;
+
+- struct serio_driver *drv; /* accessed from interrupt, must be protected by serio->lock and serio->sem */
+- struct mutex drv_mutex; /* protects serio->drv so attributes can pin driver */
++ /*
++ * serio->drv is accessed from interrupt handlers; when modifying
++ * caller should acquire serio->drv_mutex and serio->lock.
++ */
++ struct serio_driver *drv;
++ /* Protects serio->drv so attributes can pin current driver */
++ struct mutex drv_mutex;
+
+ struct device dev;
+
+ struct list_head node;
++
++ /*
++ * For use by PS/2 layer when several ports share hardware and
++ * may get indigestion when exposed to concurrent access (i8042).
++ */
++ struct mutex *ps2_cmd_mutex;
+ };
+ #define to_serio_port(d) container_of(d, struct serio, dev)
+
+diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
+index 51440131d337..28c5da6fdfac 100644
+--- a/include/trace/events/timer.h
++++ b/include/trace/events/timer.h
+@@ -330,24 +330,32 @@ TRACE_EVENT(itimer_expire,
+ #ifdef CONFIG_NO_HZ_COMMON
+
+ #define TICK_DEP_NAMES \
+- tick_dep_name(NONE) \
++ tick_dep_mask_name(NONE) \
+ tick_dep_name(POSIX_TIMER) \
+ tick_dep_name(PERF_EVENTS) \
+ tick_dep_name(SCHED) \
+ tick_dep_name_end(CLOCK_UNSTABLE)
+
+ #undef tick_dep_name
++#undef tick_dep_mask_name
+ #undef tick_dep_name_end
+
+-#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+-#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
++/* The MASK will convert to their bits and they need to be processed too */
++#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
++ TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
++#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
++ TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
++/* NONE only has a mask defined for it */
++#define tick_dep_mask_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+
+ TICK_DEP_NAMES
+
+ #undef tick_dep_name
++#undef tick_dep_mask_name
+ #undef tick_dep_name_end
+
+ #define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
++#define tick_dep_mask_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
+ #define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
+
+ #define show_tick_dep_name(val) \
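
The TICK_DEP_NAMES change above is an instance of the X-macro idiom: one list, expanded several times with different definitions of the element macros, so the enum and the name table can never drift apart. A self-contained illustration with made-up names (not the kernel's):

    #include <stdio.h>

    #define DEP_NAMES \
        dep_name(POSIX_TIMER) \
        dep_name(PERF_EVENTS) \
        dep_name_end(SCHED)

    /* First expansion: build an enum of bit numbers. */
    #define dep_name(s)     DEP_BIT_##s,
    #define dep_name_end(s) DEP_BIT_##s
    enum { DEP_NAMES };
    #undef dep_name
    #undef dep_name_end

    /* Second expansion: build a mask/string table from the same list. */
    #define dep_name(s)     { 1u << DEP_BIT_##s, #s },
    #define dep_name_end(s) { 1u << DEP_BIT_##s, #s }
    static const struct { unsigned mask; const char *name; } deps[] = {
        DEP_NAMES
    };

    int main(void)
    {
        for (unsigned i = 0; i < sizeof(deps) / sizeof(deps[0]); i++)
            printf("%-12s mask=0x%x\n", deps[i].name, deps[i].mask);
        return 0;
    }

The fix above follows the same idiom: because NONE only has a MASK constant and no BIT constant, it gets its own element macro (tick_dep_mask_name) so each expansion can treat it differently.
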
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 43d43a2d5811..e68c0a735c8f 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -242,18 +242,6 @@ unlock:
+ return ret;
+ }
+
+-static void event_function_local(struct perf_event *event, event_f func, void *data)
+-{
+- struct event_function_struct efs = {
+- .event = event,
+- .func = func,
+- .data = data,
+- };
+-
+- int ret = event_function(&efs);
+- WARN_ON_ONCE(ret);
+-}
+-
+ static void event_function_call(struct perf_event *event, event_f func, void *data)
+ {
+ struct perf_event_context *ctx = event->ctx;
+@@ -303,6 +291,54 @@ again:
+ raw_spin_unlock_irq(&ctx->lock);
+ }
+
++/*
++ * Similar to event_function_call() + event_function(), but hard assumes IRQs
++ * are already disabled and we're on the right CPU.
++ */
++static void event_function_local(struct perf_event *event, event_f func, void *data)
++{
++ struct perf_event_context *ctx = event->ctx;
++ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
++ struct task_struct *task = READ_ONCE(ctx->task);
++ struct perf_event_context *task_ctx = NULL;
++
++ WARN_ON_ONCE(!irqs_disabled());
++
++ if (task) {
++ if (task == TASK_TOMBSTONE)
++ return;
++
++ task_ctx = ctx;
++ }
++
++ perf_ctx_lock(cpuctx, task_ctx);
++
++ task = ctx->task;
++ if (task == TASK_TOMBSTONE)
++ goto unlock;
++
++ if (task) {
++ /*
++ * We must be either inactive or active and the right task,
++ * otherwise we're screwed, since we cannot IPI to somewhere
++ * else.
++ */
++ if (ctx->is_active) {
++ if (WARN_ON_ONCE(task != current))
++ goto unlock;
++
++ if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
++ goto unlock;
++ }
++ } else {
++ WARN_ON_ONCE(&cpuctx->ctx != ctx);
++ }
++
++ func(event, cpuctx, ctx, data);
++unlock:
++ perf_ctx_unlock(cpuctx, task_ctx);
++}
++
+ #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
+ PERF_FLAG_FD_OUTPUT |\
+ PERF_FLAG_PID_CGROUP |\
+diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
+index b7a525ab2083..8c50276b60d1 100644
+--- a/kernel/events/uprobes.c
++++ b/kernel/events/uprobes.c
+@@ -172,8 +172,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+ err = -EAGAIN;
+ ptep = page_check_address(page, mm, addr, &ptl, 0);
+- if (!ptep)
++ if (!ptep) {
++ mem_cgroup_cancel_charge(kpage, memcg, false);
+ goto unlock;
++ }
+
+ get_page(kpage);
+ page_add_new_anon_rmap(kpage, vma, addr, false);
+@@ -200,7 +202,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
+
+ err = 0;
+ unlock:
+- mem_cgroup_cancel_charge(kpage, memcg, false);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+ unlock_page(page);
+ return err;
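
The uprobes change moves a cleanup call off the shared exit path and into the one branch where it is actually needed: the memcg charge must be cancelled only when the new page was never mapped, not after a successful commit. A generic sketch of that error-path shape, with hypothetical names standing in for the memcg API:

    #include <stdio.h>

    static int  charge(void)        { puts("charge"); return 0; }
    static void cancel_charge(void) { puts("cancel"); }
    static void commit_charge(void) { puts("commit"); }

    static int replace(int can_map)
    {
        int err = -1;

        if (charge())
            return -1;

        if (!can_map) {
            /* Failure before the charge was consumed: undo it here,
             * not on the common exit path, where it would also run
             * after a successful commit. */
            cancel_charge();
            goto unlock;
        }

        commit_charge();
        err = 0;
    unlock:
        /* common cleanup that both paths need goes here */
        return err;
    }

    int main(void)
    {
        replace(0);
        replace(1);
        return 0;
    }
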
+diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
+index 38e89ce7b071..0afe671f1770 100644
+--- a/kernel/irq/msi.c
++++ b/kernel/irq/msi.c
+@@ -324,7 +324,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
+ struct msi_domain_ops *ops = info->ops;
+ msi_alloc_info_t arg;
+ struct msi_desc *desc;
+- int i, ret, virq = -1;
++ int i, ret, virq;
+
+ ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
+ if (ret)
+@@ -332,12 +332,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
+
+ for_each_msi_entry(desc, dev) {
+ ops->set_desc(&arg, desc);
+- if (info->flags & MSI_FLAG_IDENTITY_MAP)
+- virq = (int)ops->get_hwirq(info, &arg);
+- else
+- virq = -1;
+
+- virq = __irq_domain_alloc_irqs(domain, virq, desc->nvec_used,
++ virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
+ dev_to_node(dev), &arg, false);
+ if (virq < 0) {
+ ret = -ENOSPC;
+@@ -361,6 +357,17 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
+ else
+ dev_dbg(dev, "irq [%d-%d] for MSI\n",
+ virq, virq + desc->nvec_used - 1);
++ /*
++ * This flag is set by the PCI layer as we need to activate
++ * the MSI entries before the PCI layer enables MSI in the
++ * card. Otherwise the card latches a random msi message.
++ */
++ if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
++ struct irq_data *irq_data;
++
++ irq_data = irq_domain_get_irq_data(domain, desc->irq);
++ irq_domain_activate_irq(irq_data);
++ }
+ }
+
+ return 0;
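
The loop gained above boils down to: after allocating the Linux irqs for each MSI descriptor, activate them immediately when the domain asks for it, before the device is allowed to send. A stubbed-out sketch of that control flow; the stub functions and types below stand in for the real irqdomain machinery and are not the kernel's API:

    #include <stdio.h>

    #define MSI_FLAG_ACTIVATE_EARLY (1 << 4)

    /* Stubs standing in for the real irqdomain calls. */
    struct irq_data { int virq; };
    static struct irq_data *get_irq_data(int virq)
    {
        static struct irq_data d;
        d.virq = virq;
        return &d;
    }
    static void activate_irq(struct irq_data *d)
    {
        printf("activated virq %d\n", d->virq);
    }

    static int alloc_msi_irqs(unsigned flags, const int *virqs, int n)
    {
        for (int i = 0; i < n; i++) {
            /* ... program the MSI message for virqs[i] ... */

            /*
             * PCI sets this flag: the entry must carry a valid
             * message before MSI is enabled in the device, or the
             * card may latch whatever random value is there.
             */
            if (flags & MSI_FLAG_ACTIVATE_EARLY)
                activate_irq(get_irq_data(virqs[i]));
        }
        return 0;
    }

    int main(void)
    {
        int virqs[] = { 32, 33 };
        return alloc_msi_irqs(MSI_FLAG_ACTIVATE_EARLY, virqs, 2);
    }
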
+diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
+index 75f98c5498d5..a24cfb41d1fc 100644
+--- a/kernel/sched/cputime.c
++++ b/kernel/sched/cputime.c
+@@ -603,19 +603,25 @@ static void cputime_adjust(struct task_cputime *curr,
+ stime = curr->stime;
+ utime = curr->utime;
+
+- if (utime == 0) {
+- stime = rtime;
++ /*
++ * If either stime or both stime and utime are 0, assume all runtime is
++ * userspace. Once a task gets some ticks, the monotonicy code at
++ * 'update' will ensure things converge to the observed ratio.
++ */
++ if (stime == 0) {
++ utime = rtime;
+ goto update;
+ }
+
+- if (stime == 0) {
+- utime = rtime;
++ if (utime == 0) {
++ stime = rtime;
+ goto update;
+ }
+
+ stime = scale_stime((__force u64)stime, (__force u64)rtime,
+ (__force u64)(stime + utime));
+
++update:
+ /*
+ * Make sure stime doesn't go backwards; this preserves monotonicity
+ * for utime because rtime is monotonic.
+@@ -638,7 +644,6 @@ static void cputime_adjust(struct task_cputime *curr,
+ stime = rtime - utime;
+ }
+
+-update:
+ prev->stime = stime;
+ prev->utime = utime;
+ out:
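
The reordered checks implement a simple rule: when only one of stime/utime is nonzero, attribute all of rtime to that side; otherwise split rtime proportionally, then clamp so neither reported component ever moves backwards. A standalone model of that arithmetic (the real scale_stime() avoids the 64-bit overflow this naive multiply can hit):

    #include <stdio.h>
    #include <stdint.h>

    /* Split rtime in the observed stime:utime ratio, keeping the results
     * monotonic against the previously reported values. Simplified model. */
    static void adjust(uint64_t stime, uint64_t utime, uint64_t rtime,
                       uint64_t *prev_stime, uint64_t *prev_utime)
    {
        if (stime == 0)
            utime = rtime;                          /* all user time */
        else if (utime == 0)
            stime = rtime;                          /* all system time */
        else
            stime = stime * rtime / (stime + utime); /* proportional split */

        /* Monotonicity: never report less than we already reported. */
        if (stime < *prev_stime)
            stime = *prev_stime;
        utime = rtime - stime;
        if (utime < *prev_utime) {
            utime = *prev_utime;
            stime = rtime - utime;
        }

        *prev_stime = stime;
        *prev_utime = utime;
    }

    int main(void)
    {
        uint64_t ps = 0, pu = 0;

        adjust(30, 10, 100, &ps, &pu);  /* 3:1 ratio of a 100-tick runtime */
        printf("stime=%llu utime=%llu\n",
               (unsigned long long)ps, (unsigned long long)pu);
        return 0;
    }
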
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 343a2b7e57aa..07f28251f6e0 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2872,7 +2872,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
+ struct page *page;
+ pgtable_t pgtable;
+ pmd_t _pmd;
+- bool young, write, dirty;
++ bool young, write, dirty, soft_dirty;
+ unsigned long addr;
+ int i;
+
+@@ -2898,6 +2898,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
+ write = pmd_write(*pmd);
+ young = pmd_young(*pmd);
+ dirty = pmd_dirty(*pmd);
++ soft_dirty = pmd_soft_dirty(*pmd);
+
+ pmdp_huge_split_prepare(vma, haddr, pmd);
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+@@ -2914,6 +2915,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
+ swp_entry_t swp_entry;
+ swp_entry = make_migration_entry(page + i, write);
+ entry = swp_entry_to_pte(swp_entry);
++ if (soft_dirty)
++ entry = pte_swp_mksoft_dirty(entry);
+ } else {
+ entry = mk_pte(page + i, vma->vm_page_prot);
+ entry = maybe_mkwrite(entry, vma);
+@@ -2921,6 +2924,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
+ entry = pte_wrprotect(entry);
+ if (!young)
+ entry = pte_mkold(entry);
++ if (soft_dirty)
++ entry = pte_mksoft_dirty(entry);
+ }
+ if (dirty)
+ SetPageDirty(page + i);
+diff --git a/mm/readahead.c b/mm/readahead.c
+index 40be3ae0afe3..7f9f8c3464e2 100644
+--- a/mm/readahead.c
++++ b/mm/readahead.c
+@@ -8,6 +8,7 @@
+ */
+
+ #include <linux/kernel.h>
++#include <linux/dax.h>
+ #include <linux/gfp.h>
+ #include <linux/export.h>
+ #include <linux/blkdev.h>
+@@ -545,6 +546,14 @@ do_readahead(struct address_space *mapping, struct file *filp,
+ if (!mapping || !mapping->a_ops)
+ return -EINVAL;
+
++ /*
++ * Readahead doesn't make sense for DAX inodes, but we don't want it
++ * to report a failure either. Instead, we just return success and
++ * don't do any work.
++ */
++ if (dax_mapping(mapping))
++ return 0;
++
+ return force_page_cache_readahead(mapping, filp, index, nr);
+ }
+
+diff --git a/mm/slub.c b/mm/slub.c
+index 825ff4505336..7a6d268840c0 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -3479,6 +3479,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
+ */
+ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
+ {
++ LIST_HEAD(discard);
+ struct page *page, *h;
+
+ BUG_ON(irqs_disabled());
+@@ -3486,13 +3487,16 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
+ list_for_each_entry_safe(page, h, &n->partial, lru) {
+ if (!page->inuse) {
+ remove_partial(n, page);
+- discard_slab(s, page);
++ list_add(&page->lru, &discard);
+ } else {
+ list_slab_objects(s, page,
+ "Objects remaining in %s on __kmem_cache_shutdown()");
+ }
+ }
+ spin_unlock_irq(&n->list_lock);
++
++ list_for_each_entry_safe(page, h, &discard, lru)
++ discard_slab(s, page);
+ }
+
+ /*
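
The slub change above is the classic deferred-free pattern: collect the items on a private list while holding the lock, then free them after the lock is dropped, because the free itself may sleep or take other locks. A userspace analogue with pthreads, as a sketch:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct node { struct node *next; };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct node *partial;     /* shared list, guarded by list_lock */

    static void free_all_unused(void)
    {
        struct node *discard = NULL, *n, *next;

        pthread_mutex_lock(&list_lock);
        /* Move everything we intend to free onto a private list. */
        while ((n = partial)) {
            partial = n->next;
            n->next = discard;
            discard = n;
        }
        pthread_mutex_unlock(&list_lock);

        /* Free outside the lock: free() may be slow or take locks. */
        for (n = discard; n; n = next) {
            next = n->next;
            free(n);
        }
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++) {
            struct node *n = malloc(sizeof(*n));
            n->next = partial;
            partial = n;
        }
        free_all_unused();
        puts("done");
        return 0;
    }
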
+diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
+index 0c12e4001f19..8cc49c04458a 100644
+--- a/net/mac80211/cfg.c
++++ b/net/mac80211/cfg.c
+@@ -869,7 +869,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
+
+ /* free all potentially still buffered bcast frames */
+ local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
+- skb_queue_purge(&sdata->u.ap.ps.bc_buf);
++ ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf);
+
+ mutex_lock(&local->mtx);
+ ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
+diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
+index 203044379ce0..8bad2ad81399 100644
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -365,7 +365,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
+ skb = skb_dequeue(&ps->bc_buf);
+ if (skb) {
+ purged++;
+- dev_kfree_skb(skb);
++ ieee80211_free_txskb(&local->hw, skb);
+ }
+ total += skb_queue_len(&ps->bc_buf);
+ }
+@@ -448,7 +448,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
+ if (skb_queue_len(&ps->bc_buf) >= AP_MAX_BC_BUFFER) {
+ ps_dbg(tx->sdata,
+ "BC TX buffer full - dropping the oldest frame\n");
+- dev_kfree_skb(skb_dequeue(&ps->bc_buf));
++ ieee80211_free_txskb(&tx->local->hw, skb_dequeue(&ps->bc_buf));
+ } else
+ tx->local->total_ps_buffered++;
+
+@@ -4055,7 +4055,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
+ sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
+ if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb))
+ break;
+- dev_kfree_skb_any(skb);
++ ieee80211_free_txskb(hw, skb);
+ }
+
+ info = IEEE80211_SKB_CB(skb);
+diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
+index e64ae93d5b4f..bf4b0e98f5b5 100644
+--- a/net/sunrpc/auth_gss/auth_gss.c
++++ b/net/sunrpc/auth_gss/auth_gss.c
+@@ -340,12 +340,14 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
+ }
+
+ static struct gss_upcall_msg *
+-__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid)
++__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth)
+ {
+ struct gss_upcall_msg *pos;
+ list_for_each_entry(pos, &pipe->in_downcall, list) {
+ if (!uid_eq(pos->uid, uid))
+ continue;
++ if (auth && pos->auth->service != auth->service)
++ continue;
+ atomic_inc(&pos->count);
+ dprintk("RPC: %s found msg %p\n", __func__, pos);
+ return pos;
+@@ -365,7 +367,7 @@ gss_add_msg(struct gss_upcall_msg *gss_msg)
+ struct gss_upcall_msg *old;
+
+ spin_lock(&pipe->lock);
+- old = __gss_find_upcall(pipe, gss_msg->uid);
++ old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth);
+ if (old == NULL) {
+ atomic_inc(&gss_msg->count);
+ list_add(&gss_msg->list, &pipe->in_downcall);
+@@ -714,7 +716,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+ err = -ENOENT;
+ /* Find a matching upcall */
+ spin_lock(&pipe->lock);
+- gss_msg = __gss_find_upcall(pipe, uid);
++ gss_msg = __gss_find_upcall(pipe, uid, NULL);
+ if (gss_msg == NULL) {
+ spin_unlock(&pipe->lock);
+ goto err_put_ctx;
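
The extra auth parameter added to __gss_find_upcall() is a common pattern: an optional filter where NULL means "match any", so existing callers keep their old behaviour. A compact standalone version of the same shape:

    #include <stdio.h>
    #include <stddef.h>

    struct msg { int uid; int service; struct msg *next; };

    /* Find the first message for uid; if svc is non-NULL, the service must
     * match too. Passing NULL keeps the old "any service" behaviour. */
    static struct msg *find_upcall(struct msg *head, int uid, const int *svc)
    {
        for (struct msg *m = head; m; m = m->next) {
            if (m->uid != uid)
                continue;
            if (svc && m->service != *svc)
                continue;
            return m;
        }
        return NULL;
    }

    int main(void)
    {
        struct msg b = { 1000, 2, NULL }, a = { 1000, 1, &b };
        int want = 2;

        printf("any:      service %d\n", find_upcall(&a, 1000, NULL)->service);
        printf("filtered: service %d\n", find_upcall(&a, 1000, &want)->service);
        return 0;
    }
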
+diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
+index e7fd76975d86..66c9d63f4797 100644
+--- a/net/sunrpc/xprtmultipath.c
++++ b/net/sunrpc/xprtmultipath.c
+@@ -271,14 +271,12 @@ struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi,
+ xprt_switch_find_xprt_t find_next)
+ {
+ struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
+- struct list_head *head;
+
+ if (xps == NULL)
+ return NULL;
+- head = &xps->xps_xprt_list;
+- if (xps->xps_nxprts < 2)
+- return xprt_switch_find_first_entry(head);
+- return xprt_switch_set_next_cursor(head, &xpi->xpi_cursor, find_next);
++ return xprt_switch_set_next_cursor(&xps->xps_xprt_list,
++ &xpi->xpi_cursor,
++ find_next);
+ }
+
+ static
+diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
+index 7e2b2fa189c3..167cf59318ed 100644
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -2278,6 +2278,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
+ /* SYN_SENT! */
+ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
++ break;
++ case -EADDRNOTAVAIL:
++ /* Source port number is unavailable. Try a new one! */
++ transport->srcport = 0;
+ }
+ out:
+ return ret;
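
The new case (and the break that now stops the fallthrough above it) handles -EADDRNOTAVAIL by zeroing the cached source port, so the next attempt lets the kernel pick a fresh ephemeral port. The same recovery in a plain sockets program might look like this sketch:

    #include <arpa/inet.h>
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* Bind to srcport (0 = kernel chooses) and connect; on EADDRNOTAVAIL,
     * retry once with srcport reset to 0, mirroring the xprtsock fix. */
    static int connect_from(unsigned short srcport, struct sockaddr_in *dst)
    {
        for (;;) {
            int fd = socket(AF_INET, SOCK_STREAM, 0);
            struct sockaddr_in src = { .sin_family = AF_INET,
                                       .sin_port = htons(srcport) };

            if (fd < 0)
                return -1;
            if (bind(fd, (struct sockaddr *)&src, sizeof(src)) == 0 &&
                connect(fd, (struct sockaddr *)dst, sizeof(*dst)) == 0)
                return fd;
            close(fd);
            if (errno == EADDRNOTAVAIL && srcport != 0) {
                srcport = 0;  /* source port unusable: try a new one */
                continue;
            }
            return -1;
        }
    }

    int main(void)
    {
        struct sockaddr_in dst = { .sin_family = AF_INET,
                                   .sin_port = htons(80) };

        inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
        int fd = connect_from(50000, &dst);
        printf("fd=%d (%s)\n", fd, fd < 0 ? strerror(errno) : "ok");
        if (fd >= 0)
            close(fd);
        return 0;
    }
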
+diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
+index 89dacf9b4e6c..160c7f713722 100644
+--- a/sound/pci/hda/hda_intel.c
++++ b/sound/pci/hda/hda_intel.c
+@@ -906,20 +906,23 @@ static int azx_resume(struct device *dev)
+ struct snd_card *card = dev_get_drvdata(dev);
+ struct azx *chip;
+ struct hda_intel *hda;
++ struct hdac_bus *bus;
+
+ if (!card)
+ return 0;
+
+ chip = card->private_data;
+ hda = container_of(chip, struct hda_intel, chip);
++ bus = azx_bus(chip);
+ if (chip->disabled || hda->init_failed || !chip->running)
+ return 0;
+
+- if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL
+- && hda->need_i915_power) {
+- snd_hdac_display_power(azx_bus(chip), true);
+- snd_hdac_i915_set_bclk(azx_bus(chip));
++ if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
++ snd_hdac_display_power(bus, true);
++ if (hda->need_i915_power)
++ snd_hdac_i915_set_bclk(bus);
+ }
++
+ if (chip->msi)
+ if (pci_enable_msi(pci) < 0)
+ chip->msi = 0;
+@@ -929,6 +932,11 @@ static int azx_resume(struct device *dev)
+
+ hda_intel_init_chip(chip, true);
+
++ /* power down again for link-controlled chips */
++ if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
++ !hda->need_i915_power)
++ snd_hdac_display_power(bus, false);
++
+ snd_power_change_state(card, SNDRV_CTL_POWER_D0);
+
+ trace_azx_resume(chip);
+@@ -1008,6 +1016,7 @@ static int azx_runtime_resume(struct device *dev)
+
+ chip = card->private_data;
+ hda = container_of(chip, struct hda_intel, chip);
++ bus = azx_bus(chip);
+ if (chip->disabled || hda->init_failed)
+ return 0;
+
+@@ -1015,15 +1024,9 @@ static int azx_runtime_resume(struct device *dev)
+ return 0;
+
+ if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
+- bus = azx_bus(chip);
+- if (hda->need_i915_power) {
+- snd_hdac_display_power(bus, true);
++ snd_hdac_display_power(bus, true);
++ if (hda->need_i915_power)
+ snd_hdac_i915_set_bclk(bus);
+- } else {
+- /* toggle codec wakeup bit for STATESTS read */
+- snd_hdac_set_codec_wakeup(bus, true);
+- snd_hdac_set_codec_wakeup(bus, false);
+- }
+ }
+
+ /* Read STATESTS before controller reset */
+@@ -1043,6 +1046,11 @@ static int azx_runtime_resume(struct device *dev)
+ azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) &
+ ~STATESTS_INT_MASK);
+
++ /* power down again for link-controlled chips */
++ if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
++ !hda->need_i915_power)
++ snd_hdac_display_power(bus, false);
++
+ trace_azx_runtime_resume(chip);
+ return 0;
+ }
+diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c
+index 204cc074adb9..41aa3355e920 100644
+--- a/sound/usb/line6/pcm.c
++++ b/sound/usb/line6/pcm.c
+@@ -55,7 +55,6 @@ static int snd_line6_impulse_volume_put(struct snd_kcontrol *kcontrol,
+ err = line6_pcm_acquire(line6pcm, LINE6_STREAM_IMPULSE);
+ if (err < 0) {
+ line6pcm->impulse_volume = 0;
+- line6_pcm_release(line6pcm, LINE6_STREAM_IMPULSE);
+ return err;
+ }
+ } else {
+@@ -211,7 +210,9 @@ static void line6_stream_stop(struct snd_line6_pcm *line6pcm, int direction,
+ spin_lock_irqsave(&pstr->lock, flags);
+ clear_bit(type, &pstr->running);
+ if (!pstr->running) {
++ spin_unlock_irqrestore(&pstr->lock, flags);
+ line6_unlink_audio_urbs(line6pcm, pstr);
++ spin_lock_irqsave(&pstr->lock, flags);
+ if (direction == SNDRV_PCM_STREAM_CAPTURE) {
+ line6pcm->prev_fbuf = NULL;
+ line6pcm->prev_fsize = 0;
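
The line6 fix above is the unlock/call/relock pattern: line6_unlink_audio_urbs() can end up in paths that must not run under the stream spinlock, so the lock is dropped around just that call and reacquired before shared state is touched again. A userspace illustration of the shape, with a mutex standing in for the irq-safe spinlock:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int active_streams;
    static int prev_size;

    /* May block or take other locks: must not run with 'lock' held. */
    static void unlink_urbs(void)
    {
        puts("unlinking urbs outside the lock");
    }

    static void stream_stop(void)
    {
        pthread_mutex_lock(&lock);
        if (--active_streams == 0) {
            /* Drop the lock around the call that must run lock-free,
             * then reacquire it before touching shared state again. */
            pthread_mutex_unlock(&lock);
            unlink_urbs();
            pthread_mutex_lock(&lock);
            prev_size = 0;
        }
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        active_streams = 1;
        stream_stop();
        return 0;
    }

The standard caveat applies: once the lock has been dropped, anything read before the drop may be stale and must be revalidated after reacquiring.
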
+diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c
+index daf81d169a42..45dd34874f43 100644
+--- a/sound/usb/line6/pod.c
++++ b/sound/usb/line6/pod.c
+@@ -244,8 +244,8 @@ static int pod_set_system_param_int(struct usb_line6_pod *pod, int value,
+ static ssize_t serial_number_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+ {
+- struct usb_interface *interface = to_usb_interface(dev);
+- struct usb_line6_pod *pod = usb_get_intfdata(interface);
++ struct snd_card *card = dev_to_snd_card(dev);
++ struct usb_line6_pod *pod = card->private_data;
+
+ return sprintf(buf, "%u\n", pod->serial_number);
+ }
+@@ -256,8 +256,8 @@ static ssize_t serial_number_show(struct device *dev,
+ static ssize_t firmware_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+ {
+- struct usb_interface *interface = to_usb_interface(dev);
+- struct usb_line6_pod *pod = usb_get_intfdata(interface);
++ struct snd_card *card = dev_to_snd_card(dev);
++ struct usb_line6_pod *pod = card->private_data;
+
+ return sprintf(buf, "%d.%02d\n", pod->firmware_version / 100,
+ pod->firmware_version % 100);
+@@ -269,8 +269,8 @@ static ssize_t firmware_version_show(struct device *dev,
+ static ssize_t device_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+ {
+- struct usb_interface *interface = to_usb_interface(dev);
+- struct usb_line6_pod *pod = usb_get_intfdata(interface);
++ struct snd_card *card = dev_to_snd_card(dev);
++ struct usb_line6_pod *pod = card->private_data;
+
+ return sprintf(buf, "%d\n", pod->device_id);
+ }
+diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
+index 6adde457b602..6cf1f3597455 100644
+--- a/sound/usb/quirks.c
++++ b/sound/usb/quirks.c
+@@ -1128,6 +1128,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
+ {
+ /* devices which do not support reading the sample rate. */
+ switch (chip->usb_id) {
++ case USB_ID(0x041E, 0x4080): /* Creative Live Cam VF0610 */
+ case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema */
+ case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
+ case USB_ID(0x045E, 0x076E): /* MS Lifecam HD-5001 */
+@@ -1138,6 +1139,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
+ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
+ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
+ case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
++ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */
+ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
+ case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
+ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
+diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
+index a07b9605e93b..853b26d85a76 100644
+--- a/tools/perf/arch/x86/util/intel-pt.c
++++ b/tools/perf/arch/x86/util/intel-pt.c
+@@ -501,7 +501,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
+- bool have_timing_info;
++ bool have_timing_info, need_immediate = false;
+ struct perf_evsel *evsel, *intel_pt_evsel = NULL;
+ const struct cpu_map *cpus = evlist->cpus;
+ bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+@@ -655,6 +655,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
+ ptr->have_sched_switch = 3;
+ } else {
+ opts->record_switch_events = true;
++ need_immediate = true;
+ if (cpu_wide)
+ ptr->have_sched_switch = 3;
+ else
+@@ -700,6 +701,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+
++ if (need_immediate)
++ tracking_evsel->immediate = true;
++
+ /* In per-cpu case, always need the time of mmap events etc */
+ if (!cpu_map__empty(cpus)) {
+ perf_evsel__set_sample_bit(tracking_evsel, TIME);
+diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
+index 1dc140c5481d..3f02bea102b1 100644
+--- a/tools/perf/builtin-mem.c
++++ b/tools/perf/builtin-mem.c
+@@ -87,6 +87,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
+ if (mem->operation & MEM_OPERATION_LOAD)
+ perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+
++ if (mem->operation & MEM_OPERATION_STORE)
++ perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
++
+ if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+ rec_argv[i++] = "-W";
+
+diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
+index 87a297dd8901..c33efb9d49f5 100644
+--- a/tools/perf/util/symbol-elf.c
++++ b/tools/perf/util/symbol-elf.c
+@@ -827,7 +827,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
+ sec = syms_ss->symtab;
+ shdr = syms_ss->symshdr;
+
+- if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
++ if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr,
++ ".text", NULL))
+ dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
+
+ if (runtime_ss->opdsec)
+diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
+index c919866853a0..1bfacc8e509a 100644
+--- a/tools/testing/nvdimm/test/nfit.c
++++ b/tools/testing/nvdimm/test/nfit.c
+@@ -13,6 +13,7 @@
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+ #include <linux/platform_device.h>
+ #include <linux/dma-mapping.h>
++#include <linux/workqueue.h>
+ #include <linux/libnvdimm.h>
+ #include <linux/vmalloc.h>
+ #include <linux/device.h>
+@@ -1480,6 +1481,7 @@ static int nfit_test_probe(struct platform_device *pdev)
+ if (nfit_test->setup != nfit_test0_setup)
+ return 0;
+
++ flush_work(&acpi_desc->work);
+ nfit_test->setup_hotplug = 1;
+ nfit_test->setup(nfit_test);
+
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-09-07 19:22 Tom Wijsman
0 siblings, 0 replies; 20+ messages in thread
From: Tom Wijsman @ 2016-09-07 19:22 UTC (permalink / raw
To: gentoo-commits
commit: faa13db0418c5cff069ffd3f6df60308dcfaf77b
Author: Tom Wijsman (TomWij) <TomWij <AT> gentoo <DOT> org>
AuthorDate: Wed Sep 7 20:55:34 2016 +0000
Commit: Tom Wijsman <tomwij <AT> gentoo <DOT> org>
CommitDate: Wed Sep 7 20:57:11 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=faa13db0
Remove the experimental gcc <4.9 optimization patch: it conflicts with the experimental gcc >=4.9 optimization patch, and GCC 4.9.3 is now stable in the tree.
0000_README | 4 -
...able-additional-cpu-optimizations-for-gcc.patch | 327 ---------------------
2 files changed, 331 deletions(-)
diff --git a/0000_README b/0000_README
index d857e6a..c4afba8 100644
--- a/0000_README
+++ b/0000_README
@@ -75,10 +75,6 @@ Patch: 4567_distro-Gentoo-Kconfig.patch
From: Tom Wijsman <TomWij@gentoo.org>
Desc: Add Gentoo Linux support config settings and defaults.
-Patch: 5000_enable-additional-cpu-optimizations-for-gcc.patch
-From: https://github.com/graysky2/kernel_gcc_patch/
-Desc: Kernel patch enables gcc < v4.9 optimizations for additional CPUs.
-
Patch: 5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch
From: http://algo.ing.unimo.it/people/paolo/disk_sched/
Desc: BFQ v7r11 patch 1 for 4.7: Build, cgroups and kconfig bits
diff --git a/5000_enable-additional-cpu-optimizations-for-gcc.patch b/5000_enable-additional-cpu-optimizations-for-gcc.patch
deleted file mode 100644
index f7ab6f0..0000000
--- a/5000_enable-additional-cpu-optimizations-for-gcc.patch
+++ /dev/null
@@ -1,327 +0,0 @@
-This patch has been tested on and known to work with kernel versions from 3.2
-up to the latest git version (pulled on 12/14/2013).
-
-This patch will expand the number of microarchitectures to include new
-processors including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
-14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
-Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 2nd Gen Core
-i3/i5/i7 (Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), and Intel 4th
-Gen Core i3/i5/i7 (Haswell). It also offers the compiler the 'native' flag.
-
-Small but real speed increases are measurable using a make endpoint comparing
-a generic kernel to one built with one of the respective microarchs.
-
-See the following experimental evidence supporting this statement:
-https://github.com/graysky2/kernel_gcc_patch
-
-REQUIREMENTS
-linux version >=3.15
-gcc version <4.9
-
----
-diff -uprN a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
---- a/arch/x86/include/asm/module.h 2013-11-03 18:41:51.000000000 -0500
-+++ b/arch/x86/include/asm/module.h 2013-12-15 06:21:24.351122516 -0500
-@@ -15,6 +15,16 @@
- #define MODULE_PROC_FAMILY "586MMX "
- #elif defined CONFIG_MCORE2
- #define MODULE_PROC_FAMILY "CORE2 "
-+#elif defined CONFIG_MNATIVE
-+#define MODULE_PROC_FAMILY "NATIVE "
-+#elif defined CONFIG_MCOREI7
-+#define MODULE_PROC_FAMILY "COREI7 "
-+#elif defined CONFIG_MCOREI7AVX
-+#define MODULE_PROC_FAMILY "COREI7AVX "
-+#elif defined CONFIG_MCOREAVXI
-+#define MODULE_PROC_FAMILY "COREAVXI "
-+#elif defined CONFIG_MCOREAVX2
-+#define MODULE_PROC_FAMILY "COREAVX2 "
- #elif defined CONFIG_MATOM
- #define MODULE_PROC_FAMILY "ATOM "
- #elif defined CONFIG_M686
-@@ -33,6 +43,18 @@
- #define MODULE_PROC_FAMILY "K7 "
- #elif defined CONFIG_MK8
- #define MODULE_PROC_FAMILY "K8 "
-+#elif defined CONFIG_MK10
-+#define MODULE_PROC_FAMILY "K10 "
-+#elif defined CONFIG_MBARCELONA
-+#define MODULE_PROC_FAMILY "BARCELONA "
-+#elif defined CONFIG_MBOBCAT
-+#define MODULE_PROC_FAMILY "BOBCAT "
-+#elif defined CONFIG_MBULLDOZER
-+#define MODULE_PROC_FAMILY "BULLDOZER "
-+#elif defined CONFIG_MPILEDRIVER
-+#define MODULE_PROC_FAMILY "PILEDRIVER "
-+#elif defined CONFIG_MJAGUAR
-+#define MODULE_PROC_FAMILY "JAGUAR "
- #elif defined CONFIG_MELAN
- #define MODULE_PROC_FAMILY "ELAN "
- #elif defined CONFIG_MCRUSOE
-diff -uprN a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
---- a/arch/x86/Kconfig.cpu 2013-11-03 18:41:51.000000000 -0500
-+++ b/arch/x86/Kconfig.cpu 2013-12-15 06:21:24.351122516 -0500
-@@ -139,7 +139,7 @@ config MPENTIUM4
-
-
- config MK6
-- bool "K6/K6-II/K6-III"
-+ bool "AMD K6/K6-II/K6-III"
- depends on X86_32
- ---help---
- Select this for an AMD K6-family processor. Enables use of
-@@ -147,7 +147,7 @@ config MK6
- flags to GCC.
-
- config MK7
-- bool "Athlon/Duron/K7"
-+ bool "AMD Athlon/Duron/K7"
- depends on X86_32
- ---help---
- Select this for an AMD Athlon K7-family processor. Enables use of
-@@ -155,12 +155,55 @@ config MK7
- flags to GCC.
-
- config MK8
-- bool "Opteron/Athlon64/Hammer/K8"
-+ bool "AMD Opteron/Athlon64/Hammer/K8"
- ---help---
- Select this for an AMD Opteron or Athlon64 Hammer-family processor.
- Enables use of some extended instructions, and passes appropriate
- optimization flags to GCC.
-
-+config MK10
-+ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
-+ ---help---
-+ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
-+ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
-+ Enables use of some extended instructions, and passes appropriate
-+ optimization flags to GCC.
-+
-+config MBARCELONA
-+ bool "AMD Barcelona"
-+ ---help---
-+ Select this for AMD Barcelona and newer processors.
-+
-+ Enables -march=barcelona
-+
-+config MBOBCAT
-+ bool "AMD Bobcat"
-+ ---help---
-+ Select this for AMD Bobcat processors.
-+
-+ Enables -march=btver1
-+
-+config MBULLDOZER
-+ bool "AMD Bulldozer"
-+ ---help---
-+ Select this for AMD Bulldozer processors.
-+
-+ Enables -march=bdver1
-+
-+config MPILEDRIVER
-+ bool "AMD Piledriver"
-+ ---help---
-+ Select this for AMD Piledriver processors.
-+
-+ Enables -march=bdver2
-+
-+config MJAGUAR
-+ bool "AMD Jaguar"
-+ ---help---
-+ Select this for AMD Jaguar processors.
-+
-+ Enables -march=btver2
-+
- config MCRUSOE
- bool "Crusoe"
- depends on X86_32
-@@ -251,8 +294,17 @@ config MPSC
- using the cpu family field
- in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
-
-+config MATOM
-+ bool "Intel Atom"
-+ ---help---
-+
-+ Select this for the Intel Atom platform. Intel Atom CPUs have an
-+ in-order pipelining architecture and thus can benefit from
-+ accordingly optimized code. Use a recent GCC with specific Atom
-+ support in order to fully benefit from selecting this option.
-+
- config MCORE2
-- bool "Core 2/newer Xeon"
-+ bool "Intel Core 2"
- ---help---
-
- Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
-@@ -260,14 +312,40 @@ config MCORE2
- family in /proc/cpuinfo. Newer ones have 6 and older ones 15
- (not a typo)
-
--config MATOM
-- bool "Intel Atom"
-+ Enables -march=core2
-+
-+config MCOREI7
-+ bool "Intel Core i7"
- ---help---
-
-- Select this for the Intel Atom platform. Intel Atom CPUs have an
-- in-order pipelining architecture and thus can benefit from
-- accordingly optimized code. Use a recent GCC with specific Atom
-- support in order to fully benefit from selecting this option.
-+ Select this for the Intel Nehalem platform. Intel Nehalem proecessors
-+ include Core i3, i5, i7, Xeon: 34xx, 35xx, 55xx, 56xx, 75xx processors.
-+
-+ Enables -march=corei7
-+
-+config MCOREI7AVX
-+ bool "Intel Core 2nd Gen AVX"
-+ ---help---
-+
-+ Select this for 2nd Gen Core processors including Sandy Bridge.
-+
-+ Enables -march=corei7-avx
-+
-+config MCOREAVXI
-+ bool "Intel Core 3rd Gen AVX"
-+ ---help---
-+
-+ Select this for 3rd Gen Core processors including Ivy Bridge.
-+
-+ Enables -march=core-avx-i
-+
-+config MCOREAVX2
-+ bool "Intel Core AVX2"
-+ ---help---
-+
-+ Select this for AVX2 enabled processors including Haswell.
-+
-+ Enables -march=core-avx2
-
- config GENERIC_CPU
- bool "Generic-x86-64"
-@@ -276,6 +354,19 @@ config GENERIC_CPU
- Generic x86-64 CPU.
- Run equally well on all x86-64 CPUs.
-
-+config MNATIVE
-+ bool "Native optimizations autodetected by GCC"
-+ ---help---
-+
-+ GCC 4.2 and above support -march=native, which automatically detects
-+ the optimum settings to use based on your processor. -march=native
-+ also detects and applies additional settings beyond -march specific
-+ to your CPU, (eg. -msse4). Unless you have a specific reason not to
-+ (e.g. distcc cross-compiling), you should probably be using
-+ -march=native rather than anything listed below.
-+
-+ Enables -march=native
-+
- endchoice
-
- config X86_GENERIC
-@@ -300,7 +391,7 @@ config X86_INTERNODE_CACHE_SHIFT
- config X86_L1_CACHE_SHIFT
- int
- default "7" if MPENTIUM4 || MPSC
-- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-+ default "6" if MK7 || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MVIAC7 || X86_GENERIC || MNATIVE || GENERIC_CPU
- default "4" if MELAN || M486 || MGEODEGX1
- default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-
-@@ -331,11 +422,11 @@ config X86_ALIGNMENT_16
-
- config X86_INTEL_USERCOPY
- def_bool y
-- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
-+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || MNATIVE || X86_GENERIC || MK8 || MK7 || MK10 || MBARCELONA || MEFFICEON || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2
-
- config X86_USE_PPRO_CHECKSUM
- def_bool y
-- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
-+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MNATIVE
-
- config X86_USE_3DNOW
- def_bool y
-@@ -363,17 +454,17 @@ config X86_P6_NOP
-
- config X86_TSC
- def_bool y
-- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
-+ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7-AVX || MATOM) || X86_64 || MNATIVE
-
- config X86_CMPXCHG64
- def_bool y
-- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
-+ depends on X86_PAE || X86_64 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
-
- # this should be set for all -march=.. options where the compiler
- # generates cmov.
- config X86_CMOV
- def_bool y
-- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
-+ depends on (MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
-
- config X86_MINIMUM_CPU_FAMILY
- int
-diff -uprN a/arch/x86/Makefile b/arch/x86/Makefile
---- a/arch/x86/Makefile 2013-11-03 18:41:51.000000000 -0500
-+++ b/arch/x86/Makefile 2013-12-15 06:21:24.354455723 -0500
-@@ -61,11 +61,26 @@ else
- KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
-
- # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
-+ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
- cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
-+ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
-+ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
-+ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
-+ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
-+ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
-+ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
- cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
-
- cflags-$(CONFIG_MCORE2) += \
-- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
-+ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
-+ cflags-$(CONFIG_MCOREI7) += \
-+ $(call cc-option,-march=corei7,$(call cc-option,-mtune=corei7))
-+ cflags-$(CONFIG_MCOREI7AVX) += \
-+ $(call cc-option,-march=corei7-avx,$(call cc-option,-mtune=corei7-avx))
-+ cflags-$(CONFIG_MCOREAVXI) += \
-+ $(call cc-option,-march=core-avx-i,$(call cc-option,-mtune=core-avx-i))
-+ cflags-$(CONFIG_MCOREAVX2) += \
-+ $(call cc-option,-march=core-avx2,$(call cc-option,-mtune=core-avx2))
- cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
- cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
-diff -uprN a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
---- a/arch/x86/Makefile_32.cpu 2013-11-03 18:41:51.000000000 -0500
-+++ b/arch/x86/Makefile_32.cpu 2013-12-15 06:21:24.354455723 -0500
-@@ -23,7 +23,14 @@ cflags-$(CONFIG_MK6) += -march=k6
- # Please note, that patches that add -march=athlon-xp and friends are pointless.
- # They make zero difference whatsosever to performance at this time.
- cflags-$(CONFIG_MK7) += -march=athlon
-+cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
- cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
-+cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
-+cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon)
-+cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
-+cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
-+cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
-+cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
- cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
- cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
- cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
-@@ -32,6 +39,10 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
- cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
- cflags-$(CONFIG_MVIAC7) += -march=i686
- cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
-+cflags-$(CONFIG_MCOREI7) += -march=i686 $(call tune,corei7)
-+cflags-$(CONFIG_MCOREI7AVX) += -march=i686 $(call tune,corei7-avx)
-+cflags-$(CONFIG_MCOREAVXI) += -march=i686 $(call tune,core-avx-i)
-+cflags-$(CONFIG_MCOREAVX2) += -march=i686 $(call tune,core-avx2)
- cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-09-13 12:16 Mike Pagano
0 siblings, 0 replies; 20+ messages in thread
From: Mike Pagano @ 2016-09-13 12:16 UTC (permalink / raw
To: gentoo-commits
commit: 0dc47d59a59df4a74907657b3df5e618198088ee
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Tue Sep 13 12:16:05 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Tue Sep 13 12:16:05 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=0dc47d59
BFQ version v8r3.
0000_README | 4 +-
...oups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch | 2 +-
...ntroduce-the-BFQ-v7r11-I-O-sched-for-4.7.patch1 | 2 +-
...arly-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch | 2 +-
...-BFQ-v7r11-for-4.7.0-into-BFQ-v8r3-for-4.patch1 | 217 +++++++++++----------
5 files changed, 114 insertions(+), 113 deletions(-)
diff --git a/0000_README b/0000_README
index 6a88eb0..f502025 100644
--- a/0000_README
+++ b/0000_README
@@ -91,9 +91,9 @@ Patch: 5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch
From: http://algo.ing.unimo.it/people/paolo/disk_sched/
Desc: BFQ v7r11 patch 3 for 4.7: Early Queue Merge (EQM)
-Patch: 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r2-for-4.patch1
+Patch: 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r3-for-4.patch1
From: http://algo.ing.unimo.it/people/paolo/disk_sched/
-Desc: BFQ v7r11 patch 4 for 4.7: Early Queue Merge (EQM)
+Desc: BFQ v8r3 patch 4 for 4.7: Early Queue Merge (EQM)
Patch: 5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
From: https://github.com/graysky2/kernel_gcc_patch/
diff --git a/5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch b/5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch
index 45d0b07..ff75a8b 100644
--- a/5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch
+++ b/5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.patch
@@ -1,4 +1,4 @@
-From feb58b4dd1e8fd895f28ba4c759e92febe316cb2 Mon Sep 17 00:00:00 2001
+From 22ee35ec82fa543b65c1b6d516a086a21f723846 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@unimore.it>
Date: Tue, 7 Apr 2015 13:39:12 +0200
Subject: [PATCH 1/4] block: cgroups, kconfig, build bits for BFQ-v7r11-4.7.0
diff --git a/5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.patch1 b/5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.patch1
index 8a67a4b..368a4ff 100644
--- a/5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.patch1
+++ b/5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.patch1
@@ -1,4 +1,4 @@
-From 1f07b3f666e6da78d10e62cfb9696242e5b3005e Mon Sep 17 00:00:00 2001
+From 2aae32be2a18a7d0da104ae42c08cb9bce9d9c7c Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@unimore.it>
Date: Thu, 9 May 2013 19:10:02 +0200
Subject: [PATCH 2/4] block: introduce the BFQ-v7r11 I/O sched for 4.7.0
diff --git a/5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch b/5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch
index eb23acc..a9876aa 100644
--- a/5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch
+++ b/5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.7.patch
@@ -1,4 +1,4 @@
-From d93e55da4df8c5e7c33379780ad7d2fdb02e0568 Mon Sep 17 00:00:00 2001
+From 47de1e46ef5f462e9694e5b0607aec6ad658f1e0 Mon Sep 17 00:00:00 2001
From: Mauro Andreolini <mauro.andreolini@unimore.it>
Date: Sun, 6 Sep 2015 16:09:05 +0200
Subject: [PATCH 3/4] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r11 for
diff --git a/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r2-for-4.patch1 b/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r3-for-4.patch1
similarity index 97%
rename from 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r2-for-4.patch1
rename to 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r3-for-4.patch1
index cbc051f..bf56ac7 100644
--- a/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r2-for-4.patch1
+++ b/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r3-for-4.patch1
@@ -1,16 +1,16 @@
-From 0061399c3c07fb8d119c0d581b613b870e63b165 Mon Sep 17 00:00:00 2001
+From d384ccf796a992e27691b7359ce54534db57e74c Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Tue, 17 May 2016 08:28:04 +0200
-Subject: [PATCH 4/4] block, bfq: turn BFQ-v7r11 for 4.7.0 into BFQ-v8r2 for
+Subject: [PATCH 4/4] block, bfq: turn BFQ-v7r11 for 4.7.0 into BFQ-v8r3 for
4.7.0
---
block/Kconfig.iosched | 2 +-
block/bfq-cgroup.c | 480 +++++----
- block/bfq-iosched.c | 2601 +++++++++++++++++++++++++++++--------------------
+ block/bfq-iosched.c | 2602 +++++++++++++++++++++++++++++--------------------
block/bfq-sched.c | 441 +++++++--
block/bfq.h | 708 +++++++-------
- 5 files changed, 2483 insertions(+), 1749 deletions(-)
+ 5 files changed, 2484 insertions(+), 1749 deletions(-)
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index f78cd1a..6d92579 100644
@@ -881,7 +881,7 @@ index 5ee99ec..c83d90c 100644
static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
-index d1f648d..5bff378 100644
+index d1f648d..3bc1f8b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7,25 +7,26 @@
@@ -1368,7 +1368,7 @@ index d1f648d..5bff378 100644
}
/*
-@@ -856,25 +875,497 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -856,25 +875,498 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* queue. Then we add bfqq to the burst.
*/
bfq_add_to_burst(bfqd, bfqq);
@@ -1603,9 +1603,10 @@ index d1f648d..5bff378 100644
+ jiffies,
+ jiffies_to_msecs(bfqq->wr_cur_max_time));
+ } else if (old_wr_coeff > 1) {
-+ if (interactive) /* update wr duration */
++ if (interactive) { /* update wr coeff and duration */
++ bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
-+ else if (in_burst) {
++ } else if (in_burst) {
+ bfqq->wr_coeff = 1;
+ bfq_log_bfqq(bfqd, bfqq,
+ "wrais ending at %lu, rais_max_time %u",
@@ -1870,7 +1871,7 @@ index d1f648d..5bff378 100644
*/
prev = bfqq->next_rq;
next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
-@@ -887,160 +1378,10 @@ static void bfq_add_request(struct request *rq)
+@@ -887,160 +1379,10 @@ static void bfq_add_request(struct request *rq)
if (prev != bfqq->next_rq)
bfq_pos_tree_add_move(bfqd, bfqq);
@@ -2035,7 +2036,7 @@ index d1f648d..5bff378 100644
if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
time_is_before_jiffies(
bfqq->last_wr_start_finish +
-@@ -1049,16 +1390,43 @@ add_bfqq_busy:
+@@ -1049,16 +1391,43 @@ add_bfqq_busy:
bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
bfqd->wr_busy_queues++;
@@ -2083,7 +2084,7 @@ index d1f648d..5bff378 100644
if (bfqd->low_latency &&
(old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
bfqq->last_wr_start_finish = jiffies;
-@@ -1106,6 +1474,9 @@ static void bfq_remove_request(struct request *rq)
+@@ -1106,6 +1475,9 @@ static void bfq_remove_request(struct request *rq)
struct bfq_data *bfqd = bfqq->bfqd;
const int sync = rq_is_sync(rq);
@@ -2093,7 +2094,7 @@ index d1f648d..5bff378 100644
if (bfqq->next_rq == rq) {
bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
bfq_updated_next_req(bfqd, bfqq);
-@@ -1119,8 +1490,25 @@ static void bfq_remove_request(struct request *rq)
+@@ -1119,8 +1491,25 @@ static void bfq_remove_request(struct request *rq)
elv_rb_del(&bfqq->sort_list, rq);
if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
@@ -2120,7 +2121,7 @@ index d1f648d..5bff378 100644
/*
* Remove queue from request-position tree as it is empty.
*/
-@@ -1134,9 +1522,7 @@ static void bfq_remove_request(struct request *rq)
+@@ -1134,9 +1523,7 @@ static void bfq_remove_request(struct request *rq)
BUG_ON(bfqq->meta_pending == 0);
bfqq->meta_pending--;
}
@@ -2130,7 +2131,7 @@ index d1f648d..5bff378 100644
}
static int bfq_merge(struct request_queue *q, struct request **req,
-@@ -1221,21 +1607,25 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq,
+@@ -1221,21 +1608,25 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq,
bfqq->next_rq = rq;
bfq_remove_request(next);
@@ -2159,7 +2160,7 @@ index d1f648d..5bff378 100644
}
static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
-@@ -1278,7 +1668,7 @@ static int bfq_rq_close_to_sector(void *io_struct, bool request,
+@@ -1278,7 +1669,7 @@ static int bfq_rq_close_to_sector(void *io_struct, bool request,
sector_t sector)
{
return abs(bfq_io_struct_pos(io_struct, request) - sector) <=
@@ -2168,7 +2169,7 @@ index d1f648d..5bff378 100644
}
static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
-@@ -1400,7 +1790,7 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+@@ -1400,7 +1791,7 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
* throughput.
*/
bfqq->new_bfqq = new_bfqq;
@@ -2177,7 +2178,7 @@ index d1f648d..5bff378 100644
return new_bfqq;
}
-@@ -1431,9 +1821,23 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+@@ -1431,9 +1822,23 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
}
/*
@@ -2204,7 +2205,7 @@ index d1f648d..5bff378 100644
* structure otherwise.
*
* The OOM queue is not allowed to participate to cooperation: in fact, since
-@@ -1442,6 +1846,18 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+@@ -1442,6 +1847,18 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
* handle merging with the OOM queue would be quite complex and expensive
* to maintain. Besides, in such a critical condition as an out of memory,
* the benefits of queue merging may be little relevant, or even negligible.
@@ -2223,7 +2224,7 @@ index d1f648d..5bff378 100644
*/
static struct bfq_queue *
bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
-@@ -1451,16 +1867,32 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -1451,16 +1868,32 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (bfqq->new_bfqq)
return bfqq->new_bfqq;
@@ -2259,7 +2260,7 @@ index d1f648d..5bff378 100644
unlikely(in_service_bfqq == &bfqd->oom_bfqq))
goto check_scheduled;
-@@ -1482,7 +1914,15 @@ check_scheduled:
+@@ -1482,7 +1915,15 @@ check_scheduled:
BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent);
@@ -2276,7 +2277,7 @@ index d1f648d..5bff378 100644
bfq_may_be_close_cooperator(bfqq, new_bfqq))
return bfq_setup_merge(bfqq, new_bfqq);
-@@ -1498,46 +1938,11 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
+@@ -1498,46 +1939,11 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
*/
if (!bfqq->bic)
return;
@@ -2324,7 +2325,7 @@ index d1f648d..5bff378 100644
}
static void bfq_get_bic_reference(struct bfq_queue *bfqq)
-@@ -1562,6 +1967,40 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+@@ -1562,6 +1968,40 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
if (bfq_bfqq_IO_bound(bfqq))
bfq_mark_bfqq_IO_bound(new_bfqq);
bfq_clear_bfqq_IO_bound(bfqq);
@@ -2365,7 +2366,7 @@ index d1f648d..5bff378 100644
/*
* Grab a reference to the bic, to prevent it from being destroyed
* before being possibly touched by a bfq_split_bfqq().
-@@ -1588,18 +2027,6 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+@@ -1588,18 +2028,6 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
bfq_put_queue(bfqq);
}
@@ -2384,7 +2385,7 @@ index d1f648d..5bff378 100644
static int bfq_allow_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
-@@ -1637,30 +2064,86 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+@@ -1637,30 +2065,86 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
* to decide whether bio and rq can be merged.
*/
bfqq = new_bfqq;
@@ -2477,7 +2478,7 @@ index d1f648d..5bff378 100644
bfqd->in_service_queue = bfqq;
}
-@@ -1676,31 +2159,6 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
+@@ -1676,31 +2160,6 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
return bfqq;
}
@@ -2509,7 +2510,7 @@ index d1f648d..5bff378 100644
static void bfq_arm_slice_timer(struct bfq_data *bfqd)
{
struct bfq_queue *bfqq = bfqd->in_service_queue;
-@@ -1725,62 +2183,34 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
+@@ -1725,62 +2184,34 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
* being too ill-treated, grant them a small fraction of the
* assigned budget before reducing the waiting time to
* BFQ_MIN_TT. This happened to help reduce latency.
@@ -2593,7 +2594,7 @@ index d1f648d..5bff378 100644
struct bfq_queue *bfqq = RQ_BFQQ(rq);
/*
-@@ -1794,15 +2224,9 @@ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
+@@ -1794,15 +2225,9 @@ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
* incrementing bfqq->dispatched.
*/
bfqq->dispatched++;
@@ -2610,7 +2611,7 @@ index d1f648d..5bff378 100644
}
/*
-@@ -1822,18 +2246,12 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
+@@ -1822,18 +2247,12 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
rq = rq_entry_fifo(bfqq->fifo.next);
@@ -2630,7 +2631,7 @@ index d1f648d..5bff378 100644
static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
BUG_ON(bfqq != bfqd->in_service_queue);
-@@ -1850,12 +2268,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+@@ -1850,12 +2269,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_mark_bfqq_split_coop(bfqq);
if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
@@ -2652,7 +2653,7 @@ index d1f648d..5bff378 100644
bfq_del_bfqq_busy(bfqd, bfqq, 1);
} else {
bfq_activate_bfqq(bfqd, bfqq);
-@@ -1882,10 +2303,19 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+@@ -1882,10 +2304,19 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
struct request *next_rq;
int budget, min_budget;
@@ -2674,7 +2675,7 @@ index d1f648d..5bff378 100644
bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d",
bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
-@@ -1894,7 +2324,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+@@ -1894,7 +2325,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
@@ -2683,7 +2684,7 @@ index d1f648d..5bff378 100644
switch (reason) {
/*
* Caveat: in all the following cases we trade latency
-@@ -1936,14 +2366,10 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+@@ -1936,14 +2367,10 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
break;
case BFQ_BFQQ_BUDGET_TIMEOUT:
/*
@@ -2702,7 +2703,7 @@ index d1f648d..5bff378 100644
*/
budget = min(budget * 2, bfqd->bfq_max_budget);
break;
-@@ -1960,17 +2386,49 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+@@ -1960,17 +2387,49 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
budget = min(budget * 4, bfqd->bfq_max_budget);
break;
case BFQ_BFQQ_NO_MORE_REQUESTS:
@@ -2759,7 +2760,7 @@ index d1f648d..5bff378 100644
*/
budget = bfqd->bfq_max_budget;
-@@ -1981,65 +2439,105 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+@@ -1981,65 +2440,105 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget);
/*
@@ -2894,7 +2895,7 @@ index d1f648d..5bff378 100644
/*
* Calculate the bandwidth for the last slice. We use a 64 bit
-@@ -2048,32 +2546,51 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -2048,32 +2547,51 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* and to avoid overflows.
*/
bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
@@ -2962,7 +2963,7 @@ index d1f648d..5bff378 100644
}
update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
-@@ -2086,9 +2603,8 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -2086,9 +2604,8 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
int dev_type = blk_queue_nonrot(bfqd->queue);
if (bfqd->bfq_user_max_budget == 0) {
bfqd->bfq_max_budget =
@@ -2974,7 +2975,7 @@ index d1f648d..5bff378 100644
bfqd->bfq_max_budget);
}
if (bfqd->device_speed == BFQ_BFQD_FAST &&
-@@ -2102,38 +2618,35 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -2102,38 +2619,35 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqd->RT_prod = R_fast[dev_type] *
T_fast[dev_type];
}
@@ -3038,7 +3039,7 @@ index d1f648d..5bff378 100644
}
/*
-@@ -2191,6 +2704,15 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -2191,6 +2705,15 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
@@ -3054,7 +3055,7 @@ index d1f648d..5bff378 100644
return max(bfqq->last_idle_bklogged +
HZ * bfqq->service_from_backlogged /
bfqd->bfq_wr_max_softrt_rate,
-@@ -2198,13 +2720,21 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+@@ -2198,13 +2721,21 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
}
/*
@@ -3081,7 +3082,7 @@ index d1f648d..5bff378 100644
}
/**
-@@ -2214,28 +2744,24 @@ static unsigned long bfq_infinity_from_now(unsigned long now)
+@@ -2214,28 +2745,24 @@ static unsigned long bfq_infinity_from_now(unsigned long now)
* @compensate: if true, compensate for the time spent idling.
* @reason: the reason causing the expiration.
*
@@ -3127,7 +3128,7 @@ index d1f648d..5bff378 100644
*/
static void bfq_bfqq_expire(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
-@@ -2243,40 +2769,53 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+@@ -2243,40 +2770,53 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
enum bfqq_expiration reason)
{
bool slow;
@@ -3203,7 +3204,7 @@ index d1f648d..5bff378 100644
bfq_clear_bfqq_IO_bound(bfqq);
if (bfqd->low_latency && bfqq->wr_coeff == 1)
-@@ -2285,19 +2824,23 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+@@ -2285,19 +2825,23 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
RB_EMPTY_ROOT(&bfqq->sort_list)) {
/*
@@ -3235,7 +3236,7 @@ index d1f648d..5bff378 100644
/*
* The application is still waiting for the
* completion of one or more requests:
-@@ -2314,7 +2857,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+@@ -2314,7 +2858,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
* happened to be in the past.
*/
bfqq->soft_rt_next_start =
@@ -3244,7 +3245,7 @@ index d1f648d..5bff378 100644
/*
* Schedule an update of soft_rt_next_start to when
* the task may be discovered to be isochronous.
-@@ -2324,15 +2867,27 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+@@ -2324,15 +2868,27 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
}
bfq_log_bfqq(bfqd, bfqq,
@@ -3274,7 +3275,7 @@ index d1f648d..5bff378 100644
}
/*
-@@ -2342,20 +2897,17 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+@@ -2342,20 +2898,17 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
*/
static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
{
@@ -3303,7 +3304,7 @@ index d1f648d..5bff378 100644
static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
{
bfq_log_bfqq(bfqq->bfqd, bfqq,
-@@ -2397,10 +2949,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2397,10 +2950,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
{
struct bfq_data *bfqd = bfqq->bfqd;
bool idling_boosts_thr, idling_boosts_thr_without_issues,
@@ -3317,7 +3318,7 @@ index d1f648d..5bff378 100644
/*
* The next variable takes into account the cases where idling
* boosts the throughput.
-@@ -2422,7 +2976,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2422,7 +2977,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
*/
idling_boosts_thr = !bfqd->hw_tag ||
(!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
@@ -3326,7 +3327,7 @@ index d1f648d..5bff378 100644
/*
* The value of the next variable,
-@@ -2463,74 +3017,27 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2463,74 +3018,27 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
bfqd->wr_busy_queues == 0;
/*
@@ -3420,7 +3421,7 @@ index d1f648d..5bff378 100644
* (i) each of these processes must get the same throughput as
* the others;
* (ii) all these processes have the same I/O pattern
-@@ -2552,26 +3059,53 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2552,26 +3060,53 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* words, only if sub-condition (i) holds, then idling is
* allowed, and the device tends to be prevented from queueing
* many requests, possibly of several processes. The reason
@@ -3494,7 +3495,7 @@ index d1f648d..5bff378 100644
*
* According to the above considerations, the next variable is
* true (only) if sub-condition (i) holds. To compute the
-@@ -2579,7 +3113,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2579,7 +3114,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* the function bfq_symmetric_scenario(), but also check
* whether bfqq is being weight-raised, because
* bfq_symmetric_scenario() does not take into account also
@@ -3503,7 +3504,7 @@ index d1f648d..5bff378 100644
* bfq_weights_tree_add()).
*
* As a side note, it is worth considering that the above
-@@ -2601,17 +3135,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2601,17 +3136,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* bfqq. Such a case is when bfqq became active in a burst of
* queue activations. Queues that became active during a large
* burst benefit only from throughput, as discussed in the
@@ -3526,7 +3527,7 @@ index d1f648d..5bff378 100644
/*
* We have now all the components we need to compute the return
-@@ -2621,6 +3154,14 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2621,6 +3155,14 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* 2) idling either boosts the throughput (without issues), or
* is necessary to preserve service guarantees.
*/
@@ -3541,7 +3542,7 @@ index d1f648d..5bff378 100644
return bfq_bfqq_sync(bfqq) &&
(idling_boosts_thr_without_issues ||
idling_needed_for_service_guarantees);
-@@ -2632,7 +3173,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+@@ -2632,7 +3174,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* 1) the queue must remain in service and cannot be expired, and
* 2) the device must be idled to wait for the possible arrival of a new
* request for the queue.
@@ -3550,7 +3551,7 @@ index d1f648d..5bff378 100644
* why performing device idling is the best choice to boost the throughput
* and preserve service guarantees when bfq_bfqq_may_idle itself
* returns true.
-@@ -2698,9 +3239,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+@@ -2698,9 +3240,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
*/
bfq_clear_bfqq_wait_request(bfqq);
del_timer(&bfqd->idle_slice_timer);
@@ -3560,7 +3561,7 @@ index d1f648d..5bff378 100644
}
goto keep_queue;
}
-@@ -2745,14 +3284,11 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+@@ -2745,14 +3285,11 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
/*
@@ -3578,7 +3579,7 @@ index d1f648d..5bff378 100644
time_is_before_jiffies(bfqq->last_wr_start_finish +
bfqq->wr_cur_max_time)) {
bfqq->last_wr_start_finish = jiffies;
-@@ -2811,13 +3347,29 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
+@@ -2811,13 +3348,29 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
*/
if (!bfqd->rq_in_driver)
bfq_schedule_dispatch(bfqd);
@@ -3608,7 +3609,7 @@ index d1f648d..5bff378 100644
bfq_update_wr_data(bfqd, bfqq);
bfq_log_bfqq(bfqd, bfqq,
-@@ -2833,9 +3385,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
+@@ -2833,9 +3386,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
bfqd->in_service_bic = RQ_BIC(rq);
}
@@ -3619,7 +3620,7 @@ index d1f648d..5bff378 100644
goto expire;
return dispatched;
-@@ -2881,8 +3431,8 @@ static int bfq_forced_dispatch(struct bfq_data *bfqd)
+@@ -2881,8 +3432,8 @@ static int bfq_forced_dispatch(struct bfq_data *bfqd)
st = bfq_entity_service_tree(&bfqq->entity);
dispatched += __bfq_forced_dispatch_bfqq(bfqq);
@@ -3629,7 +3630,7 @@ index d1f648d..5bff378 100644
bfq_forget_idle(st);
}
-@@ -2895,9 +3445,9 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+@@ -2895,9 +3446,9 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
{
struct bfq_data *bfqd = q->elevator->elevator_data;
struct bfq_queue *bfqq;
@@ -3640,7 +3641,7 @@ index d1f648d..5bff378 100644
if (bfqd->busy_queues == 0)
return 0;
-@@ -2908,21 +3458,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+@@ -2908,21 +3459,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
if (!bfqq)
return 0;
@@ -3663,7 +3664,7 @@ index d1f648d..5bff378 100644
bfq_clear_bfqq_wait_request(bfqq);
BUG_ON(timer_pending(&bfqd->idle_slice_timer));
-@@ -2933,6 +3469,8 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+@@ -2933,6 +3470,8 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
bfq_log_bfqq(bfqd, bfqq, "dispatched %s request",
bfq_bfqq_sync(bfqq) ? "sync" : "async");
@@ -3672,7 +3673,7 @@ index d1f648d..5bff378 100644
return 1;
}
-@@ -2944,23 +3482,22 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+@@ -2944,23 +3483,22 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
*/
static void bfq_put_queue(struct bfq_queue *bfqq)
{
@@ -3701,7 +3702,7 @@ index d1f648d..5bff378 100644
if (bfq_bfqq_sync(bfqq))
/*
-@@ -2973,7 +3510,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
+@@ -2973,7 +3511,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
*/
hlist_del_init(&bfqq->burst_list_node);
@@ -3710,7 +3711,7 @@ index d1f648d..5bff378 100644
kmem_cache_free(bfq_pool, bfqq);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
-@@ -3007,8 +3544,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+@@ -3007,8 +3545,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_schedule_dispatch(bfqd);
}
@@ -3720,7 +3721,7 @@ index d1f648d..5bff378 100644
bfq_put_cooperator(bfqq);
-@@ -3019,26 +3555,7 @@ static void bfq_init_icq(struct io_cq *icq)
+@@ -3019,26 +3556,7 @@ static void bfq_init_icq(struct io_cq *icq)
{
struct bfq_io_cq *bic = icq_to_bic(icq);
@@ -3748,7 +3749,7 @@ index d1f648d..5bff378 100644
}
static void bfq_exit_icq(struct io_cq *icq)
-@@ -3046,21 +3563,21 @@ static void bfq_exit_icq(struct io_cq *icq)
+@@ -3046,21 +3564,21 @@ static void bfq_exit_icq(struct io_cq *icq)
struct bfq_io_cq *bic = icq_to_bic(icq);
struct bfq_data *bfqd = bic_to_bfqd(bic);
@@ -3777,7 +3778,7 @@ index d1f648d..5bff378 100644
}
}
-@@ -3068,7 +3585,8 @@ static void bfq_exit_icq(struct io_cq *icq)
+@@ -3068,7 +3586,8 @@ static void bfq_exit_icq(struct io_cq *icq)
* Update the entity prio values; note that the new values will not
* be used until the next (re)activation.
*/
@@ -3787,7 +3788,7 @@ index d1f648d..5bff378 100644
{
struct task_struct *tsk = current;
int ioprio_class;
-@@ -3100,7 +3618,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
+@@ -3100,7 +3619,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
break;
}
@@ -3796,7 +3797,7 @@ index d1f648d..5bff378 100644
printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n",
bfqq->new_ioprio);
BUG();
-@@ -3108,45 +3626,40 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
+@@ -3108,45 +3627,40 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
bfqq->entity.prio_changed = 1;
@@ -3856,7 +3857,7 @@ index d1f648d..5bff378 100644
}
static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
-@@ -3155,8 +3668,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3155,8 +3669,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
RB_CLEAR_NODE(&bfqq->entity.rb_node);
INIT_LIST_HEAD(&bfqq->fifo);
INIT_HLIST_NODE(&bfqq->burst_list_node);
@@ -3867,7 +3868,7 @@ index d1f648d..5bff378 100644
bfqq->bfqd = bfqd;
if (bic)
-@@ -3166,6 +3680,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3166,6 +3681,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfq_class_idle(bfqq))
bfq_mark_bfqq_idle_window(bfqq);
bfq_mark_bfqq_sync(bfqq);
@@ -3875,7 +3876,7 @@ index d1f648d..5bff378 100644
} else
bfq_clear_bfqq_sync(bfqq);
bfq_mark_bfqq_IO_bound(bfqq);
-@@ -3175,72 +3690,17 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3175,72 +3691,17 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqq->pid = pid;
bfqq->wr_coeff = 1;
@@ -3954,7 +3955,7 @@ index d1f648d..5bff378 100644
}
static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
-@@ -3263,44 +3723,60 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
+@@ -3263,44 +3724,60 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
}
static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
@@ -4033,7 +4034,7 @@ index d1f648d..5bff378 100644
return bfqq;
}
-@@ -3316,37 +3792,21 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd,
+@@ -3316,37 +3793,21 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd,
bic->ttime.ttime_samples;
}
@@ -4084,7 +4085,7 @@ index d1f648d..5bff378 100644
}
/*
-@@ -3364,7 +3824,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
+@@ -3364,7 +3825,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
return;
/* Idle window just restored, statistics are meaningless. */
@@ -4094,7 +4095,7 @@ index d1f648d..5bff378 100644
return;
enable_idle = bfq_bfqq_idle_window(bfqq);
-@@ -3404,22 +3865,13 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3404,22 +3866,13 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_update_io_thinktime(bfqd, bic);
bfq_update_io_seektime(bfqd, bfqq, rq);
@@ -4119,7 +4120,7 @@ index d1f648d..5bff378 100644
bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
-@@ -3433,14 +3885,15 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3433,14 +3886,15 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* is small and the queue is not to be expired, then
* just exit.
*
@@ -4143,7 +4144,7 @@ index d1f648d..5bff378 100644
*/
if (small_req && !budget_timeout)
return;
-@@ -3453,9 +3906,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3453,9 +3907,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
*/
bfq_clear_bfqq_wait_request(bfqq);
del_timer(&bfqd->idle_slice_timer);
@@ -4153,7 +4154,7 @@ index d1f648d..5bff378 100644
/*
* The queue is not empty, because a new request just
-@@ -3499,27 +3950,19 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+@@ -3499,27 +3951,19 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
*/
new_bfqq->allocated[rq_data_dir(rq)]++;
bfqq->allocated[rq_data_dir(rq)]--;
@@ -4184,7 +4185,7 @@ index d1f648d..5bff378 100644
rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
list_add_tail(&rq->queuelist, &bfqq->fifo);
-@@ -3528,8 +3971,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+@@ -3528,8 +3972,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
static void bfq_update_hw_tag(struct bfq_data *bfqd)
{
@@ -4195,7 +4196,7 @@ index d1f648d..5bff378 100644
if (bfqd->hw_tag == 1)
return;
-@@ -3555,48 +3998,45 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
+@@ -3555,48 +3999,45 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
{
struct bfq_queue *bfqq = RQ_BFQQ(rq);
struct bfq_data *bfqd = bfqq->bfqd;
@@ -4264,7 +4265,7 @@ index d1f648d..5bff378 100644
*/
if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
RB_EMPTY_ROOT(&bfqq->sort_list))
-@@ -3608,10 +4048,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
+@@ -3608,10 +4049,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
* or if we want to idle in case it has no pending requests.
*/
if (bfqd->in_service_queue == bfqq) {
@@ -4276,7 +4277,7 @@ index d1f648d..5bff378 100644
bfq_arm_slice_timer(bfqd);
goto out;
} else if (bfq_may_expire_for_budg_timeout(bfqq))
-@@ -3682,14 +4119,14 @@ static void bfq_put_request(struct request *rq)
+@@ -3682,14 +4120,14 @@ static void bfq_put_request(struct request *rq)
rq->elv.priv[1] = NULL;
bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d",
@@ -4293,7 +4294,7 @@ index d1f648d..5bff378 100644
*/
static struct bfq_queue *
bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
-@@ -3727,11 +4164,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+@@ -3727,11 +4165,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
unsigned long flags;
bool split = false;
@@ -4306,7 +4307,7 @@ index d1f648d..5bff378 100644
if (!bic)
goto queue_fail;
-@@ -3741,23 +4175,47 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+@@ -3741,23 +4176,47 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
new_queue:
bfqq = bic_to_bfqq(bic, is_sync);
if (!bfqq || bfqq == &bfqd->oom_bfqq) {
@@ -4361,7 +4362,7 @@ index d1f648d..5bff378 100644
bfqq = bfq_split_bfqq(bic, bfqq);
split = true;
if (!bfqq)
-@@ -3766,9 +4224,8 @@ new_queue:
+@@ -3766,9 +4225,8 @@ new_queue:
}
bfqq->allocated[rw]++;
@@ -4373,7 +4374,7 @@ index d1f648d..5bff378 100644
rq->elv.priv[0] = bic;
rq->elv.priv[1] = bfqq;
-@@ -3783,7 +4240,6 @@ new_queue:
+@@ -3783,7 +4241,6 @@ new_queue:
if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
bfqq->bic = bic;
if (split) {
@@ -4381,7 +4382,7 @@ index d1f648d..5bff378 100644
/*
* If the queue has just been split from a shared
* queue, restore the idle window and the possible
-@@ -3793,6 +4249,9 @@ new_queue:
+@@ -3793,6 +4250,9 @@ new_queue:
}
}
@@ -4391,7 +4392,7 @@ index d1f648d..5bff378 100644
spin_unlock_irqrestore(q->queue_lock, flags);
return 0;
-@@ -3872,6 +4331,7 @@ static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
+@@ -3872,6 +4332,7 @@ static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
cancel_work_sync(&bfqd->unplug_work);
}
@@ -4399,7 +4400,7 @@ index d1f648d..5bff378 100644
static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
struct bfq_queue **bfqq_ptr)
{
-@@ -3880,9 +4340,9 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+@@ -3880,9 +4341,9 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
if (bfqq) {
@@ -4411,7 +4412,7 @@ index d1f648d..5bff378 100644
bfq_put_queue(bfqq);
*bfqq_ptr = NULL;
}
-@@ -3904,6 +4364,7 @@ static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
+@@ -3904,6 +4365,7 @@ static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
}
@@ -4419,7 +4420,7 @@ index d1f648d..5bff378 100644
static void bfq_exit_queue(struct elevator_queue *e)
{
-@@ -3923,8 +4384,6 @@ static void bfq_exit_queue(struct elevator_queue *e)
+@@ -3923,8 +4385,6 @@ static void bfq_exit_queue(struct elevator_queue *e)
bfq_shutdown_timer_wq(bfqd);
@@ -4428,7 +4429,7 @@ index d1f648d..5bff378 100644
BUG_ON(timer_pending(&bfqd->idle_slice_timer));
#ifdef CONFIG_BFQ_GROUP_IOSCHED
-@@ -3973,11 +4432,14 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+@@ -3973,11 +4433,14 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
* will not attempt to free it.
*/
bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
@@ -4444,7 +4445,7 @@ index d1f648d..5bff378 100644
/*
* Trigger weight initialization, according to ioprio, at the
* oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
-@@ -3996,9 +4458,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+@@ -3996,9 +4459,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
goto out_free;
bfq_init_root_group(bfqd->root_group, bfqd);
bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
@@ -4454,7 +4455,7 @@ index d1f648d..5bff378 100644
init_timer(&bfqd->idle_slice_timer);
bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
-@@ -4023,20 +4482,19 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+@@ -4023,20 +4483,19 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfqd->bfq_back_penalty = bfq_back_penalty;
bfqd->bfq_slice_idle = bfq_slice_idle;
bfqd->bfq_class_idle_last_service = 0;
@@ -4482,7 +4483,7 @@ index d1f648d..5bff378 100644
bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300);
bfqd->bfq_wr_max_time = 0;
bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
-@@ -4048,16 +4506,15 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+@@ -4048,16 +4507,15 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
* video.
*/
bfqd->wr_busy_queues = 0;
@@ -4503,7 +4504,7 @@ index d1f648d..5bff378 100644
bfqd->device_speed = BFQ_BFQD_FAST;
return 0;
-@@ -4161,10 +4618,8 @@ SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
+@@ -4161,10 +4619,8 @@ SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
@@ -4516,7 +4517,7 @@ index d1f648d..5bff378 100644
SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0);
SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1);
-@@ -4199,10 +4654,6 @@ STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
+@@ -4199,10 +4655,6 @@ STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
INT_MAX, 0);
STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
@@ -4527,7 +4528,7 @@ index d1f648d..5bff378 100644
STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0);
STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1);
STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX,
-@@ -4224,10 +4675,8 @@ static ssize_t bfq_weights_store(struct elevator_queue *e,
+@@ -4224,10 +4676,8 @@ static ssize_t bfq_weights_store(struct elevator_queue *e,
static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
{
@@ -4539,7 +4540,7 @@ index d1f648d..5bff378 100644
else
return bfq_default_max_budget;
}
-@@ -4252,6 +4701,10 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
+@@ -4252,6 +4702,10 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
return ret;
}
@@ -4550,7 +4551,7 @@ index d1f648d..5bff378 100644
static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
const char *page, size_t count)
{
-@@ -4264,13 +4717,31 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
+@@ -4264,13 +4718,31 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
else if (__data > INT_MAX)
__data = INT_MAX;
@@ -4583,7 +4584,7 @@ index d1f648d..5bff378 100644
static ssize_t bfq_low_latency_store(struct elevator_queue *e,
const char *page, size_t count)
{
-@@ -4297,9 +4768,8 @@ static struct elv_fs_entry bfq_attrs[] = {
+@@ -4297,9 +4769,8 @@ static struct elv_fs_entry bfq_attrs[] = {
BFQ_ATTR(back_seek_penalty),
BFQ_ATTR(slice_idle),
BFQ_ATTR(max_budget),
@@ -4594,7 +4595,7 @@ index d1f648d..5bff378 100644
BFQ_ATTR(low_latency),
BFQ_ATTR(wr_coeff),
BFQ_ATTR(wr_max_time),
-@@ -4342,9 +4812,28 @@ static struct elevator_type iosched_bfq = {
+@@ -4342,9 +4813,28 @@ static struct elevator_type iosched_bfq = {
.elevator_owner = THIS_MODULE,
};
@@ -4619,11 +4620,11 @@ index d1f648d..5bff378 100644
static int __init bfq_init(void)
{
int ret;
-+ char msg[50] = "BFQ I/O-scheduler: v8r2";
++ char msg[50] = "BFQ I/O-scheduler: v8r3";
/*
* Can be 0 on HZ < 1000 setups.
-@@ -4352,9 +4841,6 @@ static int __init bfq_init(void)
+@@ -4352,9 +4842,6 @@ static int __init bfq_init(void)
if (bfq_slice_idle == 0)
bfq_slice_idle = 1;
@@ -4633,7 +4634,7 @@ index d1f648d..5bff378 100644
#ifdef CONFIG_BFQ_GROUP_IOSCHED
ret = blkcg_policy_register(&blkcg_policy_bfq);
if (ret)
-@@ -4370,23 +4856,34 @@ static int __init bfq_init(void)
+@@ -4370,23 +4857,34 @@ static int __init bfq_init(void)
* installed on the reference devices (see the comments before the
* definitions of the two arrays).
*/
@@ -5425,13 +5426,13 @@ index a64fec1..7d73b9d 100644
bfqd->wr_busy_queues++;
}
diff --git a/block/bfq.h b/block/bfq.h
-index f73c942..c6ba099 100644
+index f73c942..49d28b9 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
/*
- * BFQ-v7r11 for 4.5.0: data structures and common functions prototypes.
-+ * BFQ-v8r2 for 4.7.0: data structures and common functions prototypes.
++ * BFQ-v8r3 for 4.7.0: data structures and common functions prototypes.
*
* Based on ideas and code from CFQ:
* Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-09-15 14:55 Mike Pagano
0 siblings, 0 replies; 20+ messages in thread
From: Mike Pagano @ 2016-09-15 14:55 UTC (permalink / raw
To: gentoo-commits
commit: 8c5fb835790a108b9ab7d774b0bf85719dadc05f
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Thu Sep 15 14:55:32 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Thu Sep 15 14:55:32 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=8c5fb835
Linux patch 4.7.4
0000_README | 4 +
1003_linux-4.7.4.patch | 2424 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 2428 insertions(+)
diff --git a/0000_README b/0000_README
index f502025..2b11683 100644
--- a/0000_README
+++ b/0000_README
@@ -55,6 +55,10 @@ Patch: 1002_linux-4.7.3.patch
From: http://www.kernel.org
Desc: Linux 4.7.3
+Patch: 1003_linux-4.7.4.patch
+From: http://www.kernel.org
+Desc: Linux 4.7.4
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1003_linux-4.7.4.patch b/1003_linux-4.7.4.patch
new file mode 100644
index 0000000..5c75a93
--- /dev/null
+++ b/1003_linux-4.7.4.patch
@@ -0,0 +1,2424 @@
+diff --git a/Makefile b/Makefile
+index 4afff18fcb12..ec3bd119fbf8 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 7
+-SUBLEVEL = 3
++SUBLEVEL = 4
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
+index 60078a67d7e3..b15e1c158bda 100644
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -1597,6 +1597,9 @@ void __init enable_IR_x2apic(void)
+ unsigned long flags;
+ int ret, ir_stat;
+
++ if (skip_ioapic_setup)
++ return;
++
+ ir_stat = irq_remapping_prepare();
+ if (ir_stat < 0 && !x2apic_supported())
+ return;
+diff --git a/block/blk-core.c b/block/blk-core.c
+index 2475b1c72773..b993f88280a9 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -515,7 +515,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
+
+ void blk_set_queue_dying(struct request_queue *q)
+ {
+- queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
++ spin_lock_irq(q->queue_lock);
++ queue_flag_set(QUEUE_FLAG_DYING, q);
++ spin_unlock_irq(q->queue_lock);
+
+ if (q->mq_ops)
+ blk_mq_wake_waiters(q);
+diff --git a/block/blk-merge.c b/block/blk-merge.c
+index 261353166dcf..bea93441a9c6 100644
+--- a/block/blk-merge.c
++++ b/block/blk-merge.c
+@@ -94,9 +94,31 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
+ bool do_split = true;
+ struct bio *new = NULL;
+ const unsigned max_sectors = get_max_io_size(q, bio);
++ unsigned bvecs = 0;
+
+ bio_for_each_segment(bv, bio, iter) {
+ /*
++ * With arbitrary bio size, the incoming bio may be very
++ * big. We have to split the bio into small bios so that
++ * each holds at most BIO_MAX_PAGES bvecs because
++ * bio_clone() can fail to allocate big bvecs.
++ *
++ * It would have been better to apply the limit per
++ * request queue in which bio_clone() is involved,
++ * instead of globally. The biggest blocker is the
++ * bio_clone() in bio bounce.
++ *
++ * If a bio is split for this reason, we should have
++ * allowed bios merging to continue, but don't do
++ * that now, to keep the change simple.
++ *
++ * TODO: deal with bio bounce's bio_clone() gracefully
++ * and convert the global limit into per-queue limit.
++ */
++ if (bvecs++ >= BIO_MAX_PAGES)
++ goto split;
++
++ /*
+ * If the queue doesn't support SG gaps and adding this
+ * offset would create a gap, disallow it.
+ */
+diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
+index 84708a5f8c52..b206115d761c 100644
+--- a/drivers/block/floppy.c
++++ b/drivers/block/floppy.c
+@@ -3663,11 +3663,6 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
+
+ opened_bdev[drive] = bdev;
+
+- if (!(mode & (FMODE_READ|FMODE_WRITE))) {
+- res = -EINVAL;
+- goto out;
+- }
+-
+ res = -ENXIO;
+
+ if (!floppy_track_buffer) {
+@@ -3711,20 +3706,21 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
+ if (UFDCS->rawcmd == 1)
+ UFDCS->rawcmd = 2;
+
+- UDRS->last_checked = 0;
+- clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+- check_disk_change(bdev);
+- if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+- goto out;
+- if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+- goto out;
+-
+- res = -EROFS;
+-
+- if ((mode & FMODE_WRITE) &&
+- !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
+- goto out;
+-
++ if (!(mode & FMODE_NDELAY)) {
++ if (mode & (FMODE_READ|FMODE_WRITE)) {
++ UDRS->last_checked = 0;
++ clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
++ check_disk_change(bdev);
++ if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
++ goto out;
++ if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
++ goto out;
++ }
++ res = -EROFS;
++ if ((mode & FMODE_WRITE) &&
++ !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
++ goto out;
++ }
+ mutex_unlock(&open_lock);
+ mutex_unlock(&floppy_mutex);
+ return 0;
+diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
+index 0bb44d5b5df4..2ee40fd360ca 100644
+--- a/drivers/cpufreq/cpufreq-dt-platdev.c
++++ b/drivers/cpufreq/cpufreq-dt-platdev.c
+@@ -74,6 +74,8 @@ static const struct of_device_id machines[] __initconst = {
+ { .compatible = "ti,omap5", },
+
+ { .compatible = "xlnx,zynq-7000", },
++
++ { }
+ };
+
+ static int __init cpufreq_dt_platdev_init(void)
+diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
+index 6dc597126b79..b3044219772c 100644
+--- a/drivers/crypto/caam/caamalg.c
++++ b/drivers/crypto/caam/caamalg.c
+@@ -556,7 +556,10 @@ skip_enc:
+
+ /* Read and write assoclen bytes */
+ append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+- append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
++ if (alg->caam.geniv)
++ append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize);
++ else
++ append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+ /* Skip assoc data */
+ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+@@ -565,6 +568,14 @@ skip_enc:
+ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
+ KEY_VLF);
+
++ if (alg->caam.geniv) {
++ append_seq_load(desc, ivsize, LDST_CLASS_1_CCB |
++ LDST_SRCDST_BYTE_CONTEXT |
++ (ctx1_iv_off << LDST_OFFSET_SHIFT));
++ append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO |
++ (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize);
++ }
++
+ /* Load Counter into CONTEXT1 reg */
+ if (is_rfc3686)
+ append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
+@@ -2150,7 +2161,7 @@ static void init_authenc_job(struct aead_request *req,
+
+ init_aead_job(req, edesc, all_contig, encrypt);
+
+- if (ivsize && (is_rfc3686 || !(alg->caam.geniv && encrypt)))
++ if (ivsize && ((is_rfc3686 && encrypt) || !alg->caam.geniv))
+ append_load_as_imm(desc, req->iv, ivsize,
+ LDST_CLASS_1_CCB |
+ LDST_SRCDST_BYTE_CONTEXT |
+@@ -2537,20 +2548,6 @@ static int aead_decrypt(struct aead_request *req)
+ return ret;
+ }
+
+-static int aead_givdecrypt(struct aead_request *req)
+-{
+- struct crypto_aead *aead = crypto_aead_reqtfm(req);
+- unsigned int ivsize = crypto_aead_ivsize(aead);
+-
+- if (req->cryptlen < ivsize)
+- return -EINVAL;
+-
+- req->cryptlen -= ivsize;
+- req->assoclen += ivsize;
+-
+- return aead_decrypt(req);
+-}
+-
+ /*
+ * allocate and map the ablkcipher extended descriptor for ablkcipher
+ */
+@@ -3210,7 +3207,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = MD5_DIGEST_SIZE,
+ },
+@@ -3256,7 +3253,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA1_DIGEST_SIZE,
+ },
+@@ -3302,7 +3299,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA224_DIGEST_SIZE,
+ },
+@@ -3348,7 +3345,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+ },
+@@ -3394,7 +3391,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA384_DIGEST_SIZE,
+ },
+@@ -3440,7 +3437,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA512_DIGEST_SIZE,
+ },
+@@ -3486,7 +3483,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = MD5_DIGEST_SIZE,
+ },
+@@ -3534,7 +3531,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = SHA1_DIGEST_SIZE,
+ },
+@@ -3582,7 +3579,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = SHA224_DIGEST_SIZE,
+ },
+@@ -3630,7 +3627,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+ },
+@@ -3678,7 +3675,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = SHA384_DIGEST_SIZE,
+ },
+@@ -3726,7 +3723,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = SHA512_DIGEST_SIZE,
+ },
+@@ -3772,7 +3769,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES_BLOCK_SIZE,
+ .maxauthsize = MD5_DIGEST_SIZE,
+ },
+@@ -3818,7 +3815,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES_BLOCK_SIZE,
+ .maxauthsize = SHA1_DIGEST_SIZE,
+ },
+@@ -3864,7 +3861,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES_BLOCK_SIZE,
+ .maxauthsize = SHA224_DIGEST_SIZE,
+ },
+@@ -3910,7 +3907,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES_BLOCK_SIZE,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+ },
+@@ -3956,7 +3953,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES_BLOCK_SIZE,
+ .maxauthsize = SHA384_DIGEST_SIZE,
+ },
+@@ -4002,7 +3999,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = DES_BLOCK_SIZE,
+ .maxauthsize = SHA512_DIGEST_SIZE,
+ },
+@@ -4051,7 +4048,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = CTR_RFC3686_IV_SIZE,
+ .maxauthsize = MD5_DIGEST_SIZE,
+ },
+@@ -4102,7 +4099,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = CTR_RFC3686_IV_SIZE,
+ .maxauthsize = SHA1_DIGEST_SIZE,
+ },
+@@ -4153,7 +4150,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = CTR_RFC3686_IV_SIZE,
+ .maxauthsize = SHA224_DIGEST_SIZE,
+ },
+@@ -4204,7 +4201,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = CTR_RFC3686_IV_SIZE,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+ },
+@@ -4255,7 +4252,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = CTR_RFC3686_IV_SIZE,
+ .maxauthsize = SHA384_DIGEST_SIZE,
+ },
+@@ -4306,7 +4303,7 @@ static struct caam_aead_alg driver_aeads[] = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+- .decrypt = aead_givdecrypt,
++ .decrypt = aead_decrypt,
+ .ivsize = CTR_RFC3686_IV_SIZE,
+ .maxauthsize = SHA512_DIGEST_SIZE,
+ },
+diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
+index 9bb99e274d23..79a05a3bc50b 100644
+--- a/drivers/gpu/drm/drm_atomic.c
++++ b/drivers/gpu/drm/drm_atomic.c
+@@ -465,7 +465,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
+ val,
+ -1,
+ &replaced);
+- state->color_mgmt_changed = replaced;
++ state->color_mgmt_changed |= replaced;
+ return ret;
+ } else if (property == config->ctm_property) {
+ ret = drm_atomic_replace_property_blob_from_id(crtc,
+@@ -473,7 +473,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
+ val,
+ sizeof(struct drm_color_ctm),
+ &replaced);
+- state->color_mgmt_changed = replaced;
++ state->color_mgmt_changed |= replaced;
+ return ret;
+ } else if (property == config->gamma_lut_property) {
+ ret = drm_atomic_replace_property_blob_from_id(crtc,
+@@ -481,7 +481,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
+ val,
+ -1,
+ &replaced);
+- state->color_mgmt_changed = replaced;
++ state->color_mgmt_changed |= replaced;
+ return ret;
+ } else if (crtc->funcs->atomic_set_property)
+ return crtc->funcs->atomic_set_property(crtc, state, property, val);
+diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
+index 0e3cc66aa8b7..a5cae1b6d57b 100644
+--- a/drivers/gpu/drm/drm_crtc.c
++++ b/drivers/gpu/drm/drm_crtc.c
+@@ -5312,6 +5312,9 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
+ struct drm_pending_vblank_event *e = NULL;
+ int ret = -EINVAL;
+
++ if (!drm_core_check_feature(dev, DRIVER_MODESET))
++ return -EINVAL;
++
+ if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS ||
+ page_flip->reserved != 0)
+ return -EINVAL;
+diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
+index eb4bb8b2f3a5..eb515f04eb9d 100644
+--- a/drivers/gpu/drm/msm/msm_gem_submit.c
++++ b/drivers/gpu/drm/msm/msm_gem_submit.c
+@@ -62,6 +62,14 @@ void msm_gem_submit_free(struct msm_gem_submit *submit)
+ kfree(submit);
+ }
+
++static inline unsigned long __must_check
++copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
++{
++ if (access_ok(VERIFY_READ, from, n))
++ return __copy_from_user_inatomic(to, from, n);
++ return -EFAULT;
++}
++
+ static int submit_lookup_objects(struct msm_gem_submit *submit,
+ struct drm_msm_gem_submit *args, struct drm_file *file)
+ {
+@@ -69,6 +77,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
+ int ret = 0;
+
+ spin_lock(&file->table_lock);
++ pagefault_disable();
+
+ for (i = 0; i < args->nr_bos; i++) {
+ struct drm_msm_gem_submit_bo submit_bo;
+@@ -82,10 +91,15 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
+ */
+ submit->bos[i].flags = 0;
+
+- ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
+- if (ret) {
+- ret = -EFAULT;
+- goto out_unlock;
++ ret = copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo));
++ if (unlikely(ret)) {
++ pagefault_enable();
++ spin_unlock(&file->table_lock);
++ ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
++ if (ret)
++ goto out;
++ spin_lock(&file->table_lock);
++ pagefault_disable();
+ }
+
+ if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) {
+@@ -125,9 +139,12 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
+ }
+
+ out_unlock:
+- submit->nr_bos = i;
++ pagefault_enable();
+ spin_unlock(&file->table_lock);
+
++out:
++ submit->nr_bos = i;
++
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
+index 259cd6e6d71c..17e34546ade2 100644
+--- a/drivers/gpu/drm/radeon/atombios_crtc.c
++++ b/drivers/gpu/drm/radeon/atombios_crtc.c
+@@ -627,7 +627,9 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
+ if (radeon_crtc->ss.refdiv) {
+ radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
+ radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
+- if (rdev->family >= CHIP_RV770)
++ if (ASIC_IS_AVIVO(rdev) &&
++ rdev->family != CHIP_RS780 &&
++ rdev->family != CHIP_RS880)
+ radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
+ }
+ }
+diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
+index 590b0377fbe2..0ab76dd13eb7 100644
+--- a/drivers/gpu/drm/radeon/radeon_ttm.c
++++ b/drivers/gpu/drm/radeon/radeon_ttm.c
+@@ -263,8 +263,8 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
+
+ rdev = radeon_get_rdev(bo->bdev);
+ ridx = radeon_copy_ring_index(rdev);
+- old_start = old_mem->start << PAGE_SHIFT;
+- new_start = new_mem->start << PAGE_SHIFT;
++ old_start = (u64)old_mem->start << PAGE_SHIFT;
++ new_start = (u64)new_mem->start << PAGE_SHIFT;
+
+ switch (old_mem->mem_type) {
+ case TTM_PL_VRAM:
+diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
+index 37cac59401d7..2e24616cd84a 100644
+--- a/drivers/gpu/drm/vc4/vc4_drv.h
++++ b/drivers/gpu/drm/vc4/vc4_drv.h
+@@ -321,6 +321,15 @@ vc4_first_render_job(struct vc4_dev *vc4)
+ struct vc4_exec_info, head);
+ }
+
++static inline struct vc4_exec_info *
++vc4_last_render_job(struct vc4_dev *vc4)
++{
++ if (list_empty(&vc4->render_job_list))
++ return NULL;
++ return list_last_entry(&vc4->render_job_list,
++ struct vc4_exec_info, head);
++}
++
+ /**
+ * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
+ * setup parameters.
+diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
+index 46899d6de675..78ab08e8f87e 100644
+--- a/drivers/gpu/drm/vc4/vc4_gem.c
++++ b/drivers/gpu/drm/vc4/vc4_gem.c
+@@ -574,8 +574,8 @@ vc4_cl_lookup_bos(struct drm_device *dev,
+ spin_unlock(&file_priv->table_lock);
+
+ fail:
+- kfree(handles);
+- return 0;
++ drm_free_large(handles);
++ return ret;
+ }
+
+ static int
+diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
+index b0104a346a74..094bc6a475c1 100644
+--- a/drivers/gpu/drm/vc4/vc4_irq.c
++++ b/drivers/gpu/drm/vc4/vc4_irq.c
+@@ -83,8 +83,10 @@ vc4_overflow_mem_work(struct work_struct *work)
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ current_exec = vc4_first_bin_job(vc4);
++ if (!current_exec)
++ current_exec = vc4_last_render_job(vc4);
+ if (current_exec) {
+- vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
++ vc4->overflow_mem->seqno = current_exec->seqno;
+ list_add_tail(&vc4->overflow_mem->unref_head,
+ &current_exec->unref_list);
+ vc4->overflow_mem = NULL;
+diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
+index 70ed1d0151b8..d3ef0fcaaddf 100644
+--- a/drivers/irqchip/irq-mips-gic.c
++++ b/drivers/irqchip/irq-mips-gic.c
+@@ -713,9 +713,6 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
+ unsigned long flags;
+ int i;
+
+- irq_set_chip_and_handler(virq, &gic_level_irq_controller,
+- handle_level_irq);
+-
+ spin_lock_irqsave(&gic_lock, flags);
+ gic_map_to_pin(intr, gic_cpu_pin);
+ gic_map_to_vpe(intr, mips_cm_vp_id(vpe));
+@@ -732,6 +729,10 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
+ {
+ if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS)
+ return gic_local_irq_domain_map(d, virq, hw);
++
++ irq_set_chip_and_handler(virq, &gic_level_irq_controller,
++ handle_level_irq);
++
+ return gic_shared_irq_domain_map(d, virq, hw, 0);
+ }
+
+@@ -771,11 +772,13 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
+ hwirq = GIC_SHARED_TO_HWIRQ(base_hwirq + i);
+
+ ret = irq_domain_set_hwirq_and_chip(d, virq + i, hwirq,
+- &gic_edge_irq_controller,
++ &gic_level_irq_controller,
+ NULL);
+ if (ret)
+ goto error;
+
++ irq_set_handler(virq + i, handle_level_irq);
++
+ ret = gic_shared_irq_domain_map(d, virq + i, hwirq, cpu);
+ if (ret)
+ goto error;
+@@ -890,10 +893,17 @@ void gic_dev_domain_free(struct irq_domain *d, unsigned int virq,
+ return;
+ }
+
++static void gic_dev_domain_activate(struct irq_domain *domain,
++ struct irq_data *d)
++{
++ gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0);
++}
++
+ static struct irq_domain_ops gic_dev_domain_ops = {
+ .xlate = gic_dev_domain_xlate,
+ .alloc = gic_dev_domain_alloc,
+ .free = gic_dev_domain_free,
++ .activate = gic_dev_domain_activate,
+ };
+
+ static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index f5dbb4e884d8..5d3b2318cb3c 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -1818,7 +1818,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
+ free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
+
+ if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
+- !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
++ !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
+ !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
+ !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
+ !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
+diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
+index e2fb44cc5c37..dc3a854e02d3 100644
+--- a/drivers/misc/mei/hw-me.c
++++ b/drivers/misc/mei/hw-me.c
+@@ -1263,8 +1263,14 @@ static bool mei_me_fw_type_nm(struct pci_dev *pdev)
+ static bool mei_me_fw_type_sps(struct pci_dev *pdev)
+ {
+ u32 reg;
+- /* Read ME FW Status check for SPS Firmware */
+- pci_read_config_dword(pdev, PCI_CFG_HFS_1, &reg);
++ unsigned int devfn;
++
++ /*
++ * Read ME FW Status register to check for SPS Firmware
++ * The SPS FW is only signaled in pci function 0
++ */
++ devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
++ pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, &reg);
+ trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg);
+ /* if bits [19:16] = 15, running SPS Firmware */
+ return (reg & 0xf0000) == 0xf0000;
+diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
+index 64e64da6da44..71cea9b296b2 100644
+--- a/drivers/misc/mei/pci-me.c
++++ b/drivers/misc/mei/pci-me.c
+@@ -85,8 +85,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
+
+ {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, mei_me_pch8_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)},
+- {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_cfg)},
+- {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_cfg)},
++ {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_sps_cfg)},
++ {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_sps_cfg)},
+
+ {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, mei_me_pch8_cfg)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, mei_me_pch8_cfg)},
+diff --git a/drivers/scsi/constants.c b/drivers/scsi/constants.c
+index 83458f7a2824..6dc96c8dfe75 100644
+--- a/drivers/scsi/constants.c
++++ b/drivers/scsi/constants.c
+@@ -361,8 +361,9 @@ static const char * const snstext[] = {
+
+ /* Get sense key string or NULL if not available */
+ const char *
+-scsi_sense_key_string(unsigned char key) {
+- if (key <= 0xE)
++scsi_sense_key_string(unsigned char key)
++{
++ if (key < ARRAY_SIZE(snstext))
+ return snstext[key];
+ return NULL;
+ }
+diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
+index 0a4d54a87f7c..591e52009b58 100644
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -1196,6 +1196,8 @@ static int acm_probe(struct usb_interface *intf,
+ }
+
+ if (!buflen) {
++ if (!intf->cur_altsetting || !intf->cur_altsetting->endpoint)
++ return -EINVAL;
+ if (intf->cur_altsetting->endpoint &&
+ intf->cur_altsetting->endpoint->extralen &&
+ intf->cur_altsetting->endpoint->extra) {
+@@ -1276,6 +1278,8 @@ next_desc:
+ data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num));
+ control_interface = intf;
+ } else {
++ if (!intf->cur_altsetting)
++ return -ENODEV;
+ if (intf->cur_altsetting->desc.bNumEndpoints != 3) {
+ dev_dbg(&intf->dev,"No union descriptor, giving up\n");
+ return -ENODEV;
+@@ -1305,15 +1309,22 @@ next_desc:
+ combined_interfaces = 1;
+ /* a popular other OS doesn't use it */
+ quirks |= NO_CAP_LINE;
++ if (!data_interface->cur_altsetting)
++ return -EINVAL;
+ if (data_interface->cur_altsetting->desc.bNumEndpoints != 3) {
+ dev_err(&intf->dev, "This needs exactly 3 endpoints\n");
+ return -EINVAL;
+ }
+ look_for_collapsed_interface:
++ if (!data_interface->cur_altsetting)
++ return -EINVAL;
+ for (i = 0; i < 3; i++) {
+ struct usb_endpoint_descriptor *ep;
+ ep = &data_interface->cur_altsetting->endpoint[i].desc;
+
++ if (!ep)
++ return -ENODEV;
++
+ if (usb_endpoint_is_int_in(ep))
+ epctrl = ep;
+ else if (usb_endpoint_is_bulk_out(ep))
+@@ -1332,8 +1343,12 @@ look_for_collapsed_interface:
+ skip_normal_probe:
+
+ /*workaround for switched interfaces */
++ if (!data_interface->cur_altsetting)
++ return -EINVAL;
+ if (data_interface->cur_altsetting->desc.bInterfaceClass
+ != CDC_DATA_INTERFACE_TYPE) {
++ if (!control_interface->cur_altsetting)
++ return -EINVAL;
+ if (control_interface->cur_altsetting->desc.bInterfaceClass
+ == CDC_DATA_INTERFACE_TYPE) {
+ dev_dbg(&intf->dev,
+@@ -1356,6 +1371,7 @@ skip_normal_probe:
+
+
+ if (data_interface->cur_altsetting->desc.bNumEndpoints < 2 ||
++ !control_interface->cur_altsetting ||
+ control_interface->cur_altsetting->desc.bNumEndpoints == 0)
+ return -EINVAL;
+
+@@ -1363,6 +1379,8 @@ skip_normal_probe:
+ epread = &data_interface->cur_altsetting->endpoint[0].desc;
+ epwrite = &data_interface->cur_altsetting->endpoint[1].desc;
+
++ if (!epctrl || !epread || !epwrite)
++ return -ENODEV;
+
+ /* workaround for switched endpoints */
+ if (!usb_endpoint_dir_in(epread)) {
+diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
+index 9d6320e8ff3e..6e29d053843d 100644
+--- a/drivers/vhost/scsi.c
++++ b/drivers/vhost/scsi.c
+@@ -88,7 +88,7 @@ struct vhost_scsi_cmd {
+ struct scatterlist *tvc_prot_sgl;
+ struct page **tvc_upages;
+ /* Pointer to response header iovec */
+- struct iovec *tvc_resp_iov;
++ struct iovec tvc_resp_iov;
+ /* Pointer to vhost_scsi for our device */
+ struct vhost_scsi *tvc_vhost;
+ /* Pointer to vhost_virtqueue for the cmd */
+@@ -547,7 +547,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
+ memcpy(v_rsp.sense, cmd->tvc_sense_buf,
+ se_cmd->scsi_sense_length);
+
+- iov_iter_init(&iov_iter, READ, cmd->tvc_resp_iov,
++ iov_iter_init(&iov_iter, READ, &cmd->tvc_resp_iov,
+ cmd->tvc_in_iovs, sizeof(v_rsp));
+ ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter);
+ if (likely(ret == sizeof(v_rsp))) {
+@@ -1044,7 +1044,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+ }
+ cmd->tvc_vhost = vs;
+ cmd->tvc_vq = vq;
+- cmd->tvc_resp_iov = &vq->iov[out];
++ cmd->tvc_resp_iov = vq->iov[out];
+ cmd->tvc_in_iovs = in;
+
+ pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
+diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
+index 7487971f9f78..c1010f018bd8 100644
+--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
++++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
+@@ -316,7 +316,7 @@ static int xenbus_write_transaction(unsigned msg_type,
+ rc = -ENOMEM;
+ goto out;
+ }
+- } else {
++ } else if (msg_type == XS_TRANSACTION_END) {
+ list_for_each_entry(trans, &u->transactions, list)
+ if (trans->handle.id == u->u.msg.tx_id)
+ break;
+diff --git a/fs/block_dev.c b/fs/block_dev.c
+index 71ccab1d22c6..b1495fa57d6d 100644
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -659,7 +659,7 @@ static struct dentry *bd_mount(struct file_system_type *fs_type,
+ {
+ struct dentry *dent;
+ dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
+- if (dent)
++ if (!IS_ERR(dent))
+ dent->d_sb->s_iflags |= SB_I_CGROUPWB;
+ return dent;
+ }
+diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
+index 0f9961eede1e..f96547f83cab 100644
+--- a/fs/crypto/policy.c
++++ b/fs/crypto/policy.c
+@@ -95,10 +95,15 @@ static int create_encryption_context_from_policy(struct inode *inode,
+ int fscrypt_process_policy(struct inode *inode,
+ const struct fscrypt_policy *policy)
+ {
++ if (!inode_owner_or_capable(inode))
++ return -EACCES;
++
+ if (policy->version != 0)
+ return -EINVAL;
+
+ if (!inode_has_encryption_context(inode)) {
++ if (!S_ISDIR(inode->i_mode))
++ return -EINVAL;
+ if (!inode->i_sb->s_cop->empty_dir)
+ return -EOPNOTSUPP;
+ if (!inode->i_sb->s_cop->empty_dir(inode))
+diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
+index ad050698143f..8a9feb341f31 100644
+--- a/fs/ext4/crypto_policy.c
++++ b/fs/ext4/crypto_policy.c
+@@ -102,6 +102,9 @@ static int ext4_create_encryption_context_from_policy(
+ int ext4_process_policy(const struct ext4_encryption_policy *policy,
+ struct inode *inode)
+ {
++ if (!inode_owner_or_capable(inode))
++ return -EACCES;
++
+ if (policy->version != 0)
+ return -EINVAL;
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index b747ec09c1ac..ea628af9d609 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -51,25 +51,31 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
+ struct ext4_inode_info *ei)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+- __u16 csum_lo;
+- __u16 csum_hi = 0;
+ __u32 csum;
++ __u16 dummy_csum = 0;
++ int offset = offsetof(struct ext4_inode, i_checksum_lo);
++ unsigned int csum_size = sizeof(dummy_csum);
+
+- csum_lo = le16_to_cpu(raw->i_checksum_lo);
+- raw->i_checksum_lo = 0;
+- if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+- EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
+- csum_hi = le16_to_cpu(raw->i_checksum_hi);
+- raw->i_checksum_hi = 0;
+- }
++ csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, offset);
++ csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, csum_size);
++ offset += csum_size;
++ csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
++ EXT4_GOOD_OLD_INODE_SIZE - offset);
+
+- csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
+- EXT4_INODE_SIZE(inode->i_sb));
+-
+- raw->i_checksum_lo = cpu_to_le16(csum_lo);
+- if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+- EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
+- raw->i_checksum_hi = cpu_to_le16(csum_hi);
++ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
++ offset = offsetof(struct ext4_inode, i_checksum_hi);
++ csum = ext4_chksum(sbi, csum, (__u8 *)raw +
++ EXT4_GOOD_OLD_INODE_SIZE,
++ offset - EXT4_GOOD_OLD_INODE_SIZE);
++ if (EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
++ csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
++ csum_size);
++ offset += csum_size;
++ csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
++ EXT4_INODE_SIZE(inode->i_sb) -
++ offset);
++ }
++ }
+
+ return csum;
+ }
+@@ -5460,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
+ sbi->s_want_extra_isize,
+ iloc, handle);
+ if (ret) {
+- ext4_set_inode_state(inode,
+- EXT4_STATE_NO_EXPAND);
+ if (mnt_count !=
+ le16_to_cpu(sbi->s_es->s_mnt_count)) {
+ ext4_warning(inode->i_sb,
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index ec4c39952e84..5bb46b6ed456 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -420,15 +420,14 @@ static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ __u32 csum;
+- __le32 save_csum;
+ int size;
++ __u32 dummy_csum = 0;
++ int offset = offsetof(struct dx_tail, dt_checksum);
+
+ size = count_offset + (count * sizeof(struct dx_entry));
+- save_csum = t->dt_checksum;
+- t->dt_checksum = 0;
+ csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
+- csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
+- t->dt_checksum = save_csum;
++ csum = ext4_chksum(sbi, csum, (__u8 *)t, offset);
++ csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
+
+ return cpu_to_le32(csum);
+ }
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 639bd756a8d8..d4505f89fe76 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2068,23 +2068,25 @@ failed:
+ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
+ struct ext4_group_desc *gdp)
+ {
+- int offset;
++ int offset = offsetof(struct ext4_group_desc, bg_checksum);
+ __u16 crc = 0;
+ __le32 le_group = cpu_to_le32(block_group);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (ext4_has_metadata_csum(sbi->s_sb)) {
+ /* Use new metadata_csum algorithm */
+- __le16 save_csum;
+ __u32 csum32;
++ __u16 dummy_csum = 0;
+
+- save_csum = gdp->bg_checksum;
+- gdp->bg_checksum = 0;
+ csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
+ sizeof(le_group));
+- csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
+- sbi->s_desc_size);
+- gdp->bg_checksum = save_csum;
++ csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
++ csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
++ sizeof(dummy_csum));
++ offset += sizeof(dummy_csum);
++ if (offset < sbi->s_desc_size)
++ csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
++ sbi->s_desc_size - offset);
+
+ crc = csum32 & 0xFFFF;
+ goto out;
+@@ -2094,8 +2096,6 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
+ if (!ext4_has_feature_gdt_csum(sb))
+ return 0;
+
+- offset = offsetof(struct ext4_group_desc, bg_checksum);
+-
+ crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
+ crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
+ crc = crc16(crc, (__u8 *)gdp, offset);
+@@ -2131,6 +2131,7 @@ void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
+
+ /* Called at mount-time, super-block is locked */
+ static int ext4_check_descriptors(struct super_block *sb,
++ ext4_fsblk_t sb_block,
+ ext4_group_t *first_not_zeroed)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+@@ -2161,6 +2162,11 @@ static int ext4_check_descriptors(struct super_block *sb,
+ grp = i;
+
+ block_bitmap = ext4_block_bitmap(sb, gdp);
++ if (block_bitmap == sb_block) {
++ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
++ "Block bitmap for group %u overlaps "
++ "superblock", i);
++ }
+ if (block_bitmap < first_block || block_bitmap > last_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Block bitmap for group %u not in group "
+@@ -2168,6 +2174,11 @@ static int ext4_check_descriptors(struct super_block *sb,
+ return 0;
+ }
+ inode_bitmap = ext4_inode_bitmap(sb, gdp);
++ if (inode_bitmap == sb_block) {
++ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
++ "Inode bitmap for group %u overlaps "
++ "superblock", i);
++ }
+ if (inode_bitmap < first_block || inode_bitmap > last_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode bitmap for group %u not in group "
+@@ -2175,6 +2186,11 @@ static int ext4_check_descriptors(struct super_block *sb,
+ return 0;
+ }
+ inode_table = ext4_inode_table(sb, gdp);
++ if (inode_table == sb_block) {
++ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
++ "Inode table for group %u overlaps "
++ "superblock", i);
++ }
+ if (inode_table < first_block ||
+ inode_table + sbi->s_itb_per_group - 1 > last_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+@@ -3677,7 +3693,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ goto failed_mount2;
+ }
+ }
+- if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
++ if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
+ ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
+ ret = -EFSCORRUPTED;
+ goto failed_mount2;
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index e79bd32b9b79..2eb935ca5d9e 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -121,17 +121,18 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ __u32 csum;
+- __le32 save_csum;
+ __le64 dsk_block_nr = cpu_to_le64(block_nr);
++ __u32 dummy_csum = 0;
++ int offset = offsetof(struct ext4_xattr_header, h_checksum);
+
+- save_csum = hdr->h_checksum;
+- hdr->h_checksum = 0;
+ csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr,
+ sizeof(dsk_block_nr));
+- csum = ext4_chksum(sbi, csum, (__u8 *)hdr,
+- EXT4_BLOCK_SIZE(inode->i_sb));
++ csum = ext4_chksum(sbi, csum, (__u8 *)hdr, offset);
++ csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
++ offset += sizeof(dummy_csum);
++ csum = ext4_chksum(sbi, csum, (__u8 *)hdr + offset,
++ EXT4_BLOCK_SIZE(inode->i_sb) - offset);
+
+- hdr->h_checksum = save_csum;
+ return cpu_to_le32(csum);
+ }
+
+@@ -1352,15 +1353,19 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+ size_t min_offs, free;
+ int total_ino;
+ void *base, *start, *end;
+- int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
++ int error = 0, tried_min_extra_isize = 0;
+ int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
++ int isize_diff; /* How much do we need to grow i_extra_isize */
+
+ down_write(&EXT4_I(inode)->xattr_sem);
++ /*
++ * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
++ */
++ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ retry:
+- if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
+- up_write(&EXT4_I(inode)->xattr_sem);
+- return 0;
+- }
++ isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
++ if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
++ goto out;
+
+ header = IHDR(inode, raw_inode);
+ entry = IFIRST(header);
+@@ -1381,7 +1386,7 @@ retry:
+ goto cleanup;
+
+ free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
+- if (free >= new_extra_isize) {
++ if (free >= isize_diff) {
+ entry = IFIRST(header);
+ ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
+ - new_extra_isize, (void *)raw_inode +
+@@ -1389,8 +1394,7 @@ retry:
+ (void *)header, total_ino,
+ inode->i_sb->s_blocksize);
+ EXT4_I(inode)->i_extra_isize = new_extra_isize;
+- error = 0;
+- goto cleanup;
++ goto out;
+ }
+
+ /*
+@@ -1413,7 +1417,7 @@ retry:
+ end = bh->b_data + bh->b_size;
+ min_offs = end - base;
+ free = ext4_xattr_free_space(first, &min_offs, base, NULL);
+- if (free < new_extra_isize) {
++ if (free < isize_diff) {
+ if (!tried_min_extra_isize && s_min_extra_isize) {
+ tried_min_extra_isize++;
+ new_extra_isize = s_min_extra_isize;
+@@ -1427,7 +1431,7 @@ retry:
+ free = inode->i_sb->s_blocksize;
+ }
+
+- while (new_extra_isize > 0) {
++ while (isize_diff > 0) {
+ size_t offs, size, entry_size;
+ struct ext4_xattr_entry *small_entry = NULL;
+ struct ext4_xattr_info i = {
+@@ -1458,7 +1462,7 @@ retry:
+ EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
+ EXT4_XATTR_LEN(last->e_name_len);
+ if (total_size <= free && total_size < min_total_size) {
+- if (total_size < new_extra_isize) {
++ if (total_size < isize_diff) {
+ small_entry = last;
+ } else {
+ entry = last;
+@@ -1513,22 +1517,22 @@ retry:
+ error = ext4_xattr_ibody_set(handle, inode, &i, is);
+ if (error)
+ goto cleanup;
++ total_ino -= entry_size;
+
+ entry = IFIRST(header);
+- if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
+- shift_bytes = new_extra_isize;
++ if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
++ shift_bytes = isize_diff;
+ else
+- shift_bytes = entry_size + size;
++ shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
+ /* Adjust the offsets and shift the remaining entries ahead */
+- ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
+- shift_bytes, (void *)raw_inode +
+- EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
+- (void *)header, total_ino - entry_size,
+- inode->i_sb->s_blocksize);
++ ext4_xattr_shift_entries(entry, -shift_bytes,
++ (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
++ EXT4_I(inode)->i_extra_isize + shift_bytes,
++ (void *)header, total_ino, inode->i_sb->s_blocksize);
+
+- extra_isize += shift_bytes;
+- new_extra_isize -= shift_bytes;
+- EXT4_I(inode)->i_extra_isize = extra_isize;
++ isize_diff -= shift_bytes;
++ EXT4_I(inode)->i_extra_isize += shift_bytes;
++ header = IHDR(inode, raw_inode);
+
+ i.name = b_entry_name;
+ i.value = buffer;
+@@ -1550,6 +1554,8 @@ retry:
+ kfree(bs);
+ }
+ brelse(bh);
++out:
++ ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ up_write(&EXT4_I(inode)->xattr_sem);
+ return 0;
+
+@@ -1561,6 +1567,10 @@ cleanup:
+ kfree(is);
+ kfree(bs);
+ brelse(bh);
++ /*
++ * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
++ * size expansion failed.
++ */
+ up_write(&EXT4_I(inode)->xattr_sem);
+ return error;
+ }
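
A second thread runs through the xattr.c change above: EXT4_STATE_NO_EXPAND is set before the expansion work and cleared only on success, so that ext4_mark_inode_dirty() (whose own NO_EXPAND setting was removed in the inode.c hunk earlier) cannot re-enter the expansion path. A small sketch of that guard-flag pattern, reduced to plain single-threaded C with invented names:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative guard-flag pattern (all names hypothetical): set a
 * state bit before entering a path that may be re-entered indirectly,
 * and clear it only on success so a failed state is not retried. */
static bool no_expand;

static void mark_dirty(void);

static int expand(void)
{
    if (no_expand)        /* re-entered: refuse instead of recursing */
        return 0;

    no_expand = true;
    mark_dirty();         /* may call back into expand() */
    no_expand = false;    /* cleared on the success path only */
    return 0;
}

static void mark_dirty(void)
{
    /* In ext4, marking an inode dirty can try to grow i_extra_isize,
     * which would recurse into the expansion path without the guard. */
    expand();
}

int main(void)
{
    expand();
    puts("no recursion");
    return 0;
}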
+diff --git a/fs/namei.c b/fs/namei.c
+index 70580ab1445c..9281b2bf025a 100644
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -901,6 +901,7 @@ static inline int may_follow_link(struct nameidata *nd)
+ {
+ const struct inode *inode;
+ const struct inode *parent;
++ kuid_t puid;
+
+ if (!sysctl_protected_symlinks)
+ return 0;
+@@ -916,7 +917,8 @@ static inline int may_follow_link(struct nameidata *nd)
+ return 0;
+
+ /* Allowed if parent directory and link owner match. */
+- if (uid_eq(parent->i_uid, inode->i_uid))
++ puid = parent->i_uid;
++ if (uid_valid(puid) && uid_eq(puid, inode->i_uid))
+ return 0;
+
+ if (nd->flags & LOOKUP_RCU)
+diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
+index 80aa6f1eb336..4133aa7e7c09 100644
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -80,6 +80,8 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
+ }
+
+ for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
++ if (ovl_is_private_xattr(name))
++ continue;
+ retry:
+ size = vfs_getxattr(old, name, value, value_size);
+ if (size == -ERANGE)
+diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
+index d1cdc60dd68f..ac98a71e753a 100644
+--- a/fs/overlayfs/inode.c
++++ b/fs/overlayfs/inode.c
+@@ -231,7 +231,7 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+ }
+
+
+-static bool ovl_is_private_xattr(const char *name)
++bool ovl_is_private_xattr(const char *name)
+ {
+ return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0;
+ }
+@@ -279,24 +279,27 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
+ {
+ struct dentry *realdentry = ovl_dentry_real(dentry);
+ ssize_t res;
+- int off;
++ size_t len;
++ char *s;
+
+ res = vfs_listxattr(realdentry, list, size);
+ if (res <= 0 || size == 0)
+ return res;
+
+ /* filter out private xattrs */
+- for (off = 0; off < res;) {
+- char *s = list + off;
+- size_t slen = strlen(s) + 1;
++ for (s = list, len = res; len;) {
++ size_t slen = strnlen(s, len) + 1;
+
+- BUG_ON(off + slen > res);
++ /* underlying fs providing us with a broken xattr list? */
++ if (WARN_ON(slen > len))
++ return -EIO;
+
++ len -= slen;
+ if (ovl_is_private_xattr(s)) {
+ res -= slen;
+- memmove(s, s + slen, res - off);
++ memmove(s, s + slen, len);
+ } else {
+- off += slen;
++ s += slen;
+ }
+ }
+
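The ovl_listxattr() rework above walks a buffer of consecutive NUL-terminated names with a remaining-length counter and strnlen(), so a malformed list from the underlying filesystem fails with -EIO instead of letting the scan run past the buffer, and filtered names are squeezed out in place with memmove(). A stand-alone sketch of that walk follows; the prefix filter and sample names are invented.

#include <stdio.h>
#include <string.h>
#include <sys/types.h>

/* Remove names with the given prefix from a buffer of consecutive
 * NUL-terminated strings, returning the new total length, or -1 if
 * the list is malformed (a name runs past the buffer end). */
static ssize_t filter_names(char *list, size_t res, const char *prefix)
{
    size_t plen = strlen(prefix);
    size_t len;
    char *s;

    for (s = list, len = res; len; ) {
        size_t slen = strnlen(s, len) + 1;

        if (slen > len)           /* unterminated final entry */
            return -1;

        len -= slen;
        if (!strncmp(s, prefix, plen)) {
            res -= slen;
            memmove(s, s + slen, len);    /* drop this name */
        } else {
            s += slen;
        }
    }
    return res;
}

int main(void)
{
    char buf[] = "user.a\0trusted.overlay.x\0user.b";
    ssize_t n = filter_names(buf, sizeof(buf), "trusted.overlay.");

    for (ssize_t off = 0; off < n; off += strlen(buf + off) + 1)
        puts(buf + off);
    return 0;
}
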
+diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
+index cfbca53590d0..d8ddc31f591e 100644
+--- a/fs/overlayfs/overlayfs.h
++++ b/fs/overlayfs/overlayfs.h
+@@ -168,6 +168,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
+ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
+ void ovl_cache_free(struct list_head *list);
+ int ovl_check_d_type_supported(struct path *realpath);
++void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
++ struct dentry *dentry, int level);
+
+ /* inode.c */
+ int ovl_setattr(struct dentry *dentry, struct iattr *attr);
+@@ -180,6 +182,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode,
+ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
+ int ovl_removexattr(struct dentry *dentry, const char *name);
+ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags);
++bool ovl_is_private_xattr(const char *name);
+
+ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
+ struct ovl_entry *oe);
+diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
+index cf37fc76fc9f..f241b4ee3d8a 100644
+--- a/fs/overlayfs/readdir.c
++++ b/fs/overlayfs/readdir.c
+@@ -248,7 +248,7 @@ static inline int ovl_dir_read(struct path *realpath,
+ err = rdd->err;
+ } while (!err && rdd->count);
+
+- if (!err && rdd->first_maybe_whiteout)
++ if (!err && rdd->first_maybe_whiteout && rdd->dentry)
+ err = ovl_check_whiteouts(realpath->dentry, rdd);
+
+ fput(realfile);
+@@ -606,3 +606,64 @@ int ovl_check_d_type_supported(struct path *realpath)
+
+ return rdd.d_type_supported;
+ }
++
++static void ovl_workdir_cleanup_recurse(struct path *path, int level)
++{
++ int err;
++ struct inode *dir = path->dentry->d_inode;
++ LIST_HEAD(list);
++ struct ovl_cache_entry *p;
++ struct ovl_readdir_data rdd = {
++ .ctx.actor = ovl_fill_merge,
++ .dentry = NULL,
++ .list = &list,
++ .root = RB_ROOT,
++ .is_lowest = false,
++ };
++
++ err = ovl_dir_read(path, &rdd);
++ if (err)
++ goto out;
++
++ inode_lock_nested(dir, I_MUTEX_PARENT);
++ list_for_each_entry(p, &list, l_node) {
++ struct dentry *dentry;
++
++ if (p->name[0] == '.') {
++ if (p->len == 1)
++ continue;
++ if (p->len == 2 && p->name[1] == '.')
++ continue;
++ }
++ dentry = lookup_one_len(p->name, path->dentry, p->len);
++ if (IS_ERR(dentry))
++ continue;
++ if (dentry->d_inode)
++ ovl_workdir_cleanup(dir, path->mnt, dentry, level);
++ dput(dentry);
++ }
++ inode_unlock(dir);
++out:
++ ovl_cache_free(&list);
++}
++
++void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
++ struct dentry *dentry, int level)
++{
++ int err;
++
++ if (!d_is_dir(dentry) || level > 1) {
++ ovl_cleanup(dir, dentry);
++ return;
++ }
++
++ err = ovl_do_rmdir(dir, dentry);
++ if (err) {
++ struct path path = { .mnt = mnt, .dentry = dentry };
++
++ inode_unlock(dir);
++ ovl_workdir_cleanup_recurse(&path, level + 1);
++ inode_lock_nested(dir, I_MUTEX_PARENT);
++ ovl_cleanup(dir, dentry);
++ }
++}
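
Note the lock choreography in ovl_workdir_cleanup() above: the parent directory's lock is dropped before recursing, because ovl_workdir_cleanup_recurse() takes the child directory's lock itself, and the level argument bounds the recursion to one extra depth. A schematic of that unlock-recurse-relock shape, with print statements standing in for inode_lock_nested()/inode_unlock() (purely illustrative, every name invented):

#include <stdio.h>

/* Stubs standing in for inode_lock_nested()/inode_unlock(); the point
 * is the ordering, not the locking primitive. */
static void lock_dir(const char *who)   { printf("lock   %s\n", who); }
static void unlock_dir(const char *who) { printf("unlock %s\n", who); }

static void cleanup_recurse(const char *dir, int level);

/* Assumed shape of the cleanup: try the cheap rmdir-like path first;
 * only on failure (a populated directory) recurse, and never more
 * than one level deep. */
static void cleanup(const char *parent, const char *entry, int level)
{
    int non_empty = (level == 0);   /* pretend the first dir is populated */

    if (non_empty && level <= 1) {
        /* Parent must be unlocked before taking the child's lock. */
        unlock_dir(parent);
        cleanup_recurse(entry, level + 1);
        lock_dir(parent);
    }
    printf("remove %s\n", entry);
}

static void cleanup_recurse(const char *dir, int level)
{
    lock_dir(dir);
    cleanup(dir, "child-entry", level);
    unlock_dir(dir);
}

int main(void)
{
    lock_dir("workdir-parent");
    cleanup("workdir-parent", "stale-work-dir", 0);
    unlock_dir("workdir-parent");
    return 0;
}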
+diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
+index 6db75cbb668f..86f20256cda0 100644
+--- a/fs/overlayfs/super.c
++++ b/fs/overlayfs/super.c
+@@ -798,6 +798,10 @@ retry:
+ struct kstat stat = {
+ .mode = S_IFDIR | 0,
+ };
++ struct iattr attr = {
++ .ia_valid = ATTR_MODE,
++ .ia_mode = stat.mode,
++ };
+
+ if (work->d_inode) {
+ err = -EEXIST;
+@@ -805,7 +809,7 @@ retry:
+ goto out_dput;
+
+ retried = true;
+- ovl_cleanup(dir, work);
++ ovl_workdir_cleanup(dir, mnt, work, 0);
+ dput(work);
+ goto retry;
+ }
+@@ -813,6 +817,21 @@ retry:
+ err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
+ if (err)
+ goto out_dput;
++
++ err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
++ if (err && err != -ENODATA && err != -EOPNOTSUPP)
++ goto out_dput;
++
++ err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
++ if (err && err != -ENODATA && err != -EOPNOTSUPP)
++ goto out_dput;
++
++ /* Clear any inherited mode bits */
++ inode_lock(work->d_inode);
++ err = notify_change(work, &attr, NULL);
++ inode_unlock(work->d_inode);
++ if (err)
++ goto out_dput;
+ }
+ out_unlock:
+ inode_unlock(dir);
+diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
+index b45345d701e7..51157da3f76e 100644
+--- a/fs/ubifs/tnc_commit.c
++++ b/fs/ubifs/tnc_commit.c
+@@ -370,7 +370,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+
+ p = c->gap_lebs;
+ do {
+- ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs);
++ ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs);
+ written = layout_leb_in_gaps(c, p);
+ if (written < 0) {
+ err = written;
+diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
+index b5fc27969e9d..c63710fbd95d 100644
+--- a/fs/ubifs/xattr.c
++++ b/fs/ubifs/xattr.c
+@@ -575,7 +575,8 @@ static int ubifs_xattr_get(const struct xattr_handler *handler,
+ dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
+ inode->i_ino, dentry, size);
+
+- return __ubifs_getxattr(inode, name, buffer, size);
++ name = xattr_full_name(handler, name);
++ return __ubifs_getxattr(inode, name, buffer, size);
+ }
+
+ static int ubifs_xattr_set(const struct xattr_handler *handler,
+@@ -586,6 +587,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler,
+ dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd",
+ name, inode->i_ino, dentry, size);
+
++ name = xattr_full_name(handler, name);
++
+ if (value)
+ return __ubifs_setxattr(inode, name, value, size, flags);
+ else
+diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
+index 12ca86778e02..85bdf3de2360 100644
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -581,7 +581,8 @@ xfs_sb_verify(
+ * Only check the in progress field for the primary superblock as
+ * mkfs.xfs doesn't clear it from secondary superblocks.
+ */
+- return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
++ return xfs_mount_validate_sb(mp, &sb,
++ bp->b_maps[0].bm_bn == XFS_SB_DADDR,
+ check_version);
+ }
+
+diff --git a/include/linux/capability.h b/include/linux/capability.h
+index 00690ff92edf..5f3c63dde2d5 100644
+--- a/include/linux/capability.h
++++ b/include/linux/capability.h
+@@ -206,6 +206,7 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
+ struct user_namespace *ns, int cap);
+ extern bool capable(int cap);
+ extern bool ns_capable(struct user_namespace *ns, int cap);
++extern bool ns_capable_noaudit(struct user_namespace *ns, int cap);
+ #else
+ static inline bool has_capability(struct task_struct *t, int cap)
+ {
+@@ -233,6 +234,10 @@ static inline bool ns_capable(struct user_namespace *ns, int cap)
+ {
+ return true;
+ }
++static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap)
++{
++ return true;
++}
+ #endif /* CONFIG_MULTIUSER */
+ extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
+ extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
+diff --git a/kernel/capability.c b/kernel/capability.c
+index 45432b54d5c6..00411c82dac5 100644
+--- a/kernel/capability.c
++++ b/kernel/capability.c
+@@ -361,6 +361,24 @@ bool has_capability_noaudit(struct task_struct *t, int cap)
+ return has_ns_capability_noaudit(t, &init_user_ns, cap);
+ }
+
++static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit)
++{
++ int capable;
++
++ if (unlikely(!cap_valid(cap))) {
++ pr_crit("capable() called with invalid cap=%u\n", cap);
++ BUG();
++ }
++
++ capable = audit ? security_capable(current_cred(), ns, cap) :
++ security_capable_noaudit(current_cred(), ns, cap);
++ if (capable == 0) {
++ current->flags |= PF_SUPERPRIV;
++ return true;
++ }
++ return false;
++}
++
+ /**
+ * ns_capable - Determine if the current task has a superior capability in effect
+ * @ns: The usernamespace we want the capability in
+@@ -374,19 +392,27 @@ bool has_capability_noaudit(struct task_struct *t, int cap)
+ */
+ bool ns_capable(struct user_namespace *ns, int cap)
+ {
+- if (unlikely(!cap_valid(cap))) {
+- pr_crit("capable() called with invalid cap=%u\n", cap);
+- BUG();
+- }
+-
+- if (security_capable(current_cred(), ns, cap) == 0) {
+- current->flags |= PF_SUPERPRIV;
+- return true;
+- }
+- return false;
++ return ns_capable_common(ns, cap, true);
+ }
+ EXPORT_SYMBOL(ns_capable);
+
++/**
++ * ns_capable_noaudit - Determine if the current task has a superior capability
++ * (unaudited) in effect
++ * @ns: The usernamespace we want the capability in
++ * @cap: The capability to be tested for
++ *
++ * Return true if the current task has the given superior capability currently
++ * available for use, false if not.
++ *
++ * This sets PF_SUPERPRIV on the task if the capability is available on the
++ * assumption that it's about to be used.
++ */
++bool ns_capable_noaudit(struct user_namespace *ns, int cap)
++{
++ return ns_capable_common(ns, cap, false);
++}
++EXPORT_SYMBOL(ns_capable_noaudit);
+
+ /**
+ * capable - Determine if the current task has a superior capability in effect
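
The refactor above routes both entry points through ns_capable_common(), so the audited and unaudited checks can differ only in which security_capable call they make. The noaudit variant is for callers that merely probe for a capability and fall back gracefully, where denial is routine and should not generate audit records; the net/sysctl_net.c hunk later in this patch is exactly such a caller. A userspace sketch of the two call-site styles (names and the fallback policy are invented):

#include <stdbool.h>
#include <stdio.h>

/* Userspace stand-ins for the two kernel checks; only the audit side
 * effect differs. All names below are made up for illustration. */
static bool capable_audited(int cap)
{
    fprintf(stderr, "audit: capability %d denied\n", cap);
    return false;
}

static bool capable_noaudit(int cap)
{
    (void)cap;    /* same decision, but no audit record */
    return false;
}

/* A permission probe with a graceful fallback: denial is the normal
 * case here, so it should not show up in the audit log. */
static int effective_mode(int mode)
{
    if (capable_noaudit(12 /* CAP_NET_ADMIN */))
        return (mode >> 6) & 7;    /* admin sees owner bits */
    return mode & 7;               /* everyone else: world bits */
}

int main(void)
{
    /* Enforcement point: a denial here is a real security event. */
    if (!capable_audited(12))
        puts("operation refused (audited)");

    /* Probe point: fall back silently. */
    printf("mode bits: %o\n", effective_mode(0644));
    return 0;
}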
+diff --git a/kernel/cred.c b/kernel/cred.c
+index 0c0cd8a62285..5f264fb5737d 100644
+--- a/kernel/cred.c
++++ b/kernel/cred.c
+@@ -689,6 +689,8 @@ EXPORT_SYMBOL(set_security_override_from_ctx);
+ */
+ int set_create_files_as(struct cred *new, struct inode *inode)
+ {
++ if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
++ return -EINVAL;
+ new->fsuid = inode->i_uid;
+ new->fsgid = inode->i_gid;
+ return security_kernel_create_files_as(new, inode);
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 4a7ec0c6c88c..aea4f4da3836 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1406,7 +1406,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
+ p->real_start_time = ktime_get_boot_ns();
+ p->io_context = NULL;
+ p->audit_context = NULL;
+- threadgroup_change_begin(current);
+ cgroup_fork(p);
+ #ifdef CONFIG_NUMA
+ p->mempolicy = mpol_dup(p->mempolicy);
+@@ -1558,6 +1557,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
+ INIT_LIST_HEAD(&p->thread_group);
+ p->task_works = NULL;
+
++ threadgroup_change_begin(current);
+ /*
+ * Ensure that the cgroup subsystem policies allow the new process to be
+ * forked. It should be noted that the new process's css_set can be changed
+@@ -1658,6 +1658,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
+ bad_fork_cancel_cgroup:
+ cgroup_cancel_fork(p);
+ bad_fork_free_pid:
++ threadgroup_change_end(current);
+ if (pid != &init_struct_pid)
+ free_pid(pid);
+ bad_fork_cleanup_thread:
+@@ -1690,7 +1691,6 @@ bad_fork_cleanup_policy:
+ mpol_put(p->mempolicy);
+ bad_fork_cleanup_threadgroup_lock:
+ #endif
+- threadgroup_change_end(current);
+ delayacct_tsk_free(p);
+ bad_fork_cleanup_count:
+ atomic_dec(&p->cred->user->processes);
+diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
+index 479d25cd3d4f..b6c394563178 100644
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -401,7 +401,10 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
+ do {
+ seq = raw_read_seqcount_latch(&tkf->seq);
+ tkr = tkf->base + (seq & 0x01);
+- now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
++ now = ktime_to_ns(tkr->base);
++
++ now += clocksource_delta(tkr->read(tkr->clock),
++ tkr->cycle_last, tkr->mask);
+ } while (read_seqcount_retry(&tkf->seq, seq));
+
+ return now;
+diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
+index f6bd65236712..107310a6f36f 100644
+--- a/kernel/time/timekeeping_debug.c
++++ b/kernel/time/timekeeping_debug.c
+@@ -23,7 +23,9 @@
+
+ #include "timekeeping_internal.h"
+
+-static unsigned int sleep_time_bin[32] = {0};
++#define NUM_BINS 32
++
++static unsigned int sleep_time_bin[NUM_BINS] = {0};
+
+ static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
+ {
+@@ -69,6 +71,9 @@ late_initcall(tk_debug_sleep_time_init);
+
+ void tk_debug_account_sleep_time(struct timespec64 *t)
+ {
+- sleep_time_bin[fls(t->tv_sec)]++;
++ /* Cap bin index so we don't overflow the array */
++ int bin = min(fls(t->tv_sec), NUM_BINS-1);
++
++ sleep_time_bin[bin]++;
+ }
+
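The histogram fix above closes an off-by-one: sleep_time_bin[] has 32 entries (indices 0..31), but fls() returns 32 for any value with bit 31 set, so a long enough sleep wrote one slot past the array. A runnable demonstration of the clamp; fls() is re-implemented here since it is a kernel helper.

#include <stdio.h>

#define NUM_BINS 32

/* Userspace re-implementation of the kernel's fls(): index of the
 * highest set bit, 1-based; fls(0) == 0. */
static int fls(unsigned int x)
{
    int r = 0;

    while (x) {
        x >>= 1;
        r++;
    }
    return r;
}

static unsigned int sleep_time_bin[NUM_BINS];

static void account_sleep_time(unsigned int seconds)
{
    /* Cap the index: fls() can return NUM_BINS (32) for values with
     * the top bit set, which would overflow the array. */
    int bin = fls(seconds);

    if (bin > NUM_BINS - 1)
        bin = NUM_BINS - 1;
    sleep_time_bin[bin]++;
}

int main(void)
{
    account_sleep_time(0x80000000u);    /* fls() == 32: needs the cap */
    account_sleep_time(3);              /* fls() == 2 */
    printf("bin[31]=%u bin[2]=%u\n",
           sleep_time_bin[NUM_BINS - 1], sleep_time_bin[2]);
    return 0;
}
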
+diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
+index c0947544babe..f02ab80aa6ee 100644
+--- a/net/sunrpc/xprtrdma/frwr_ops.c
++++ b/net/sunrpc/xprtrdma/frwr_ops.c
+@@ -125,17 +125,16 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+ }
+
+ static void
+-__frwr_reset_and_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
++__frwr_reset_and_unmap(struct rpcrdma_mw *mw)
+ {
++ struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+- struct rpcrdma_frmr *f = &mw->frmr;
+ int rc;
+
+ rc = __frwr_reset_mr(ia, mw);
+- ib_dma_unmap_sg(ia->ri_device, f->fr_sg, f->fr_nents, f->fr_dir);
++ ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ if (rc)
+ return;
+-
+ rpcrdma_put_mw(r_xprt, mw);
+ }
+
+@@ -152,8 +151,7 @@ __frwr_recovery_worker(struct work_struct *work)
+ struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
+ mw_work);
+
+- __frwr_reset_and_unmap(r->mw_xprt, r);
+- return;
++ __frwr_reset_and_unmap(r);
+ }
+
+ /* A broken MR was discovered in a context that can't sleep.
+@@ -167,8 +165,7 @@ __frwr_queue_recovery(struct rpcrdma_mw *r)
+ }
+
+ static int
+-__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
+- unsigned int depth)
++__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, unsigned int depth)
+ {
+ struct rpcrdma_frmr *f = &r->frmr;
+ int rc;
+@@ -177,11 +174,11 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
+ if (IS_ERR(f->fr_mr))
+ goto out_mr_err;
+
+- f->fr_sg = kcalloc(depth, sizeof(*f->fr_sg), GFP_KERNEL);
+- if (!f->fr_sg)
++ r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
++ if (!r->mw_sg)
+ goto out_list_err;
+
+- sg_init_table(f->fr_sg, depth);
++ sg_init_table(r->mw_sg, depth);
+
+ init_completion(&f->fr_linv_done);
+
+@@ -210,7 +207,7 @@ __frwr_release(struct rpcrdma_mw *r)
+ if (rc)
+ dprintk("RPC: %s: ib_dereg_mr status %i\n",
+ __func__, rc);
+- kfree(r->frmr.fr_sg);
++ kfree(r->mw_sg);
+ }
+
+ static int
+@@ -350,7 +347,6 @@ static int
+ frwr_op_init(struct rpcrdma_xprt *r_xprt)
+ {
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+- struct ib_device *device = r_xprt->rx_ia.ri_device;
+ unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+ struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+ int i;
+@@ -372,7 +368,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
+ if (!r)
+ return -ENOMEM;
+
+- rc = __frwr_init(r, pd, device, depth);
++ rc = __frwr_init(r, pd, depth);
+ if (rc) {
+ kfree(r);
+ return rc;
+@@ -386,7 +382,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
+ return 0;
+ }
+
+-/* Post a FAST_REG Work Request to register a memory region
++/* Post a REG_MR Work Request to register a memory region
+ * for remote access via RDMA READ or RDMA WRITE.
+ */
+ static int
+@@ -394,8 +390,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+ int nsegs, bool writing)
+ {
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+- struct ib_device *device = ia->ri_device;
+- enum dma_data_direction direction = rpcrdma_data_dir(writing);
+ struct rpcrdma_mr_seg *seg1 = seg;
+ struct rpcrdma_mw *mw;
+ struct rpcrdma_frmr *frmr;
+@@ -421,15 +415,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+
+ if (nsegs > ia->ri_max_frmr_depth)
+ nsegs = ia->ri_max_frmr_depth;
+-
+ for (i = 0; i < nsegs;) {
+ if (seg->mr_page)
+- sg_set_page(&frmr->fr_sg[i],
++ sg_set_page(&mw->mw_sg[i],
+ seg->mr_page,
+ seg->mr_len,
+ offset_in_page(seg->mr_offset));
+ else
+- sg_set_buf(&frmr->fr_sg[i], seg->mr_offset,
++ sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+ seg->mr_len);
+
+ ++seg;
+@@ -440,26 +433,20 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+ offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+ break;
+ }
+- frmr->fr_nents = i;
+- frmr->fr_dir = direction;
+-
+- dma_nents = ib_dma_map_sg(device, frmr->fr_sg, frmr->fr_nents, direction);
+- if (!dma_nents) {
+- pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n",
+- __func__, frmr->fr_sg, frmr->fr_nents);
+- return -ENOMEM;
+- }
++ mw->mw_nents = i;
++ mw->mw_dir = rpcrdma_data_dir(writing);
+
+- n = ib_map_mr_sg(mr, frmr->fr_sg, frmr->fr_nents, NULL, PAGE_SIZE);
+- if (unlikely(n != frmr->fr_nents)) {
+- pr_err("RPC: %s: failed to map mr %p (%u/%u)\n",
+- __func__, frmr->fr_mr, n, frmr->fr_nents);
+- rc = n < 0 ? n : -EINVAL;
+- goto out_senderr;
+- }
++ dma_nents = ib_dma_map_sg(ia->ri_device,
++ mw->mw_sg, mw->mw_nents, mw->mw_dir);
++ if (!dma_nents)
++ goto out_dmamap_err;
++
++ n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
++ if (unlikely(n != mw->mw_nents))
++ goto out_mapmr_err;
+
+ dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
+- __func__, mw, frmr->fr_nents, mr->length);
++ __func__, mw, mw->mw_nents, mr->length);
+
+ key = (u8)(mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(mr, ++key);
+@@ -484,13 +471,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+ seg1->rl_mw = mw;
+ seg1->mr_rkey = mr->rkey;
+ seg1->mr_base = mr->iova;
+- seg1->mr_nsegs = frmr->fr_nents;
++ seg1->mr_nsegs = mw->mw_nents;
+ seg1->mr_len = mr->length;
+
+- return frmr->fr_nents;
++ return mw->mw_nents;
++
++out_dmamap_err:
++ pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
++ mw->mw_sg, mw->mw_nents);
++ return -ENOMEM;
++
++out_mapmr_err:
++ pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
++ frmr->fr_mr, n, mw->mw_nents);
++ rc = n < 0 ? n : -EIO;
++ __frwr_queue_recovery(mw);
++ return rc;
+
+ out_senderr:
+- dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
++ pr_err("rpcrdma: ib_post_send status %i\n", rc);
+ __frwr_queue_recovery(mw);
+ return rc;
+ }
+@@ -582,8 +581,8 @@ unmap:
+ mw = seg->rl_mw;
+ seg->rl_mw = NULL;
+
+- ib_dma_unmap_sg(ia->ri_device, f->fr_sg, f->fr_nents,
+- f->fr_dir);
++ ib_dma_unmap_sg(ia->ri_device,
++ mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ rpcrdma_put_mw(r_xprt, mw);
+
+ i += seg->mr_nsegs;
+@@ -630,7 +629,7 @@ frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+ mw = seg->rl_mw;
+
+ if (sync)
+- __frwr_reset_and_unmap(r_xprt, mw);
++ __frwr_reset_and_unmap(mw);
+ else
+ __frwr_queue_recovery(mw);
+
+diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
+index 95cdc66225ee..c53abd1281b3 100644
+--- a/net/sunrpc/xprtrdma/xprt_rdma.h
++++ b/net/sunrpc/xprtrdma/xprt_rdma.h
+@@ -221,9 +221,6 @@ enum rpcrdma_frmr_state {
+ };
+
+ struct rpcrdma_frmr {
+- struct scatterlist *fr_sg;
+- int fr_nents;
+- enum dma_data_direction fr_dir;
+ struct ib_mr *fr_mr;
+ struct ib_cqe fr_cqe;
+ enum rpcrdma_frmr_state fr_state;
+@@ -240,13 +237,16 @@ struct rpcrdma_fmr {
+ };
+
+ struct rpcrdma_mw {
++ struct list_head mw_list;
++ struct scatterlist *mw_sg;
++ int mw_nents;
++ enum dma_data_direction mw_dir;
+ union {
+ struct rpcrdma_fmr fmr;
+ struct rpcrdma_frmr frmr;
+ };
+ struct work_struct mw_work;
+ struct rpcrdma_xprt *mw_xprt;
+- struct list_head mw_list;
+ struct list_head mw_all;
+ };
+
+diff --git a/net/sysctl_net.c b/net/sysctl_net.c
+index ed98c1fc3de1..46a71c701e7c 100644
+--- a/net/sysctl_net.c
++++ b/net/sysctl_net.c
+@@ -46,7 +46,7 @@ static int net_ctl_permissions(struct ctl_table_header *head,
+ kgid_t root_gid = make_kgid(net->user_ns, 0);
+
+ /* Allow network administrator to have same access as root. */
+- if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
++ if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN) ||
+ uid_eq(root_uid, current_euid())) {
+ int mode = (table->mode >> 6) & 7;
+ return (mode << 6) | (mode << 3) | mode;
+diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
+index 705c2879d3a9..7347fcc4f451 100644
+--- a/security/apparmor/policy.c
++++ b/security/apparmor/policy.c
+@@ -766,7 +766,9 @@ struct aa_profile *aa_find_child(struct aa_profile *parent, const char *name)
+ struct aa_profile *profile;
+
+ rcu_read_lock();
+- profile = aa_get_profile(__find_child(&parent->base.profiles, name));
++ do {
++ profile = __find_child(&parent->base.profiles, name);
++ } while (profile && !aa_get_profile_not0(profile));
+ rcu_read_unlock();
+
+ /* refcount released by caller */
+diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
+index 795437b10082..b450a27588c8 100644
+--- a/sound/core/rawmidi.c
++++ b/sound/core/rawmidi.c
+@@ -1633,11 +1633,13 @@ static int snd_rawmidi_dev_register(struct snd_device *device)
+ return -EBUSY;
+ }
+ list_add_tail(&rmidi->list, &snd_rawmidi_devices);
++ mutex_unlock(&register_mutex);
+ err = snd_register_device(SNDRV_DEVICE_TYPE_RAWMIDI,
+ rmidi->card, rmidi->device,
+ &snd_rawmidi_f_ops, rmidi, &rmidi->dev);
+ if (err < 0) {
+ rmidi_err(rmidi, "unable to register\n");
++ mutex_lock(&register_mutex);
+ list_del(&rmidi->list);
+ mutex_unlock(&register_mutex);
+ return err;
+@@ -1645,6 +1647,7 @@ static int snd_rawmidi_dev_register(struct snd_device *device)
+ if (rmidi->ops && rmidi->ops->dev_register &&
+ (err = rmidi->ops->dev_register(rmidi)) < 0) {
+ snd_unregister_device(&rmidi->dev);
++ mutex_lock(&register_mutex);
+ list_del(&rmidi->list);
+ mutex_unlock(&register_mutex);
+ return err;
+@@ -1677,7 +1680,6 @@ static int snd_rawmidi_dev_register(struct snd_device *device)
+ }
+ }
+ #endif /* CONFIG_SND_OSSEMUL */
+- mutex_unlock(&register_mutex);
+ sprintf(name, "midi%d", rmidi->device);
+ entry = snd_info_create_card_entry(rmidi->card, name, rmidi->card->proc_root);
+ if (entry) {
+diff --git a/sound/core/timer.c b/sound/core/timer.c
+index 9a6157ea6881..fc144f43faa6 100644
+--- a/sound/core/timer.c
++++ b/sound/core/timer.c
+@@ -35,6 +35,9 @@
+ #include <sound/initval.h>
+ #include <linux/kmod.h>
+
++/* internal flags */
++#define SNDRV_TIMER_IFLG_PAUSED 0x00010000
++
+ #if IS_ENABLED(CONFIG_SND_HRTIMER)
+ #define DEFAULT_TIMER_LIMIT 4
+ #else
+@@ -294,8 +297,21 @@ int snd_timer_open(struct snd_timer_instance **ti,
+ get_device(&timer->card->card_dev);
+ timeri->slave_class = tid->dev_sclass;
+ timeri->slave_id = slave_id;
+- if (list_empty(&timer->open_list_head) && timer->hw.open)
+- timer->hw.open(timer);
++
++ if (list_empty(&timer->open_list_head) && timer->hw.open) {
++ int err = timer->hw.open(timer);
++ if (err) {
++ kfree(timeri->owner);
++ kfree(timeri);
++
++ if (timer->card)
++ put_device(&timer->card->card_dev);
++ module_put(timer->module);
++ mutex_unlock(&register_mutex);
++ return err;
++ }
++ }
++
+ list_add_tail(&timeri->open_list, &timer->open_list_head);
+ snd_timer_check_master(timeri);
+ mutex_unlock(&register_mutex);
+@@ -526,6 +542,10 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop)
+ }
+ }
+ timeri->flags &= ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START);
++ if (stop)
++ timeri->flags &= ~SNDRV_TIMER_IFLG_PAUSED;
++ else
++ timeri->flags |= SNDRV_TIMER_IFLG_PAUSED;
+ snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
+ SNDRV_TIMER_EVENT_CONTINUE);
+ unlock:
+@@ -587,6 +607,10 @@ int snd_timer_stop(struct snd_timer_instance *timeri)
+ */
+ int snd_timer_continue(struct snd_timer_instance *timeri)
+ {
++ /* timer can continue only after pause */
++ if (!(timeri->flags & SNDRV_TIMER_IFLG_PAUSED))
++ return -EINVAL;
++
+ if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE)
+ return snd_timer_start_slave(timeri, false);
+ else
+@@ -813,6 +837,7 @@ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid,
+ timer->tmr_subdevice = tid->subdevice;
+ if (id)
+ strlcpy(timer->id, id, sizeof(timer->id));
++ timer->sticks = 1;
+ INIT_LIST_HEAD(&timer->device_list);
+ INIT_LIST_HEAD(&timer->open_list_head);
+ INIT_LIST_HEAD(&timer->active_list_head);
+@@ -1817,6 +1842,9 @@ static int snd_timer_user_continue(struct file *file)
+ tu = file->private_data;
+ if (!tu->timeri)
+ return -EBADFD;
++ /* start timer instead of continue if it's not used before */
++ if (!(tu->timeri->flags & SNDRV_TIMER_IFLG_PAUSED))
++ return snd_timer_user_start(file);
+ tu->timeri->lost = 0;
+ return (err = snd_timer_continue(tu->timeri)) < 0 ? err : 0;
+ }
+@@ -1958,6 +1986,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
+ tu->qused--;
+ spin_unlock_irq(&tu->qlock);
+
++ mutex_lock(&tu->ioctl_lock);
+ if (tu->tread) {
+ if (copy_to_user(buffer, &tu->tqueue[qhead],
+ sizeof(struct snd_timer_tread)))
+@@ -1967,6 +1996,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
+ sizeof(struct snd_timer_read)))
+ err = -EFAULT;
+ }
++ mutex_unlock(&tu->ioctl_lock);
+
+ spin_lock_irq(&tu->qlock);
+ if (err < 0)
+diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h
+index 03ed35237e2b..d73c12b8753d 100644
+--- a/sound/firewire/fireworks/fireworks.h
++++ b/sound/firewire/fireworks/fireworks.h
+@@ -108,7 +108,6 @@ struct snd_efw {
+ u8 *resp_buf;
+ u8 *pull_ptr;
+ u8 *push_ptr;
+- unsigned int resp_queues;
+ };
+
+ int snd_efw_transaction_cmd(struct fw_unit *unit,
+diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c
+index 33df8655fe81..2e1d9a23920c 100644
+--- a/sound/firewire/fireworks/fireworks_hwdep.c
++++ b/sound/firewire/fireworks/fireworks_hwdep.c
+@@ -25,6 +25,7 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained,
+ {
+ unsigned int length, till_end, type;
+ struct snd_efw_transaction *t;
++ u8 *pull_ptr;
+ long count = 0;
+
+ if (remained < sizeof(type) + sizeof(struct snd_efw_transaction))
+@@ -38,8 +39,17 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained,
+ buf += sizeof(type);
+
+ /* write into buffer as many responses as possible */
+- while (efw->resp_queues > 0) {
+- t = (struct snd_efw_transaction *)(efw->pull_ptr);
++ spin_lock_irq(&efw->lock);
++
++ /*
++ * When another task reaches here during this task's access to user
++ * space, it picks up current position in buffer and can read the same
++ * series of responses.
++ */
++ pull_ptr = efw->pull_ptr;
++
++ while (efw->push_ptr != pull_ptr) {
++ t = (struct snd_efw_transaction *)(pull_ptr);
+ length = be32_to_cpu(t->length) * sizeof(__be32);
+
+ /* confirm enough space for this response */
+@@ -49,26 +59,39 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained,
+ /* copy from ring buffer to user buffer */
+ while (length > 0) {
+ till_end = snd_efw_resp_buf_size -
+- (unsigned int)(efw->pull_ptr - efw->resp_buf);
++ (unsigned int)(pull_ptr - efw->resp_buf);
+ till_end = min_t(unsigned int, length, till_end);
+
+- if (copy_to_user(buf, efw->pull_ptr, till_end))
++ spin_unlock_irq(&efw->lock);
++
++ if (copy_to_user(buf, pull_ptr, till_end))
+ return -EFAULT;
+
+- efw->pull_ptr += till_end;
+- if (efw->pull_ptr >= efw->resp_buf +
+- snd_efw_resp_buf_size)
+- efw->pull_ptr -= snd_efw_resp_buf_size;
++ spin_lock_irq(&efw->lock);
++
++ pull_ptr += till_end;
++ if (pull_ptr >= efw->resp_buf + snd_efw_resp_buf_size)
++ pull_ptr -= snd_efw_resp_buf_size;
+
+ length -= till_end;
+ buf += till_end;
+ count += till_end;
+ remained -= till_end;
+ }
+-
+- efw->resp_queues--;
+ }
+
++ /*
++ * All tasks can read from the buffer nearly simultaneously, but the
++ * final position differs per task, depending on the length of the
++ * given buffer. Here, for simplicity, the buffer position is set by
++ * the latest task. It's better for a listening application to allow
++ * only one thread to read from the buffer; otherwise, each task may
++ * read a different sequence of responses, depending on buffer length.
++ */
++ efw->pull_ptr = pull_ptr;
++
++ spin_unlock_irq(&efw->lock);
++
+ return count;
+ }
+
+@@ -76,14 +99,17 @@ static long
+ hwdep_read_locked(struct snd_efw *efw, char __user *buf, long count,
+ loff_t *offset)
+ {
+- union snd_firewire_event event;
++ union snd_firewire_event event = {
++ .lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS,
++ };
+
+- memset(&event, 0, sizeof(event));
++ spin_lock_irq(&efw->lock);
+
+- event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS;
+ event.lock_status.status = (efw->dev_lock_count > 0);
+ efw->dev_lock_changed = false;
+
++ spin_unlock_irq(&efw->lock);
++
+ count = min_t(long, count, sizeof(event.lock_status));
+
+ if (copy_to_user(buf, &event, count))
+@@ -98,10 +124,15 @@ hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
+ {
+ struct snd_efw *efw = hwdep->private_data;
+ DEFINE_WAIT(wait);
++ bool dev_lock_changed;
++ bool queued;
+
+ spin_lock_irq(&efw->lock);
+
+- while ((!efw->dev_lock_changed) && (efw->resp_queues == 0)) {
++ dev_lock_changed = efw->dev_lock_changed;
++ queued = efw->push_ptr != efw->pull_ptr;
++
++ while (!dev_lock_changed && !queued) {
+ prepare_to_wait(&efw->hwdep_wait, &wait, TASK_INTERRUPTIBLE);
+ spin_unlock_irq(&efw->lock);
+ schedule();
+@@ -109,15 +140,17 @@ hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+ spin_lock_irq(&efw->lock);
++ dev_lock_changed = efw->dev_lock_changed;
++ queued = efw->push_ptr != efw->pull_ptr;
+ }
+
+- if (efw->dev_lock_changed)
++ spin_unlock_irq(&efw->lock);
++
++ if (dev_lock_changed)
+ count = hwdep_read_locked(efw, buf, count, offset);
+- else if (efw->resp_queues > 0)
++ else if (queued)
+ count = hwdep_read_resp_buf(efw, buf, count, offset);
+
+- spin_unlock_irq(&efw->lock);
+-
+ return count;
+ }
+
+@@ -160,7 +193,7 @@ hwdep_poll(struct snd_hwdep *hwdep, struct file *file, poll_table *wait)
+ poll_wait(file, &efw->hwdep_wait, wait);
+
+ spin_lock_irq(&efw->lock);
+- if (efw->dev_lock_changed || (efw->resp_queues > 0))
++ if (efw->dev_lock_changed || efw->pull_ptr != efw->push_ptr)
+ events = POLLIN | POLLRDNORM;
+ else
+ events = 0;
+diff --git a/sound/firewire/fireworks/fireworks_proc.c b/sound/firewire/fireworks/fireworks_proc.c
+index 0639dcb13f7d..beb0a0ffee57 100644
+--- a/sound/firewire/fireworks/fireworks_proc.c
++++ b/sound/firewire/fireworks/fireworks_proc.c
+@@ -188,8 +188,8 @@ proc_read_queues_state(struct snd_info_entry *entry,
+ else
+ consumed = (unsigned int)(efw->push_ptr - efw->pull_ptr);
+
+- snd_iprintf(buffer, "%d %d/%d\n",
+- efw->resp_queues, consumed, snd_efw_resp_buf_size);
++ snd_iprintf(buffer, "%d/%d\n",
++ consumed, snd_efw_resp_buf_size);
+ }
+
+ static void
+diff --git a/sound/firewire/fireworks/fireworks_transaction.c b/sound/firewire/fireworks/fireworks_transaction.c
+index f550808d1784..36a08ba51ec7 100644
+--- a/sound/firewire/fireworks/fireworks_transaction.c
++++ b/sound/firewire/fireworks/fireworks_transaction.c
+@@ -121,11 +121,11 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode)
+ size_t capacity, till_end;
+ struct snd_efw_transaction *t;
+
+- spin_lock_irq(&efw->lock);
+-
+ t = (struct snd_efw_transaction *)data;
+ length = min_t(size_t, be32_to_cpu(t->length) * sizeof(u32), length);
+
++ spin_lock_irq(&efw->lock);
++
+ if (efw->push_ptr < efw->pull_ptr)
+ capacity = (unsigned int)(efw->pull_ptr - efw->push_ptr);
+ else
+@@ -155,7 +155,6 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode)
+ }
+
+ /* for hwdep */
+- efw->resp_queues++;
+ wake_up(&efw->hwdep_wait);
+
+ *rcode = RCODE_COMPLETE;
+diff --git a/sound/firewire/tascam/tascam-hwdep.c b/sound/firewire/tascam/tascam-hwdep.c
+index 131267c3a042..106406cbfaa3 100644
+--- a/sound/firewire/tascam/tascam-hwdep.c
++++ b/sound/firewire/tascam/tascam-hwdep.c
+@@ -16,31 +16,14 @@
+
+ #include "tascam.h"
+
+-static long hwdep_read_locked(struct snd_tscm *tscm, char __user *buf,
+- long count)
+-{
+- union snd_firewire_event event;
+-
+- memset(&event, 0, sizeof(event));
+-
+- event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS;
+- event.lock_status.status = (tscm->dev_lock_count > 0);
+- tscm->dev_lock_changed = false;
+-
+- count = min_t(long, count, sizeof(event.lock_status));
+-
+- if (copy_to_user(buf, &event, count))
+- return -EFAULT;
+-
+- return count;
+-}
+-
+ static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
+ loff_t *offset)
+ {
+ struct snd_tscm *tscm = hwdep->private_data;
+ DEFINE_WAIT(wait);
+- union snd_firewire_event event;
++ union snd_firewire_event event = {
++ .lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS,
++ };
+
+ spin_lock_irq(&tscm->lock);
+
+@@ -54,10 +37,16 @@ static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
+ spin_lock_irq(&tscm->lock);
+ }
+
+- memset(&event, 0, sizeof(event));
+- count = hwdep_read_locked(tscm, buf, count);
++ event.lock_status.status = (tscm->dev_lock_count > 0);
++ tscm->dev_lock_changed = false;
++
+ spin_unlock_irq(&tscm->lock);
+
++ count = min_t(long, count, sizeof(event.lock_status));
++
++ if (copy_to_user(buf, &event, count))
++ return -EFAULT;
++
+ return count;
+ }
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index f25479ba3981..eaee626ab185 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -4840,6 +4840,7 @@ enum {
+ ALC221_FIXUP_HP_FRONT_MIC,
+ ALC292_FIXUP_TPT460,
+ ALC298_FIXUP_SPK_VOLUME,
++ ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER,
+ };
+
+ static const struct hda_fixup alc269_fixups[] = {
+@@ -5501,6 +5502,15 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+ },
++ [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ { 0x1b, 0x90170151 },
++ { }
++ },
++ .chained = true,
++ .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
++ },
+ };
+
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -5545,6 +5555,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
+ SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
+ SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
++ SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
+ SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
+@@ -5879,6 +5890,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+ {0x12, 0x90a60170},
+ {0x14, 0x90170120},
+ {0x21, 0x02211030}),
++ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell Inspiron 5468", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++ {0x12, 0x90a60180},
++ {0x14, 0x90170120},
++ {0x21, 0x02211030}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ ALC256_STANDARD_PINS),
+ SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
+diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
+index 1267e1af0fae..633d54ca78c8 100644
+--- a/sound/soc/atmel/atmel_ssc_dai.c
++++ b/sound/soc/atmel/atmel_ssc_dai.c
+@@ -299,8 +299,9 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream,
+ clk_enable(ssc_p->ssc->clk);
+ ssc_p->mck_rate = clk_get_rate(ssc_p->ssc->clk);
+
+- /* Reset the SSC to keep it at a clean status */
+- ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST));
++ /* Reset the SSC unless initialized to keep it in a clean state */
++ if (!ssc_p->initialized)
++ ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST));
+
+ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+ dir = 0;
+diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
+index 6cf1f3597455..152292e5ee2b 100644
+--- a/sound/usb/quirks.c
++++ b/sound/usb/quirks.c
+@@ -1141,6 +1141,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
+ case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
+ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */
+ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
++ case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */
+ case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
+ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
+ case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-09-24 10:40 Mike Pagano
0 siblings, 0 replies; 20+ messages in thread
From: Mike Pagano @ 2016-09-24 10:40 UTC (permalink / raw
To: gentoo-commits
commit: 7802bd38ec6a3e5cebd97ec87d85ebc4ac15d346
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Sep 24 10:40:25 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Sep 24 10:40:25 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=7802bd38
Linux patch 4.7.5
0000_README | 4 +
1004_linux-4.7.5.patch | 6989 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 6993 insertions(+)
diff --git a/0000_README b/0000_README
index 2b11683..fefac23 100644
--- a/0000_README
+++ b/0000_README
@@ -59,6 +59,10 @@ Patch: 1003_linux-4.7.4.patch
From: http://www.kernel.org
Desc: Linux 4.7.4
+Patch: 1004_linux-4.7.5.patch
+From: http://www.kernel.org
+Desc: Linux 4.7.5
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1004_linux-4.7.5.patch b/1004_linux-4.7.5.patch
new file mode 100644
index 0000000..4332e2c
--- /dev/null
+++ b/1004_linux-4.7.5.patch
@@ -0,0 +1,6989 @@
+diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt
+index bf99e2f24788..205593f56fe7 100644
+--- a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt
++++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt
+@@ -16,6 +16,11 @@ Required properties:
+ - vref-supply: The regulator supply ADC reference voltage.
+ - #io-channel-cells: Should be 1, see ../iio-bindings.txt
+
++Optional properties:
++- resets: Must contain an entry for each entry in reset-names if support
++  for this option is needed. See ../reset/reset.txt for details.
++- reset-names: Must include the name "saradc-apb".
++
+ Example:
+ saradc: saradc@2006c000 {
+ compatible = "rockchip,saradc";
+@@ -23,6 +28,8 @@ Example:
+ interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
+ clock-names = "saradc", "apb_pclk";
++ resets = <&cru SRST_SARADC>;
++ reset-names = "saradc-apb";
+ #io-channel-cells = <1>;
+ vref-supply = <&vcc18>;
+ };
+diff --git a/Documentation/mic/mpssd/mpssd.c b/Documentation/mic/mpssd/mpssd.c
+index 30fb842a976d..49db1def1721 100644
+--- a/Documentation/mic/mpssd/mpssd.c
++++ b/Documentation/mic/mpssd/mpssd.c
+@@ -1538,9 +1538,9 @@ set_cmdline(struct mic_info *mic)
+
+ len = snprintf(buffer, PATH_MAX,
+ "clocksource=tsc highres=off nohz=off ");
+- len += snprintf(buffer + len, PATH_MAX,
++ len += snprintf(buffer + len, PATH_MAX - len,
+ "cpufreq_on;corec6_off;pc3_off;pc6_off ");
+- len += snprintf(buffer + len, PATH_MAX,
++ len += snprintf(buffer + len, PATH_MAX - len,
+ "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
+ mic->id + 1);
+
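The mpssd change above fixes a standard snprintf() accumulation bug: after the first write, only PATH_MAX - len bytes remain, and passing the full size to the later calls lets them write past the end of the buffer once the earlier pieces are long enough. The safe idiom, as a stand-alone sketch with an arbitrary buffer size:

#include <stdio.h>

int main(void)
{
    char buffer[64];
    int len;

    /* Each call must be given only the space that is left. */
    len = snprintf(buffer, sizeof(buffer), "clocksource=tsc ");
    len += snprintf(buffer + len, sizeof(buffer) - len, "nohz=off ");
    len += snprintf(buffer + len, sizeof(buffer) - len, "id=%d", 3);

    /* Note: if a call truncates, its return value can exceed the space
     * given; production code should check for that before continuing. */
    printf("%s (%d bytes)\n", buffer, len);
    return 0;
}
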
+diff --git a/Makefile b/Makefile
+index ec3bd119fbf8..dd755d199ad6 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 7
+-SUBLEVEL = 4
++SUBLEVEL = 5
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h
+index c419b43c461d..466e42e96bfa 100644
+--- a/arch/alpha/include/asm/uaccess.h
++++ b/arch/alpha/include/asm/uaccess.h
+@@ -371,14 +371,6 @@ __copy_tofrom_user_nocheck(void *to, const void *from, long len)
+ return __cu_len;
+ }
+
+-extern inline long
+-__copy_tofrom_user(void *to, const void *from, long len, const void __user *validate)
+-{
+- if (__access_ok((unsigned long)validate, len, get_fs()))
+- len = __copy_tofrom_user_nocheck(to, from, len);
+- return len;
+-}
+-
+ #define __copy_to_user(to, from, n) \
+ ({ \
+ __chk_user_ptr(to); \
+@@ -393,17 +385,22 @@ __copy_tofrom_user(void *to, const void *from, long len, const void __user *vali
+ #define __copy_to_user_inatomic __copy_to_user
+ #define __copy_from_user_inatomic __copy_from_user
+
+-
+ extern inline long
+ copy_to_user(void __user *to, const void *from, long n)
+ {
+- return __copy_tofrom_user((__force void *)to, from, n, to);
++ if (likely(__access_ok((unsigned long)to, n, get_fs())))
++ n = __copy_tofrom_user_nocheck((__force void *)to, from, n);
++ return n;
+ }
+
+ extern inline long
+ copy_from_user(void *to, const void __user *from, long n)
+ {
+- return __copy_tofrom_user(to, (__force void *)from, n, from);
++ if (likely(__access_ok((unsigned long)from, n, get_fs())))
++ n = __copy_tofrom_user_nocheck(to, (__force void *)from, n);
++ else
++ memset(to, 0, n);
++ return n;
+ }
+
+ extern void __do_clear_user(void);
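
The alpha uaccess change above is an infoleak fix: when access_ok() rejects the source pointer, copy_from_user() used to return without touching the destination, so callers that pressed on after a short copy could expose uninitialized kernel memory. The fixed path zeroes the whole buffer instead. A toy userspace model of that contract follows; the validity check and sizes are invented.

#include <stdio.h>
#include <string.h>

/* Toy model: "user" pointers are invalid when NULL. */
static int access_ok(const void *p, size_t n)
{
    (void)n;
    return p != NULL;
}

/* Models the fixed copy_from_user(): on a rejected pointer, zero the
 * destination so the caller never consumes stale kernel memory.
 * Returns the number of bytes NOT copied, like the kernel API. */
static size_t copy_from_user(void *to, const void *from, size_t n)
{
    if (access_ok(from, n)) {
        memcpy(to, from, n);    /* stand-in for the real copier */
        return 0;
    }
    memset(to, 0, n);           /* the fix: no uninitialized bytes */
    return n;
}

int main(void)
{
    char kbuf[8] = "stale!!";   /* pretend leftover kernel data */

    if (copy_from_user(kbuf, NULL, sizeof(kbuf)))
        printf("copy failed, kbuf[0]=%d (zeroed)\n", kbuf[0]);
    return 0;
}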
+diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
+index a78d5670884f..41faf17cd28d 100644
+--- a/arch/arc/include/asm/uaccess.h
++++ b/arch/arc/include/asm/uaccess.h
+@@ -83,7 +83,10 @@
+ "2: ;nop\n" \
+ " .section .fixup, \"ax\"\n" \
+ " .align 4\n" \
+- "3: mov %0, %3\n" \
++ "3: # return -EFAULT\n" \
++ " mov %0, %3\n" \
++ " # zero out dst ptr\n" \
++ " mov %1, 0\n" \
+ " j 2b\n" \
+ " .previous\n" \
+ " .section __ex_table, \"a\"\n" \
+@@ -101,7 +104,11 @@
+ "2: ;nop\n" \
+ " .section .fixup, \"ax\"\n" \
+ " .align 4\n" \
+- "3: mov %0, %3\n" \
++ "3: # return -EFAULT\n" \
++ " mov %0, %3\n" \
++ " # zero out dst ptr\n" \
++ " mov %1, 0\n" \
++ " mov %R1, 0\n" \
+ " j 2b\n" \
+ " .previous\n" \
+ " .section __ex_table, \"a\"\n" \
+diff --git a/arch/arm/boot/dts/armada-388-clearfog.dts b/arch/arm/boot/dts/armada-388-clearfog.dts
+index c60206efb583..7b7c15adaa8b 100644
+--- a/arch/arm/boot/dts/armada-388-clearfog.dts
++++ b/arch/arm/boot/dts/armada-388-clearfog.dts
+@@ -406,12 +406,12 @@
+
+ port@0 {
+ reg = <0>;
+- label = "lan1";
++ label = "lan5";
+ };
+
+ port@1 {
+ reg = <1>;
+- label = "lan2";
++ label = "lan4";
+ };
+
+ port@2 {
+@@ -421,12 +421,12 @@
+
+ port@3 {
+ reg = <3>;
+- label = "lan4";
++ label = "lan2";
+ };
+
+ port@4 {
+ reg = <4>;
+- label = "lan5";
++ label = "lan1";
+ };
+
+ port@5 {
+diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
+index ed613ebe0812..8b1738930145 100644
+--- a/arch/arm/boot/dts/imx6qdl.dtsi
++++ b/arch/arm/boot/dts/imx6qdl.dtsi
+@@ -242,7 +242,7 @@
+ clocks = <&clks IMX6QDL_CLK_SPDIF_GCLK>, <&clks IMX6QDL_CLK_OSC>,
+ <&clks IMX6QDL_CLK_SPDIF>, <&clks IMX6QDL_CLK_ASRC>,
+ <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_ESAI_EXTAL>,
+- <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_MLB>,
++ <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_DUMMY>,
+ <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_SPBA>;
+ clock-names = "core", "rxtx0",
+ "rxtx1", "rxtx2",
+diff --git a/arch/arm/boot/dts/kirkwood-ib62x0.dts b/arch/arm/boot/dts/kirkwood-ib62x0.dts
+index ef84d8699a76..5bf62897014c 100644
+--- a/arch/arm/boot/dts/kirkwood-ib62x0.dts
++++ b/arch/arm/boot/dts/kirkwood-ib62x0.dts
+@@ -113,7 +113,7 @@
+
+ partition@e0000 {
+ label = "u-boot environment";
+- reg = <0xe0000 0x100000>;
++ reg = <0xe0000 0x20000>;
+ };
+
+ partition@100000 {
+diff --git a/arch/arm/boot/dts/kirkwood-openrd.dtsi b/arch/arm/boot/dts/kirkwood-openrd.dtsi
+index e4ecab112601..7175511a92da 100644
+--- a/arch/arm/boot/dts/kirkwood-openrd.dtsi
++++ b/arch/arm/boot/dts/kirkwood-openrd.dtsi
+@@ -116,6 +116,10 @@
+ };
+ };
+
++&pciec {
++ status = "okay";
++};
++
+ &pcie0 {
+ status = "okay";
+ };
+diff --git a/arch/arm/boot/dts/omap3-overo-base.dtsi b/arch/arm/boot/dts/omap3-overo-base.dtsi
+index de256fa8da48..3e946cac55f3 100644
+--- a/arch/arm/boot/dts/omap3-overo-base.dtsi
++++ b/arch/arm/boot/dts/omap3-overo-base.dtsi
+@@ -223,7 +223,9 @@
+ };
+
+ &gpmc {
+- ranges = <0 0 0x00000000 0x20000000>;
++ ranges = <0 0 0x30000000 0x1000000>, /* CS0 */
++ <4 0 0x2b000000 0x1000000>, /* CS4 */
++ <5 0 0x2c000000 0x1000000>; /* CS5 */
+
+ nand@0,0 {
+ compatible = "ti,omap2-nand";
+diff --git a/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi b/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi
+index 7df27926ead2..4f4c6efbd518 100644
+--- a/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi
++++ b/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi
+@@ -55,8 +55,6 @@
+ #include "omap-gpmc-smsc9221.dtsi"
+
+ &gpmc {
+- ranges = <5 0 0x2c000000 0x1000000>; /* CS5 */
+-
+ ethernet@gpmc {
+ reg = <5 0 0xff>;
+ interrupt-parent = <&gpio6>;
+diff --git a/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
+index 9e24b6a1d07b..1b304e2f1bd2 100644
+--- a/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
++++ b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
+@@ -27,8 +27,6 @@
+ #include "omap-gpmc-smsc9221.dtsi"
+
+ &gpmc {
+- ranges = <5 0 0x2c000000 0x1000000>; /* CS5 */
+-
+ ethernet@gpmc {
+ reg = <5 0 0xff>;
+ interrupt-parent = <&gpio6>;
+diff --git a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi
+index 334109e14613..82e98ee3023a 100644
+--- a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi
++++ b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi
+@@ -15,9 +15,6 @@
+ #include "omap-gpmc-smsc9221.dtsi"
+
+ &gpmc {
+- ranges = <4 0 0x2b000000 0x1000000>, /* CS4 */
+- <5 0 0x2c000000 0x1000000>; /* CS5 */
+-
+ smsc1: ethernet@gpmc {
+ reg = <5 0 0xff>;
+ interrupt-parent = <&gpio6>;
+diff --git a/arch/arm/boot/dts/rk3066a.dtsi b/arch/arm/boot/dts/rk3066a.dtsi
+index c0ba86c3a2ab..0d0dae3a1694 100644
+--- a/arch/arm/boot/dts/rk3066a.dtsi
++++ b/arch/arm/boot/dts/rk3066a.dtsi
+@@ -197,6 +197,8 @@
+ clock-names = "saradc", "apb_pclk";
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ #io-channel-cells = <1>;
++ resets = <&cru SRST_SARADC>;
++ reset-names = "saradc-apb";
+ status = "disabled";
+ };
+
+diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
+index 3b44ef3cff12..fd77e10b4746 100644
+--- a/arch/arm/boot/dts/rk3288.dtsi
++++ b/arch/arm/boot/dts/rk3288.dtsi
+@@ -279,6 +279,8 @@
+ #io-channel-cells = <1>;
+ clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
+ clock-names = "saradc", "apb_pclk";
++ resets = <&cru SRST_SARADC>;
++ reset-names = "saradc-apb";
+ status = "disabled";
+ };
+
+diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi
+index 99bbcc2c9b89..e2cd683b4e4b 100644
+--- a/arch/arm/boot/dts/rk3xxx.dtsi
++++ b/arch/arm/boot/dts/rk3xxx.dtsi
+@@ -399,6 +399,8 @@
+ #io-channel-cells = <1>;
+ clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
+ clock-names = "saradc", "apb_pclk";
++ resets = <&cru SRST_SARADC>;
++ reset-names = "saradc-apb";
+ status = "disabled";
+ };
+
+diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi
+index d294e82447a2..8b063ab10c19 100644
+--- a/arch/arm/boot/dts/stih407-family.dtsi
++++ b/arch/arm/boot/dts/stih407-family.dtsi
+@@ -550,8 +550,9 @@
+ interrupt-names = "mmcirq";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_mmc0>;
+- clock-names = "mmc";
+- clocks = <&clk_s_c0_flexgen CLK_MMC_0>;
++ clock-names = "mmc", "icn";
++ clocks = <&clk_s_c0_flexgen CLK_MMC_0>,
++ <&clk_s_c0_flexgen CLK_RX_ICN_HVA>;
+ bus-width = <8>;
+ non-removable;
+ };
+@@ -565,8 +566,9 @@
+ interrupt-names = "mmcirq";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sd1>;
+- clock-names = "mmc";
+- clocks = <&clk_s_c0_flexgen CLK_MMC_1>;
++ clock-names = "mmc", "icn";
++ clocks = <&clk_s_c0_flexgen CLK_MMC_1>,
++ <&clk_s_c0_flexgen CLK_RX_ICN_HVA>;
+ resets = <&softreset STIH407_MMC1_SOFTRESET>;
+ bus-width = <4>;
+ };
+diff --git a/arch/arm/boot/dts/stih410.dtsi b/arch/arm/boot/dts/stih410.dtsi
+index 18ed1ad10d32..40318869c733 100644
+--- a/arch/arm/boot/dts/stih410.dtsi
++++ b/arch/arm/boot/dts/stih410.dtsi
+@@ -41,7 +41,8 @@
+ compatible = "st,st-ohci-300x";
+ reg = <0x9a03c00 0x100>;
+ interrupts = <GIC_SPI 180 IRQ_TYPE_NONE>;
+- clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
++ clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
++ <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
+ resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>,
+ <&softreset STIH407_USB2_PORT0_SOFTRESET>;
+ reset-names = "power", "softreset";
+@@ -57,7 +58,8 @@
+ interrupts = <GIC_SPI 151 IRQ_TYPE_NONE>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb0>;
+- clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
++ clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
++ <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
+ resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>,
+ <&softreset STIH407_USB2_PORT0_SOFTRESET>;
+ reset-names = "power", "softreset";
+@@ -71,7 +73,8 @@
+ compatible = "st,st-ohci-300x";
+ reg = <0x9a83c00 0x100>;
+ interrupts = <GIC_SPI 181 IRQ_TYPE_NONE>;
+- clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
++ clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
++ <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
+ resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>,
+ <&softreset STIH407_USB2_PORT1_SOFTRESET>;
+ reset-names = "power", "softreset";
+@@ -87,7 +90,8 @@
+ interrupts = <GIC_SPI 153 IRQ_TYPE_NONE>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb1>;
+- clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
++ clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
++ <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
+ resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>,
+ <&softreset STIH407_USB2_PORT1_SOFTRESET>;
+ reset-names = "power", "softreset";
+diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
+index f1bde7c4e736..4e1b22a80f55 100644
+--- a/arch/arm/kvm/arm.c
++++ b/arch/arm/kvm/arm.c
+@@ -157,8 +157,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
+ {
+ int i;
+
+- kvm_free_stage2_pgd(kvm);
+-
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (kvm->vcpus[i]) {
+ kvm_arch_vcpu_free(kvm->vcpus[i]);
+diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
+index 45c43aecb8f2..15c8d839c1b3 100644
+--- a/arch/arm/kvm/mmu.c
++++ b/arch/arm/kvm/mmu.c
+@@ -1909,6 +1909,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
+
+ void kvm_arch_flush_shadow_all(struct kvm *kvm)
+ {
++ kvm_free_stage2_pgd(kvm);
+ }
+
+ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c
+index 58924b3844df..fe708e26d021 100644
+--- a/arch/arm/mach-imx/pm-imx6.c
++++ b/arch/arm/mach-imx/pm-imx6.c
+@@ -295,7 +295,7 @@ int imx6_set_lpm(enum mxc_cpu_pwr_mode mode)
+ val &= ~BM_CLPCR_SBYOS;
+ if (cpu_is_imx6sl())
+ val |= BM_CLPCR_BYPASS_PMIC_READY;
+- if (cpu_is_imx6sl() || cpu_is_imx6sx())
++ if (cpu_is_imx6sl() || cpu_is_imx6sx() || cpu_is_imx6ul())
+ val |= BM_CLPCR_BYP_MMDC_CH0_LPM_HS;
+ else
+ val |= BM_CLPCR_BYP_MMDC_CH1_LPM_HS;
+@@ -310,7 +310,7 @@ int imx6_set_lpm(enum mxc_cpu_pwr_mode mode)
+ val |= 0x3 << BP_CLPCR_STBY_COUNT;
+ val |= BM_CLPCR_VSTBY;
+ val |= BM_CLPCR_SBYOS;
+- if (cpu_is_imx6sl())
++ if (cpu_is_imx6sl() || cpu_is_imx6sx())
+ val |= BM_CLPCR_BYPASS_PMIC_READY;
+ if (cpu_is_imx6sl() || cpu_is_imx6sx() || cpu_is_imx6ul())
+ val |= BM_CLPCR_BYP_MMDC_CH0_LPM_HS;
+diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
+index aed33621deeb..3a350f8879da 100644
+--- a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
++++ b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
+@@ -1476,6 +1476,7 @@ static void omap_hwmod_am43xx_rst(void)
+ {
+ RSTCTRL(am33xx_pruss_hwmod, AM43XX_RM_PER_RSTCTRL_OFFSET);
+ RSTCTRL(am33xx_gfx_hwmod, AM43XX_RM_GFX_RSTCTRL_OFFSET);
++ RSTST(am33xx_pruss_hwmod, AM43XX_RM_PER_RSTST_OFFSET);
+ RSTST(am33xx_gfx_hwmod, AM43XX_RM_GFX_RSTST_OFFSET);
+ }
+
+diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+index 9869a75c5d96..caf15c8e0c47 100644
+--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
++++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+@@ -722,8 +722,20 @@ static struct omap_hwmod omap3xxx_dss_dispc_hwmod = {
+ * display serial interface controller
+ */
+
++static struct omap_hwmod_class_sysconfig omap3xxx_dsi_sysc = {
++ .rev_offs = 0x0000,
++ .sysc_offs = 0x0010,
++ .syss_offs = 0x0014,
++ .sysc_flags = (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY |
++ SYSC_HAS_ENAWAKEUP | SYSC_HAS_SIDLEMODE |
++ SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
++ .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
++ .sysc_fields = &omap_hwmod_sysc_type1,
++};
++
+ static struct omap_hwmod_class omap3xxx_dsi_hwmod_class = {
+ .name = "dsi",
++ .sysc = &omap3xxx_dsi_sysc,
+ };
+
+ static struct omap_hwmod_irq_info omap3xxx_dsi1_irqs[] = {
+diff --git a/arch/arm/mach-omap2/prcm43xx.h b/arch/arm/mach-omap2/prcm43xx.h
+index 7c34c44eb0ae..babb5db5a3a4 100644
+--- a/arch/arm/mach-omap2/prcm43xx.h
++++ b/arch/arm/mach-omap2/prcm43xx.h
+@@ -39,6 +39,7 @@
+
+ /* RM RSTST offsets */
+ #define AM43XX_RM_GFX_RSTST_OFFSET 0x0014
++#define AM43XX_RM_PER_RSTST_OFFSET 0x0014
+ #define AM43XX_RM_WKUP_RSTST_OFFSET 0x0014
+
+ /* CM instances */
+diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
+index c410d84b243d..66070acaa888 100644
+--- a/arch/arm/mach-pxa/idp.c
++++ b/arch/arm/mach-pxa/idp.c
+@@ -83,7 +83,8 @@ static struct resource smc91x_resources[] = {
+ };
+
+ static struct smc91x_platdata smc91x_platdata = {
+- .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT,
++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
++ SMC91X_USE_DMA | SMC91X_NOWAIT,
+ };
+
+ static struct platform_device smc91x_device = {
+diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c
+index 3f06cd90567a..056369ef250e 100644
+--- a/arch/arm/mach-pxa/xcep.c
++++ b/arch/arm/mach-pxa/xcep.c
+@@ -120,7 +120,8 @@ static struct resource smc91x_resources[] = {
+ };
+
+ static struct smc91x_platdata xcep_smc91x_info = {
+- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA,
++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
++ SMC91X_NOWAIT | SMC91X_USE_DMA,
+ };
+
+ static struct platform_device smc91x_device = {
+diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
+index baf174542e36..a0ead0ae23d6 100644
+--- a/arch/arm/mach-realview/core.c
++++ b/arch/arm/mach-realview/core.c
+@@ -93,7 +93,8 @@ static struct smsc911x_platform_config smsc911x_config = {
+ };
+
+ static struct smc91x_platdata smc91x_platdata = {
+- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
++ SMC91X_NOWAIT,
+ };
+
+ static struct platform_device realview_eth_device = {
+diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c
+index 1525d7b5f1b7..88149f85bc49 100644
+--- a/arch/arm/mach-sa1100/pleb.c
++++ b/arch/arm/mach-sa1100/pleb.c
+@@ -45,7 +45,7 @@ static struct resource smc91x_resources[] = {
+ };
+
+ static struct smc91x_platdata smc91x_platdata = {
+- .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
++ .flags = SMC91X_USE_16BIT | SMC91X_USE_8BIT | SMC91X_NOWAIT,
+ };
+
+ static struct platform_device smc91x_device = {
+diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
+index e875a5a551d7..89206b568cd4 100644
+--- a/arch/arm64/include/asm/spinlock.h
++++ b/arch/arm64/include/asm/spinlock.h
+@@ -363,4 +363,14 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
+ #define arch_read_relax(lock) cpu_relax()
+ #define arch_write_relax(lock) cpu_relax()
+
++/*
++ * Accesses appearing in program order before a spin_lock() operation
++ * can be reordered with accesses inside the critical section, by virtue
++ * of arch_spin_lock being constructed using acquire semantics.
++ *
++ * In cases where this is problematic (e.g. try_to_wake_up), an
++ * smp_mb__before_spinlock() can restore the required ordering.
++ */
++#define smp_mb__before_spinlock() smp_mb()
++
+ #endif /* __ASM_SPINLOCK_H */
+diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h
+index 68cf638faf48..b1ec1fa06463 100644
+--- a/arch/avr32/include/asm/uaccess.h
++++ b/arch/avr32/include/asm/uaccess.h
+@@ -74,7 +74,7 @@ extern __kernel_size_t __copy_user(void *to, const void *from,
+
+ extern __kernel_size_t copy_to_user(void __user *to, const void *from,
+ __kernel_size_t n);
+-extern __kernel_size_t copy_from_user(void *to, const void __user *from,
++extern __kernel_size_t ___copy_from_user(void *to, const void __user *from,
+ __kernel_size_t n);
+
+ static inline __kernel_size_t __copy_to_user(void __user *to, const void *from,
+@@ -88,6 +88,15 @@ static inline __kernel_size_t __copy_from_user(void *to,
+ {
+ return __copy_user(to, (const void __force *)from, n);
+ }
++static inline __kernel_size_t copy_from_user(void *to,
++ const void __user *from,
++ __kernel_size_t n)
++{
++ size_t res = ___copy_from_user(to, from, n);
++ if (unlikely(res))
++ memset(to + (n - res), 0, res);
++ return res;
++}
+
+ #define __copy_to_user_inatomic __copy_to_user
+ #define __copy_from_user_inatomic __copy_from_user
+diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c
+index d93ead02daed..7c6cf14f0985 100644
+--- a/arch/avr32/kernel/avr32_ksyms.c
++++ b/arch/avr32/kernel/avr32_ksyms.c
+@@ -36,7 +36,7 @@ EXPORT_SYMBOL(copy_page);
+ /*
+ * Userspace access stuff.
+ */
+-EXPORT_SYMBOL(copy_from_user);
++EXPORT_SYMBOL(___copy_from_user);
+ EXPORT_SYMBOL(copy_to_user);
+ EXPORT_SYMBOL(__copy_user);
+ EXPORT_SYMBOL(strncpy_from_user);
+diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S
+index ea59c04b07de..075373471da1 100644
+--- a/arch/avr32/lib/copy_user.S
++++ b/arch/avr32/lib/copy_user.S
+@@ -23,13 +23,13 @@
+ */
+ .text
+ .align 1
+- .global copy_from_user
+- .type copy_from_user, @function
+-copy_from_user:
++ .global ___copy_from_user
++ .type ___copy_from_user, @function
++___copy_from_user:
+ branch_if_kernel r8, __copy_user
+ ret_if_privileged r8, r11, r10, r10
+ rjmp __copy_user
+- .size copy_from_user, . - copy_from_user
++ .size ___copy_from_user, . - ___copy_from_user
+
+ .global copy_to_user
+ .type copy_to_user, @function
+diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h
+index 12f5d6851bbc..0a2a70096d8b 100644
+--- a/arch/blackfin/include/asm/uaccess.h
++++ b/arch/blackfin/include/asm/uaccess.h
+@@ -171,11 +171,12 @@ static inline int bad_user_access_length(void)
+ static inline unsigned long __must_check
+ copy_from_user(void *to, const void __user *from, unsigned long n)
+ {
+- if (access_ok(VERIFY_READ, from, n))
++ if (likely(access_ok(VERIFY_READ, from, n))) {
+ memcpy(to, (const void __force *)from, n);
+- else
+- return n;
+- return 0;
++ return 0;
++ }
++ memset(to, 0, n);
++ return n;
+ }
+
+ static inline unsigned long __must_check
+diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c
+index c6db52ba3a06..10c57771822d 100644
+--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c
++++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c
+@@ -146,7 +146,8 @@ static struct platform_device hitachi_fb_device = {
+ #include <linux/smc91x.h>
+
+ static struct smc91x_platdata smc91x_info = {
+- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
++ SMC91X_NOWAIT,
+ .leda = RPC_LED_100_10,
+ .ledb = RPC_LED_TX_RX,
+ };
+diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c
+index f35525b55819..57d1c43726d9 100644
+--- a/arch/blackfin/mach-bf561/boards/ezkit.c
++++ b/arch/blackfin/mach-bf561/boards/ezkit.c
+@@ -134,7 +134,8 @@ static struct platform_device net2272_bfin_device = {
+ #include <linux/smc91x.h>
+
+ static struct smc91x_platdata smc91x_info = {
+- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
++ SMC91X_NOWAIT,
+ .leda = RPC_LED_100_10,
+ .ledb = RPC_LED_TX_RX,
+ };
+diff --git a/arch/cris/include/asm/uaccess.h b/arch/cris/include/asm/uaccess.h
+index e3530d0f13ee..56c7d5750abd 100644
+--- a/arch/cris/include/asm/uaccess.h
++++ b/arch/cris/include/asm/uaccess.h
+@@ -194,30 +194,6 @@ extern unsigned long __copy_user(void __user *to, const void *from, unsigned lon
+ extern unsigned long __copy_user_zeroing(void *to, const void __user *from, unsigned long n);
+ extern unsigned long __do_clear_user(void __user *to, unsigned long n);
+
+-static inline unsigned long
+-__generic_copy_to_user(void __user *to, const void *from, unsigned long n)
+-{
+- if (access_ok(VERIFY_WRITE, to, n))
+- return __copy_user(to, from, n);
+- return n;
+-}
+-
+-static inline unsigned long
+-__generic_copy_from_user(void *to, const void __user *from, unsigned long n)
+-{
+- if (access_ok(VERIFY_READ, from, n))
+- return __copy_user_zeroing(to, from, n);
+- return n;
+-}
+-
+-static inline unsigned long
+-__generic_clear_user(void __user *to, unsigned long n)
+-{
+- if (access_ok(VERIFY_WRITE, to, n))
+- return __do_clear_user(to, n);
+- return n;
+-}
+-
+ static inline long
+ __strncpy_from_user(char *dst, const char __user *src, long count)
+ {
+@@ -282,7 +258,7 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n)
+ else if (n == 24)
+ __asm_copy_from_user_24(to, from, ret);
+ else
+- ret = __generic_copy_from_user(to, from, n);
++ ret = __copy_user_zeroing(to, from, n);
+
+ return ret;
+ }
+@@ -333,7 +309,7 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n)
+ else if (n == 24)
+ __asm_copy_to_user_24(to, from, ret);
+ else
+- ret = __generic_copy_to_user(to, from, n);
++ ret = __copy_user(to, from, n);
+
+ return ret;
+ }
+@@ -366,26 +342,43 @@ __constant_clear_user(void __user *to, unsigned long n)
+ else if (n == 24)
+ __asm_clear_24(to, ret);
+ else
+- ret = __generic_clear_user(to, n);
++ ret = __do_clear_user(to, n);
+
+ return ret;
+ }
+
+
+-#define clear_user(to, n) \
+- (__builtin_constant_p(n) ? \
+- __constant_clear_user(to, n) : \
+- __generic_clear_user(to, n))
++static inline size_t clear_user(void __user *to, size_t n)
++{
++ if (unlikely(!access_ok(VERIFY_WRITE, to, n)))
++ return n;
++ if (__builtin_constant_p(n))
++ return __constant_clear_user(to, n);
++ else
++ return __do_clear_user(to, n);
++}
+
+-#define copy_from_user(to, from, n) \
+- (__builtin_constant_p(n) ? \
+- __constant_copy_from_user(to, from, n) : \
+- __generic_copy_from_user(to, from, n))
++static inline size_t copy_from_user(void *to, const void __user *from, size_t n)
++{
++ if (unlikely(!access_ok(VERIFY_READ, from, n))) {
++ memset(to, 0, n);
++ return n;
++ }
++ if (__builtin_constant_p(n))
++ return __constant_copy_from_user(to, from, n);
++ else
++ return __copy_user_zeroing(to, from, n);
++}
+
+-#define copy_to_user(to, from, n) \
+- (__builtin_constant_p(n) ? \
+- __constant_copy_to_user(to, from, n) : \
+- __generic_copy_to_user(to, from, n))
++static inline size_t copy_to_user(void __user *to, const void *from, size_t n)
++{
++ if (unlikely(!access_ok(VERIFY_WRITE, to, n)))
++ return n;
++ if (__builtin_constant_p(n))
++ return __constant_copy_to_user(to, from, n);
++ else
++ return __copy_user(to, from, n);
++}
+
+ /* We let the __ versions of copy_from/to_user inline, because they're often
+ * used in fast paths and have only a small space overhead.
+diff --git a/arch/frv/include/asm/uaccess.h b/arch/frv/include/asm/uaccess.h
+index 3ac9a59d65d4..87d9e34c5df8 100644
+--- a/arch/frv/include/asm/uaccess.h
++++ b/arch/frv/include/asm/uaccess.h
+@@ -263,19 +263,25 @@ do { \
+ extern long __memset_user(void *dst, unsigned long count);
+ extern long __memcpy_user(void *dst, const void *src, unsigned long count);
+
+-#define clear_user(dst,count) __memset_user(____force(dst), (count))
++#define __clear_user(dst,count) __memset_user(____force(dst), (count))
+ #define __copy_from_user_inatomic(to, from, n) __memcpy_user((to), ____force(from), (n))
+ #define __copy_to_user_inatomic(to, from, n) __memcpy_user(____force(to), (from), (n))
+
+ #else
+
+-#define clear_user(dst,count) (memset(____force(dst), 0, (count)), 0)
++#define __clear_user(dst,count) (memset(____force(dst), 0, (count)), 0)
+ #define __copy_from_user_inatomic(to, from, n) (memcpy((to), ____force(from), (n)), 0)
+ #define __copy_to_user_inatomic(to, from, n) (memcpy(____force(to), (from), (n)), 0)
+
+ #endif
+
+-#define __clear_user clear_user
++static inline unsigned long __must_check
++clear_user(void __user *to, unsigned long n)
++{
++ if (likely(__access_ok(to, n)))
++ n = __clear_user(to, n);
++ return n;
++}
+
+ static inline unsigned long __must_check
+ __copy_to_user(void __user *to, const void *from, unsigned long n)
+diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h
+index f000a382bc7f..f61cfb28e9f2 100644
+--- a/arch/hexagon/include/asm/uaccess.h
++++ b/arch/hexagon/include/asm/uaccess.h
+@@ -103,7 +103,8 @@ static inline long hexagon_strncpy_from_user(char *dst, const char __user *src,
+ {
+ long res = __strnlen_user(src, n);
+
+- /* return from strnlen can't be zero -- that would be rubbish. */
++ if (unlikely(!res))
++ return -EFAULT;
+
+ if (res > n) {
+ copy_from_user(dst, src, n);
+diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
+index 2189d5ddc1ee..ebcd8a15338b 100644
+--- a/arch/ia64/include/asm/uaccess.h
++++ b/arch/ia64/include/asm/uaccess.h
+@@ -263,17 +263,15 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
+ __cu_len; \
+ })
+
+-#define copy_from_user(to, from, n) \
+-({ \
+- void *__cu_to = (to); \
+- const void __user *__cu_from = (from); \
+- long __cu_len = (n); \
+- \
+- __chk_user_ptr(__cu_from); \
+- if (__access_ok(__cu_from, __cu_len, get_fs())) \
+- __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \
+- __cu_len; \
+-})
++static inline unsigned long
++copy_from_user(void *to, const void __user *from, unsigned long n)
++{
++ if (likely(__access_ok(from, n, get_fs())))
++ n = __copy_user((__force void __user *) to, from, n);
++ else
++ memset(to, 0, n);
++ return n;
++}
+
+ #define __copy_in_user(to, from, size) __copy_user((to), (from), (size))
+
+diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h
+index cac7014daef3..6f8982157a75 100644
+--- a/arch/m32r/include/asm/uaccess.h
++++ b/arch/m32r/include/asm/uaccess.h
+@@ -219,7 +219,7 @@ extern int fixup_exception(struct pt_regs *regs);
+ #define __get_user_nocheck(x, ptr, size) \
+ ({ \
+ long __gu_err = 0; \
+- unsigned long __gu_val; \
++ unsigned long __gu_val = 0; \
+ might_fault(); \
+ __get_user_size(__gu_val, (ptr), (size), __gu_err); \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
+diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
+index 8282cbce7e39..273e61225c27 100644
+--- a/arch/metag/include/asm/uaccess.h
++++ b/arch/metag/include/asm/uaccess.h
+@@ -204,8 +204,9 @@ extern unsigned long __must_check __copy_user_zeroing(void *to,
+ static inline unsigned long
+ copy_from_user(void *to, const void __user *from, unsigned long n)
+ {
+- if (access_ok(VERIFY_READ, from, n))
++ if (likely(access_ok(VERIFY_READ, from, n)))
+ return __copy_user_zeroing(to, from, n);
++ memset(to, 0, n);
+ return n;
+ }
+
+diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
+index 331b0d35f89c..826676778094 100644
+--- a/arch/microblaze/include/asm/uaccess.h
++++ b/arch/microblaze/include/asm/uaccess.h
+@@ -227,7 +227,7 @@ extern long __user_bad(void);
+
+ #define __get_user(x, ptr) \
+ ({ \
+- unsigned long __gu_val; \
++ unsigned long __gu_val = 0; \
+ /*unsigned long __gu_ptr = (unsigned long)(ptr);*/ \
+ long __gu_err; \
+ switch (sizeof(*(ptr))) { \
+@@ -373,10 +373,13 @@ extern long __user_bad(void);
+ static inline long copy_from_user(void *to,
+ const void __user *from, unsigned long n)
+ {
++ unsigned long res = n;
+ might_fault();
+- if (access_ok(VERIFY_READ, from, n))
+- return __copy_from_user(to, from, n);
+- return n;
++ if (likely(access_ok(VERIFY_READ, from, n)))
++ res = __copy_from_user(to, from, n);
++ if (unlikely(res))
++ memset(to + (n - res), 0, res);
++ return res;
+ }
+
+ #define __copy_to_user(to, from, n) \
+diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
+index 7f109d4f64a4..b6e20f3053f4 100644
+--- a/arch/mips/include/asm/uaccess.h
++++ b/arch/mips/include/asm/uaccess.h
+@@ -14,6 +14,7 @@
+ #include <linux/kernel.h>
+ #include <linux/errno.h>
+ #include <linux/thread_info.h>
++#include <linux/string.h>
+ #include <asm/asm-eva.h>
+
+ /*
+@@ -1170,6 +1171,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
+ __cu_len = __invoke_copy_from_user(__cu_to, \
+ __cu_from, \
+ __cu_len); \
++ } else { \
++ memset(__cu_to, 0, __cu_len); \
+ } \
+ } \
+ __cu_len; \
+diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
+index ad2270ff83d1..38bfbc9066eb 100644
+--- a/arch/mips/kvm/tlb.c
++++ b/arch/mips/kvm/tlb.c
+@@ -159,7 +159,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ pfn = kvm_mips_gfn_to_pfn(kvm, gfn);
+
+- if (kvm_mips_is_error_pfn(pfn)) {
++ if (is_error_noslot_pfn(pfn)) {
+ kvm_err("Couldn't get pfn for gfn %#" PRIx64 "!\n", gfn);
+ err = -EFAULT;
+ goto out;
+diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h
+index 20f7bf6de384..d012e877a95a 100644
+--- a/arch/mn10300/include/asm/uaccess.h
++++ b/arch/mn10300/include/asm/uaccess.h
+@@ -166,6 +166,7 @@ struct __large_struct { unsigned long buf[100]; };
+ "2:\n" \
+ " .section .fixup,\"ax\"\n" \
+ "3:\n\t" \
++ " mov 0,%1\n" \
+ " mov %3,%0\n" \
+ " jmp 2b\n" \
+ " .previous\n" \
+diff --git a/arch/mn10300/lib/usercopy.c b/arch/mn10300/lib/usercopy.c
+index 7826e6c364e7..ce8899e5e171 100644
+--- a/arch/mn10300/lib/usercopy.c
++++ b/arch/mn10300/lib/usercopy.c
+@@ -9,7 +9,7 @@
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+-#include <asm/uaccess.h>
++#include <linux/uaccess.h>
+
+ unsigned long
+ __generic_copy_to_user(void *to, const void *from, unsigned long n)
+@@ -24,6 +24,8 @@ __generic_copy_from_user(void *to, const void *from, unsigned long n)
+ {
+ if (access_ok(VERIFY_READ, from, n))
+ __copy_user_zeroing(to, from, n);
++ else
++ memset(to, 0, n);
+ return n;
+ }
+
+diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
+index caa51ff85a3c..0ab82324c817 100644
+--- a/arch/nios2/include/asm/uaccess.h
++++ b/arch/nios2/include/asm/uaccess.h
+@@ -102,9 +102,12 @@ extern long __copy_to_user(void __user *to, const void *from, unsigned long n);
+ static inline long copy_from_user(void *to, const void __user *from,
+ unsigned long n)
+ {
+- if (!access_ok(VERIFY_READ, from, n))
+- return n;
+- return __copy_from_user(to, from, n);
++ unsigned long res = n;
++ if (access_ok(VERIFY_READ, from, n))
++ res = __copy_from_user(to, from, n);
++ if (unlikely(res))
++ memset(to + (n - res), 0, res);
++ return res;
+ }
+
+ static inline long copy_to_user(void __user *to, const void *from,
+@@ -139,7 +142,7 @@ extern long strnlen_user(const char __user *s, long n);
+
+ #define __get_user_unknown(val, size, ptr, err) do { \
+ err = 0; \
+- if (copy_from_user(&(val), ptr, size)) { \
++ if (__copy_from_user(&(val), ptr, size)) { \
+ err = -EFAULT; \
+ } \
+ } while (0)
+@@ -166,7 +169,7 @@ do { \
+ ({ \
+ long __gu_err = -EFAULT; \
+ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \
+- unsigned long __gu_val; \
++ unsigned long __gu_val = 0; \
+ __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\
+ (x) = (__force __typeof__(x))__gu_val; \
+ __gu_err; \
+diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
+index a6bd07ca3d6c..5cc6b4f1b795 100644
+--- a/arch/openrisc/include/asm/uaccess.h
++++ b/arch/openrisc/include/asm/uaccess.h
+@@ -273,28 +273,20 @@ __copy_tofrom_user(void *to, const void *from, unsigned long size);
+ static inline unsigned long
+ copy_from_user(void *to, const void *from, unsigned long n)
+ {
+- unsigned long over;
+-
+- if (access_ok(VERIFY_READ, from, n))
+- return __copy_tofrom_user(to, from, n);
+- if ((unsigned long)from < TASK_SIZE) {
+- over = (unsigned long)from + n - TASK_SIZE;
+- return __copy_tofrom_user(to, from, n - over) + over;
+- }
+- return n;
++ unsigned long res = n;
++
++ if (likely(access_ok(VERIFY_READ, from, n)))
++ res = __copy_tofrom_user(to, from, n);
++ if (unlikely(res))
++ memset(to + (n - res), 0, res);
++ return res;
+ }
+
+ static inline unsigned long
+ copy_to_user(void *to, const void *from, unsigned long n)
+ {
+- unsigned long over;
+-
+- if (access_ok(VERIFY_WRITE, to, n))
+- return __copy_tofrom_user(to, from, n);
+- if ((unsigned long)to < TASK_SIZE) {
+- over = (unsigned long)to + n - TASK_SIZE;
+- return __copy_tofrom_user(to, from, n - over) + over;
+- }
++ if (likely(access_ok(VERIFY_WRITE, to, n)))
++ n = __copy_tofrom_user(to, from, n);
+ return n;
+ }
+
+@@ -303,13 +295,8 @@ extern unsigned long __clear_user(void *addr, unsigned long size);
+ static inline __must_check unsigned long
+ clear_user(void *addr, unsigned long size)
+ {
+-
+- if (access_ok(VERIFY_WRITE, addr, size))
+- return __clear_user(addr, size);
+- if ((unsigned long)addr < TASK_SIZE) {
+- unsigned long over = (unsigned long)addr + size - TASK_SIZE;
+- return __clear_user(addr, size - over) + over;
+- }
++ if (likely(access_ok(VERIFY_WRITE, addr, size)))
++ size = __clear_user(addr, size);
+ return size;
+ }
+
+diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
+index 0f59fd9ca205..37a1bee96444 100644
+--- a/arch/parisc/include/asm/uaccess.h
++++ b/arch/parisc/include/asm/uaccess.h
+@@ -10,6 +10,7 @@
+ #include <asm-generic/uaccess-unaligned.h>
+
+ #include <linux/bug.h>
++#include <linux/string.h>
+
+ #define VERIFY_READ 0
+ #define VERIFY_WRITE 1
+@@ -221,13 +222,14 @@ static inline unsigned long __must_check copy_from_user(void *to,
+ unsigned long n)
+ {
+ int sz = __compiletime_object_size(to);
+- int ret = -EFAULT;
++ unsigned long ret = n;
+
+ if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n))
+ ret = __copy_from_user(to, from, n);
+ else
+ copy_from_user_overflow();
+-
++ if (unlikely(ret))
++ memset(to + (n - ret), 0, ret);
+ return ret;
+ }
+
+diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
+index b7c20f0b8fbe..43888c8b9d1c 100644
+--- a/arch/powerpc/include/asm/uaccess.h
++++ b/arch/powerpc/include/asm/uaccess.h
+@@ -308,30 +308,17 @@ extern unsigned long __copy_tofrom_user(void __user *to,
+ static inline unsigned long copy_from_user(void *to,
+ const void __user *from, unsigned long n)
+ {
+- unsigned long over;
+-
+- if (access_ok(VERIFY_READ, from, n))
++ if (likely(access_ok(VERIFY_READ, from, n)))
+ return __copy_tofrom_user((__force void __user *)to, from, n);
+- if ((unsigned long)from < TASK_SIZE) {
+- over = (unsigned long)from + n - TASK_SIZE;
+- return __copy_tofrom_user((__force void __user *)to, from,
+- n - over) + over;
+- }
++ memset(to, 0, n);
+ return n;
+ }
+
+ static inline unsigned long copy_to_user(void __user *to,
+ const void *from, unsigned long n)
+ {
+- unsigned long over;
+-
+ if (access_ok(VERIFY_WRITE, to, n))
+ return __copy_tofrom_user(to, (__force void __user *)from, n);
+- if ((unsigned long)to < TASK_SIZE) {
+- over = (unsigned long)to + n - TASK_SIZE;
+- return __copy_tofrom_user(to, (__force void __user *)from,
+- n - over) + over;
+- }
+ return n;
+ }
+
+@@ -422,10 +409,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
+ might_fault();
+ if (likely(access_ok(VERIFY_WRITE, addr, size)))
+ return __clear_user(addr, size);
+- if ((unsigned long)addr < TASK_SIZE) {
+- unsigned long over = (unsigned long)addr + size - TASK_SIZE;
+- return __clear_user(addr, size - over) + over;
+- }
+ return size;
+ }
+
+diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
+index 73e461a3dfbb..96fd031a3f78 100644
+--- a/arch/powerpc/kernel/entry_64.S
++++ b/arch/powerpc/kernel/entry_64.S
+@@ -368,13 +368,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ tabort_syscall:
+ /* Firstly we need to enable TM in the kernel */
+ mfmsr r10
+- li r13, 1
+- rldimi r10, r13, MSR_TM_LG, 63-MSR_TM_LG
++ li r9, 1
++ rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r10, 0
+
+ /* tabort, this dooms the transaction, nothing else */
+- li r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+- TABORT(R13)
++ li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
++ TABORT(R9)
+
+ /*
+ * Return directly to userspace. We have corrupted user register state,
+@@ -382,8 +382,8 @@ tabort_syscall:
+ * resume after the tbegin of the aborted transaction with the
+ * checkpointed register state.
+ */
+- li r13, MSR_RI
+- andc r10, r10, r13
++ li r9, MSR_RI
++ andc r10, r10, r9
+ mtmsrd r10, 1
+ mtspr SPRN_SRR0, r11
+ mtspr SPRN_SRR1, r12
+diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
+index d90870a66b60..aa8214f30c92 100644
+--- a/arch/powerpc/lib/checksum_32.S
++++ b/arch/powerpc/lib/checksum_32.S
+@@ -127,17 +127,19 @@ _GLOBAL(csum_partial_copy_generic)
+ stw r7,12(r1)
+ stw r8,8(r1)
+
+- andi. r0,r4,1 /* is destination address even ? */
+- cmplwi cr7,r0,0
+ addic r12,r6,0
+ addi r6,r4,-4
+ neg r0,r4
+ addi r4,r3,-4
+ andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
++ crset 4*cr7+eq
+ beq 58f
+
+ cmplw 0,r5,r0 /* is this more than total to do? */
+ blt 63f /* if not much to do */
++ rlwinm r7,r6,3,0x8
++ rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */
++ cmplwi cr7,r7,0 /* is destination address even ? */
+ andi. r8,r0,3 /* get it word-aligned first */
+ mtctr r8
+ beq+ 61f
+@@ -237,7 +239,7 @@ _GLOBAL(csum_partial_copy_generic)
+ 66: addze r3,r12
+ addi r1,r1,16
+ beqlr+ cr7
+- rlwinm r3,r3,8,0,31 /* swap bytes for odd destination */
++ rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */
+ blr
+
+ /* read fault */
+diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
+index dfdb90cb4403..9f1983404e1a 100644
+--- a/arch/powerpc/mm/slb_low.S
++++ b/arch/powerpc/mm/slb_low.S
+@@ -113,7 +113,12 @@ BEGIN_FTR_SECTION
+ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
+ b slb_finish_load_1T
+
+-0:
++0: /*
++ * For userspace addresses, make sure this is region 0.
++ */
++ cmpdi r9, 0
++ bne 8f
++
+ /* when using slices, we extract the psize off the slice bitmaps
+ * and then we need to get the sllp encoding off the mmu_psize_defs
+ * array.
+diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
+index 2ee96431f736..4c827826c05e 100644
+--- a/arch/powerpc/platforms/powernv/opal-dump.c
++++ b/arch/powerpc/platforms/powernv/opal-dump.c
+@@ -370,6 +370,7 @@ static irqreturn_t process_dump(int irq, void *data)
+ uint32_t dump_id, dump_size, dump_type;
+ struct dump_obj *dump;
+ char name[22];
++ struct kobject *kobj;
+
+ rc = dump_read_info(&dump_id, &dump_size, &dump_type);
+ if (rc != OPAL_SUCCESS)
+@@ -381,8 +382,12 @@ static irqreturn_t process_dump(int irq, void *data)
+ * that gracefully and not create two conflicting
+ * entries.
+ */
+- if (kset_find_obj(dump_kset, name))
++ kobj = kset_find_obj(dump_kset, name);
++ if (kobj) {
++ /* Drop reference added by kset_find_obj() */
++ kobject_put(kobj);
+ return 0;
++ }
+
+ dump = create_dump_obj(dump_id, dump_size, dump_type);
+ if (!dump)
+diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
+index 37f959bf392e..f2344cbd2f46 100644
+--- a/arch/powerpc/platforms/powernv/opal-elog.c
++++ b/arch/powerpc/platforms/powernv/opal-elog.c
+@@ -247,6 +247,7 @@ static irqreturn_t elog_event(int irq, void *data)
+ uint64_t elog_type;
+ int rc;
+ char name[2+16+1];
++ struct kobject *kobj;
+
+ rc = opal_get_elog_size(&id, &size, &type);
+ if (rc != OPAL_SUCCESS) {
+@@ -269,8 +270,12 @@ static irqreturn_t elog_event(int irq, void *data)
+ * that gracefully and not create two conflicting
+ * entries.
+ */
+- if (kset_find_obj(elog_kset, name))
++ kobj = kset_find_obj(elog_kset, name);
++ if (kobj) {
++ /* Drop reference added by kset_find_obj() */
++ kobject_put(kobj);
+ return IRQ_HANDLED;
++ }
+
+ create_elog_obj(log_id, elog_size, elog_type);
+
+diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
+index 3a5ea8236db8..9e160fa74b24 100644
+--- a/arch/powerpc/platforms/powernv/pci-ioda.c
++++ b/arch/powerpc/platforms/powernv/pci-ioda.c
+@@ -156,11 +156,12 @@ static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
+ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
+ {
+ struct pnv_phb *phb = pe->phb;
++ unsigned int pe_num = pe->pe_number;
+
+ WARN_ON(pe->pdev);
+
+ memset(pe, 0, sizeof(struct pnv_ioda_pe));
+- clear_bit(pe->pe_number, phb->ioda.pe_alloc);
++ clear_bit(pe_num, phb->ioda.pe_alloc);
+ }
+
+ /* The default M64 BAR is shared by all PEs */
+diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
+index 6c110994d902..81d49476c47e 100644
+--- a/arch/powerpc/sysdev/cpm1.c
++++ b/arch/powerpc/sysdev/cpm1.c
+@@ -534,7 +534,8 @@ struct cpm1_gpio16_chip {
+
+ static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc)
+ {
+- struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
++ struct cpm1_gpio16_chip *cpm1_gc =
++ container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc);
+ struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+
+ cpm1_gc->cpdata = in_be16(&iop->dat);
+@@ -649,7 +650,8 @@ struct cpm1_gpio32_chip {
+
+ static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
+ {
+- struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
++ struct cpm1_gpio32_chip *cpm1_gc =
++ container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc);
+ struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+
+ cpm1_gc->cpdata = in_be32(&iop->dat);
+diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
+index 0ac12e5fd8ab..649a15f1a50c 100644
+--- a/arch/powerpc/sysdev/cpm_common.c
++++ b/arch/powerpc/sysdev/cpm_common.c
+@@ -82,7 +82,8 @@ struct cpm2_gpio32_chip {
+
+ static void cpm2_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
+ {
+- struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(&mm_gc->gc);
++ struct cpm2_gpio32_chip *cpm2_gc =
++ container_of(mm_gc, struct cpm2_gpio32_chip, mm_gc);
+ struct cpm2_ioports __iomem *iop = mm_gc->regs;
+
+ cpm2_gc->cpdata = in_be32(&iop->dat);
+diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
+index e0900ddf91dd..666fd8ba157e 100644
+--- a/arch/s390/include/asm/uaccess.h
++++ b/arch/s390/include/asm/uaccess.h
+@@ -209,28 +209,28 @@ int __put_user_bad(void) __attribute__((noreturn));
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: { \
+- unsigned char __x; \
++ unsigned char __x = 0; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ case 2: { \
+- unsigned short __x; \
++ unsigned short __x = 0; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ case 4: { \
+- unsigned int __x; \
++ unsigned int __x = 0; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ case 8: { \
+- unsigned long long __x; \
++ unsigned long long __x = 0; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
+index 43f2a2b80490..13c62e036521 100644
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -1951,9 +1951,10 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+ return -EINVAL;
+ current->thread.fpu.fpc = fpu->fpc;
+ if (MACHINE_HAS_VX)
+- convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
++ convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
++ (freg_t *) fpu->fprs);
+ else
+- memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
++ memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+ return 0;
+ }
+
+@@ -1962,9 +1963,10 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+ /* make sure we have the latest values */
+ save_fpu_regs();
+ if (MACHINE_HAS_VX)
+- convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
++ convert_vx_to_fp((freg_t *) fpu->fprs,
++ (__vector128 *) vcpu->run->s.regs.vrs);
+ else
+- memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
++ memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
+ fpu->fpc = current->thread.fpu.fpc;
+ return 0;
+ }
+diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h
+index 20a3591225cc..01aec8ccde83 100644
+--- a/arch/score/include/asm/uaccess.h
++++ b/arch/score/include/asm/uaccess.h
+@@ -163,7 +163,7 @@ do { \
+ __get_user_asm(val, "lw", ptr); \
+ break; \
+ case 8: \
+- if ((copy_from_user((void *)&val, ptr, 8)) == 0) \
++ if (__copy_from_user((void *)&val, ptr, 8) == 0) \
+ __gu_err = 0; \
+ else \
+ __gu_err = -EFAULT; \
+@@ -188,6 +188,8 @@ do { \
+ \
+ if (likely(access_ok(VERIFY_READ, __gu_ptr, size))) \
+ __get_user_common((x), size, __gu_ptr); \
++ else \
++ (x) = 0; \
+ \
+ __gu_err; \
+ })
+@@ -201,6 +203,7 @@ do { \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3:li %0, %4\n" \
++ "li %1, 0\n" \
+ "j 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+@@ -298,35 +301,34 @@ extern int __copy_tofrom_user(void *to, const void *from, unsigned long len);
+ static inline unsigned long
+ copy_from_user(void *to, const void *from, unsigned long len)
+ {
+- unsigned long over;
++ unsigned long res = len;
+
+- if (access_ok(VERIFY_READ, from, len))
+- return __copy_tofrom_user(to, from, len);
++ if (likely(access_ok(VERIFY_READ, from, len)))
++ res = __copy_tofrom_user(to, from, len);
+
+- if ((unsigned long)from < TASK_SIZE) {
+- over = (unsigned long)from + len - TASK_SIZE;
+- return __copy_tofrom_user(to, from, len - over) + over;
+- }
+- return len;
++ if (unlikely(res))
++ memset(to + (len - res), 0, res);
++
++ return res;
+ }
+
+ static inline unsigned long
+ copy_to_user(void *to, const void *from, unsigned long len)
+ {
+- unsigned long over;
+-
+- if (access_ok(VERIFY_WRITE, to, len))
+- return __copy_tofrom_user(to, from, len);
++ if (likely(access_ok(VERIFY_WRITE, to, len)))
++ len = __copy_tofrom_user(to, from, len);
+
+- if ((unsigned long)to < TASK_SIZE) {
+- over = (unsigned long)to + len - TASK_SIZE;
+- return __copy_tofrom_user(to, from, len - over) + over;
+- }
+ return len;
+ }
+
+-#define __copy_from_user(to, from, len) \
+- __copy_tofrom_user((to), (from), (len))
++static inline unsigned long
++__copy_from_user(void *to, const void *from, unsigned long len)
++{
++ unsigned long left = __copy_tofrom_user(to, from, len);
++ if (unlikely(left))
++ memset(to + (len - left), 0, left);
++ return left;
++}
+
+ #define __copy_to_user(to, from, len) \
+ __copy_tofrom_user((to), (from), (len))
+@@ -340,17 +342,17 @@ __copy_to_user_inatomic(void *to, const void *from, unsigned long len)
+ static inline unsigned long
+ __copy_from_user_inatomic(void *to, const void *from, unsigned long len)
+ {
+- return __copy_from_user(to, from, len);
++ return __copy_tofrom_user(to, from, len);
+ }
+
+-#define __copy_in_user(to, from, len) __copy_from_user(to, from, len)
++#define __copy_in_user(to, from, len) __copy_tofrom_user(to, from, len)
+
+ static inline unsigned long
+ copy_in_user(void *to, const void *from, unsigned long len)
+ {
+ if (access_ok(VERIFY_READ, from, len) &&
+ access_ok(VERFITY_WRITE, to, len))
+- return copy_from_user(to, from, len);
++ return __copy_tofrom_user(to, from, len);
+ }
+
+ /*
+diff --git a/arch/sh/include/asm/cmpxchg-xchg.h b/arch/sh/include/asm/cmpxchg-xchg.h
+index 7219719c23a3..1e881f5db659 100644
+--- a/arch/sh/include/asm/cmpxchg-xchg.h
++++ b/arch/sh/include/asm/cmpxchg-xchg.h
+@@ -21,7 +21,7 @@ static inline u32 __xchg_cmpxchg(volatile void *ptr, u32 x, int size)
+ int off = (unsigned long)ptr % sizeof(u32);
+ volatile u32 *p = ptr - off;
+ #ifdef __BIG_ENDIAN
+- int bitoff = (sizeof(u32) - 1 - off) * BITS_PER_BYTE;
++ int bitoff = (sizeof(u32) - size - off) * BITS_PER_BYTE;
+ #else
+ int bitoff = off * BITS_PER_BYTE;
+ #endif
+diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h
+index a49635c51266..92ade79ac427 100644
+--- a/arch/sh/include/asm/uaccess.h
++++ b/arch/sh/include/asm/uaccess.h
+@@ -151,7 +151,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
+ __kernel_size_t __copy_size = (__kernel_size_t) n;
+
+ if (__copy_size && __access_ok(__copy_from, __copy_size))
+- return __copy_user(to, from, __copy_size);
++ __copy_size = __copy_user(to, from, __copy_size);
++
++ if (unlikely(__copy_size))
++ memset(to + (n - __copy_size), 0, __copy_size);
+
+ return __copy_size;
+ }
+diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h
+index c01376c76b86..ca5073dd4596 100644
+--- a/arch/sh/include/asm/uaccess_64.h
++++ b/arch/sh/include/asm/uaccess_64.h
+@@ -24,6 +24,7 @@
+ #define __get_user_size(x,ptr,size,retval) \
+ do { \
+ retval = 0; \
++ x = 0; \
+ switch (size) { \
+ case 1: \
+ retval = __get_user_asm_b((void *)&x, \
+diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
+index 57aca2792d29..3e8de69c6dcc 100644
+--- a/arch/sparc/include/asm/uaccess_32.h
++++ b/arch/sparc/include/asm/uaccess_32.h
+@@ -263,8 +263,10 @@ static inline unsigned long copy_from_user(void *to, const void __user *from, un
+ {
+ if (n && __access_ok((unsigned long) from, n))
+ return __copy_user((__force void __user *) to, from, n);
+- else
++ else {
++ memset(to, 0, n);
+ return n;
++ }
+ }
+
+ static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
+diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
+index 52fef606bc54..a34da5b49002 100644
+--- a/arch/x86/boot/compressed/eboot.c
++++ b/arch/x86/boot/compressed/eboot.c
+@@ -1006,79 +1006,87 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
+ return status;
+ }
+
+-static efi_status_t exit_boot(struct boot_params *boot_params,
+- void *handle, bool is64)
+-{
+- struct efi_info *efi = &boot_params->efi_info;
+- unsigned long map_sz, key, desc_size;
+- efi_memory_desc_t *mem_map;
++struct exit_boot_struct {
++ struct boot_params *boot_params;
++ struct efi_info *efi;
+ struct setup_data *e820ext;
+- const char *signature;
+ __u32 e820ext_size;
+- __u32 nr_desc, prev_nr_desc;
+- efi_status_t status;
+- __u32 desc_version;
+- bool called_exit = false;
+- u8 nr_entries;
+- int i;
+-
+- nr_desc = 0;
+- e820ext = NULL;
+- e820ext_size = 0;
+-
+-get_map:
+- status = efi_get_memory_map(sys_table, &mem_map, &map_sz, &desc_size,
+- &desc_version, &key);
+-
+- if (status != EFI_SUCCESS)
+- return status;
+-
+- prev_nr_desc = nr_desc;
+- nr_desc = map_sz / desc_size;
+- if (nr_desc > prev_nr_desc &&
+- nr_desc > ARRAY_SIZE(boot_params->e820_map)) {
+- u32 nr_e820ext = nr_desc - ARRAY_SIZE(boot_params->e820_map);
+-
+- status = alloc_e820ext(nr_e820ext, &e820ext, &e820ext_size);
+- if (status != EFI_SUCCESS)
+- goto free_mem_map;
++ bool is64;
++};
+
+- efi_call_early(free_pool, mem_map);
+- goto get_map; /* Allocated memory, get map again */
++static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
++ struct efi_boot_memmap *map,
++ void *priv)
++{
++ static bool first = true;
++ const char *signature;
++ __u32 nr_desc;
++ efi_status_t status;
++ struct exit_boot_struct *p = priv;
++
++ if (first) {
++ nr_desc = *map->buff_size / *map->desc_size;
++ if (nr_desc > ARRAY_SIZE(p->boot_params->e820_map)) {
++ u32 nr_e820ext = nr_desc -
++ ARRAY_SIZE(p->boot_params->e820_map);
++
++ status = alloc_e820ext(nr_e820ext, &p->e820ext,
++ &p->e820ext_size);
++ if (status != EFI_SUCCESS)
++ return status;
++ }
++ first = false;
+ }
+
+- signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE;
+- memcpy(&efi->efi_loader_signature, signature, sizeof(__u32));
++ signature = p->is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE;
++ memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
+
+- efi->efi_systab = (unsigned long)sys_table;
+- efi->efi_memdesc_size = desc_size;
+- efi->efi_memdesc_version = desc_version;
+- efi->efi_memmap = (unsigned long)mem_map;
+- efi->efi_memmap_size = map_sz;
++ p->efi->efi_systab = (unsigned long)sys_table_arg;
++ p->efi->efi_memdesc_size = *map->desc_size;
++ p->efi->efi_memdesc_version = *map->desc_ver;
++ p->efi->efi_memmap = (unsigned long)*map->map;
++ p->efi->efi_memmap_size = *map->map_size;
+
+ #ifdef CONFIG_X86_64
+- efi->efi_systab_hi = (unsigned long)sys_table >> 32;
+- efi->efi_memmap_hi = (unsigned long)mem_map >> 32;
++ p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
++ p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
+ #endif
+
++ return EFI_SUCCESS;
++}
++
++static efi_status_t exit_boot(struct boot_params *boot_params,
++ void *handle, bool is64)
++{
++ unsigned long map_sz, key, desc_size, buff_size;
++ efi_memory_desc_t *mem_map;
++ struct setup_data *e820ext;
++ __u32 e820ext_size;
++ efi_status_t status;
++ __u32 desc_version;
++ struct efi_boot_memmap map;
++ struct exit_boot_struct priv;
++
++ map.map = &mem_map;
++ map.map_size = &map_sz;
++ map.desc_size = &desc_size;
++ map.desc_ver = &desc_version;
++ map.key_ptr = &key;
++ map.buff_size = &buff_size;
++ priv.boot_params = boot_params;
++ priv.efi = &boot_params->efi_info;
++ priv.e820ext = NULL;
++ priv.e820ext_size = 0;
++ priv.is64 = is64;
++
+ /* Might as well exit boot services now */
+- status = efi_call_early(exit_boot_services, handle, key);
+- if (status != EFI_SUCCESS) {
+- /*
+- * ExitBootServices() will fail if any of the event
+- * handlers change the memory map. In which case, we
+- * must be prepared to retry, but only once so that
+- * we're guaranteed to exit on repeated failures instead
+- * of spinning forever.
+- */
+- if (called_exit)
+- goto free_mem_map;
+-
+- called_exit = true;
+- efi_call_early(free_pool, mem_map);
+- goto get_map;
+- }
++ status = efi_exit_boot_services(sys_table, handle, &map, &priv,
++ exit_boot_func);
++ if (status != EFI_SUCCESS)
++ return status;
+
++ e820ext = priv.e820ext;
++ e820ext_size = priv.e820ext_size;
+ /* Historic? */
+ boot_params->alt_mem_k = 32 * 1024;
+
+@@ -1087,10 +1095,6 @@ get_map:
+ return status;
+
+ return EFI_SUCCESS;
+-
+-free_mem_map:
+- efi_call_early(free_pool, mem_map);
+- return status;
+ }
+
+ /*
+diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
+index 4e2ecfa23c15..4b429df40d7a 100644
+--- a/arch/x86/configs/tiny.config
++++ b/arch/x86/configs/tiny.config
+@@ -1 +1,3 @@
+ CONFIG_NOHIGHMEM=y
++# CONFIG_HIGHMEM4G is not set
++# CONFIG_HIGHMEM64G is not set
+diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
+index bd3e8421b57c..a8309ea677a1 100644
+--- a/arch/x86/events/amd/core.c
++++ b/arch/x86/events/amd/core.c
+@@ -119,8 +119,8 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
+ {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
+- [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
++ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
++ [PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
+diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
+index 7b5fd811ef45..4ff41ae514a3 100644
+--- a/arch/x86/events/intel/cqm.c
++++ b/arch/x86/events/intel/cqm.c
+@@ -458,6 +458,11 @@ static void __intel_cqm_event_count(void *info);
+ static void init_mbm_sample(u32 rmid, u32 evt_type);
+ static void __intel_mbm_event_count(void *info);
+
++static bool is_cqm_event(int e)
++{
++ return (e == QOS_L3_OCCUP_EVENT_ID);
++}
++
+ static bool is_mbm_event(int e)
+ {
+ return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
+@@ -1366,6 +1371,10 @@ static int intel_cqm_event_init(struct perf_event *event)
+ (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
+ return -EINVAL;
+
++ if ((is_cqm_event(event->attr.config) && !cqm_enabled) ||
++ (is_mbm_event(event->attr.config) && !mbm_enabled))
++ return -EINVAL;
++
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
+index 7ce9f3f669e6..9b983a474253 100644
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -1274,18 +1274,18 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+ struct pebs_record_nhm *p = at;
+ u64 pebs_status;
+
+- /* PEBS v3 has accurate status bits */
++ pebs_status = p->status & cpuc->pebs_enabled;
++ pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
++
++ /* PEBS v3 has more accurate status bits */
+ if (x86_pmu.intel_cap.pebs_format >= 3) {
+- for_each_set_bit(bit, (unsigned long *)&p->status,
+- MAX_PEBS_EVENTS)
++ for_each_set_bit(bit, (unsigned long *)&pebs_status,
++ x86_pmu.max_pebs_events)
+ counts[bit]++;
+
+ continue;
+ }
+
+- pebs_status = p->status & cpuc->pebs_enabled;
+- pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+-
+ /*
+ * On some CPUs the PEBS status can be zero when PEBS is
+ * racing with clearing of GLOBAL_STATUS.
+@@ -1333,8 +1333,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+ continue;
+
+ event = cpuc->events[bit];
+- WARN_ON_ONCE(!event);
+- WARN_ON_ONCE(!event->attr.precise_ip);
++ if (WARN_ON_ONCE(!event))
++ continue;
++
++ if (WARN_ON_ONCE(!event->attr.precise_ip))
++ continue;
+
+ /* log dropped samples number */
+ if (error[bit])
+diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
+index 04bb5fb5a8d7..861a7d9cb60f 100644
+--- a/arch/x86/events/intel/pt.c
++++ b/arch/x86/events/intel/pt.c
+@@ -1074,6 +1074,11 @@ static void pt_addr_filters_fini(struct perf_event *event)
+ event->hw.addr_filters = NULL;
+ }
+
++static inline bool valid_kernel_ip(unsigned long ip)
++{
++ return virt_addr_valid(ip) && kernel_ip(ip);
++}
++
+ static int pt_event_addr_filters_validate(struct list_head *filters)
+ {
+ struct perf_addr_filter *filter;
+@@ -1081,11 +1086,16 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
+
+ list_for_each_entry(filter, filters, entry) {
+ /* PT doesn't support single address triggers */
+- if (!filter->range)
++ if (!filter->range || !filter->size)
+ return -EOPNOTSUPP;
+
+- if (!filter->inode && !kernel_ip(filter->offset))
+- return -EINVAL;
++ if (!filter->inode) {
++ if (!valid_kernel_ip(filter->offset))
++ return -EINVAL;
++
++ if (!valid_kernel_ip(filter->offset + filter->size))
++ return -EINVAL;
++ }
+
+ if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+ return -EOPNOTSUPP;
+@@ -1111,7 +1121,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
+ } else {
+ /* apply the offset */
+ msr_a = filter->offset + offs[range];
+- msr_b = filter->size + msr_a;
++ msr_b = filter->size + msr_a - 1;
+ }
+
+ filters->filter[range].msr_a = msr_a;
+diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
+index 2982387ba817..0328c2ccc55d 100644
+--- a/arch/x86/include/asm/uaccess.h
++++ b/arch/x86/include/asm/uaccess.h
+@@ -414,7 +414,11 @@ do { \
+ #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \
+ asm volatile("1: mov"itype" %1,%"rtype"0\n" \
+ "2:\n" \
+- _ASM_EXTABLE_EX(1b, 2b) \
++ ".section .fixup,\"ax\"\n" \
++ "3:xor"itype" %"rtype"0,%"rtype"0\n" \
++ " jmp 2b\n" \
++ ".previous\n" \
++ _ASM_EXTABLE_EX(1b, 3b) \
+ : ltype(x) : "m" (__m(addr)))
+
+ #define __put_user_nocheck(x, ptr, size) \
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index f5c69d8974e1..b81fe2d63e15 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -669,6 +669,17 @@ static void init_amd_gh(struct cpuinfo_x86 *c)
+ set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
+ }
+
++#define MSR_AMD64_DE_CFG 0xC0011029
++
++static void init_amd_ln(struct cpuinfo_x86 *c)
++{
++ /*
++ * Apply erratum 665 fix unconditionally so machines without a BIOS
++ * fix work.
++ */
++ msr_set_bit(MSR_AMD64_DE_CFG, 31);
++}
++
+ static void init_amd_bd(struct cpuinfo_x86 *c)
+ {
+ u64 value;
+@@ -726,6 +737,7 @@ static void init_amd(struct cpuinfo_x86 *c)
+ case 6: init_amd_k7(c); break;
+ case 0xf: init_amd_k8(c); break;
+ case 0x10: init_amd_gh(c); break;
++ case 0x12: init_amd_ln(c); break;
+ case 0x15: init_amd_bd(c); break;
+ }
+
+diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
+index 7b3b3f24c3ea..078c933f9fcc 100644
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -55,12 +55,12 @@ asm (".pushsection .entry.text, \"ax\"\n"
+ ".popsection");
+
+ /* identity function, which can be inlined */
+-u32 _paravirt_ident_32(u32 x)
++u32 notrace _paravirt_ident_32(u32 x)
+ {
+ return x;
+ }
+
+-u64 _paravirt_ident_64(u64 x)
++u64 notrace _paravirt_ident_64(u64 x)
+ {
+ return x;
+ }
+diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
+index 5f42d038fcb4..c7220ba94aa7 100644
+--- a/arch/x86/kvm/ioapic.c
++++ b/arch/x86/kvm/ioapic.c
+@@ -109,6 +109,7 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+ {
+ bool new_val, old_val;
+ struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
++ struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
+ union kvm_ioapic_redirect_entry *e;
+
+ e = &ioapic->redirtbl[RTC_GSI];
+@@ -117,16 +118,17 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+ return;
+
+ new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
+- old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
++ old_val = test_bit(vcpu->vcpu_id, dest_map->map);
+
+ if (new_val == old_val)
+ return;
+
+ if (new_val) {
+- __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
++ __set_bit(vcpu->vcpu_id, dest_map->map);
++ dest_map->vectors[vcpu->vcpu_id] = e->fields.vector;
+ ioapic->rtc_status.pending_eoi++;
+ } else {
+- __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
++ __clear_bit(vcpu->vcpu_id, dest_map->map);
+ ioapic->rtc_status.pending_eoi--;
+ rtc_status_pending_eoi_check_valid(ioapic);
+ }
+diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
+index 39b91127ef07..cd944435dfbd 100644
+--- a/arch/x86/kvm/pmu_amd.c
++++ b/arch/x86/kvm/pmu_amd.c
+@@ -23,8 +23,8 @@
+ static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
+ [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
+ [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
+- [2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES },
+- [3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES },
++ [2] = { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES },
++ [3] = { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES },
+ [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+ [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+ [6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
+index fb0604f11eec..5431a32f79e7 100644
+--- a/arch/x86/mm/pat.c
++++ b/arch/x86/mm/pat.c
+@@ -931,9 +931,10 @@ int track_pfn_copy(struct vm_area_struct *vma)
+ }
+
+ /*
+- * prot is passed in as a parameter for the new mapping. If the vma has a
+- * linear pfn mapping for the entire range reserve the entire vma range with
+- * single reserve_pfn_range call.
++ * prot is passed in as a parameter for the new mapping. If the vma has
++ * a linear pfn mapping for the entire range, or no vma is provided,
++ * reserve the entire pfn + size range with a single
++ * reserve_pfn_range() call.
+ */
+ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+ unsigned long pfn, unsigned long addr, unsigned long size)
+@@ -942,11 +943,12 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+ enum page_cache_mode pcm;
+
+ /* reserve the whole chunk starting from paddr */
+- if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
++ if (!vma || (addr == vma->vm_start
++ && size == (vma->vm_end - vma->vm_start))) {
+ int ret;
+
+ ret = reserve_pfn_range(paddr, size, prot, 0);
+- if (!ret)
++ if (ret == 0 && vma)
+ vma->vm_flags |= VM_PAT;
+ return ret;
+ }
+@@ -1001,7 +1003,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+ resource_size_t paddr;
+ unsigned long prot;
+
+- if (!(vma->vm_flags & VM_PAT))
++ if (vma && !(vma->vm_flags & VM_PAT))
+ return;
+
+ /* free the chunk starting from pfn or the whole chunk */
+@@ -1015,7 +1017,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+ size = vma->vm_end - vma->vm_start;
+ }
+ free_pfn_range(paddr, size);
+- vma->vm_flags &= ~VM_PAT;
++ if (vma)
++ vma->vm_flags &= ~VM_PAT;
+ }
+
+ /*
+diff --git a/crypto/cryptd.c b/crypto/cryptd.c
+index 7921251cdb13..90fefae402a5 100644
+--- a/crypto/cryptd.c
++++ b/crypto/cryptd.c
+@@ -594,9 +594,14 @@ static int cryptd_hash_export(struct ahash_request *req, void *out)
+
+ static int cryptd_hash_import(struct ahash_request *req, const void *in)
+ {
+- struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
++ struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
++ struct shash_desc *desc = cryptd_shash_desc(req);
++
++ desc->tfm = ctx->child;
++ desc->flags = req->base.flags;
+
+- return crypto_shash_import(&rctx->desc, in);
++ return crypto_shash_import(desc, in);
+ }
+
+ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
+diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
+index 71b07198e207..ccd8cc47c4cf 100644
+--- a/drivers/ata/libahci.c
++++ b/drivers/ata/libahci.c
+@@ -2516,7 +2516,7 @@ static int ahci_host_activate_multi_irqs(struct ata_host *host,
+
+ /* Do not receive interrupts sent by dummy ports */
+ if (!pp) {
+- disable_irq(irq + i);
++ disable_irq(irq);
+ continue;
+ }
+
+diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
+index acc3eb542c74..d88372b794e4 100644
+--- a/drivers/bus/arm-ccn.c
++++ b/drivers/bus/arm-ccn.c
+@@ -187,6 +187,7 @@ struct arm_ccn {
+ struct arm_ccn_component *xp;
+
+ struct arm_ccn_dt dt;
++ int mn_id;
+ };
+
+
+@@ -326,6 +327,7 @@ struct arm_ccn_pmu_event {
+ static ssize_t arm_ccn_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+ {
++ struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
+ struct arm_ccn_pmu_event *event = container_of(attr,
+ struct arm_ccn_pmu_event, attr);
+ ssize_t res;
+@@ -352,6 +354,9 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev,
+ res += snprintf(buf + res, PAGE_SIZE - res,
+ ",cmp_l=?,cmp_h=?,mask=?");
+ break;
++ case CCN_TYPE_MN:
++ res += snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id);
++ break;
+ default:
+ res += snprintf(buf + res, PAGE_SIZE - res, ",node=?");
+ break;
+@@ -381,9 +386,9 @@ static umode_t arm_ccn_pmu_events_is_visible(struct kobject *kobj,
+ }
+
+ static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = {
+- CCN_EVENT_MN(eobarrier, "dir=0,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
+- CCN_EVENT_MN(ecbarrier, "dir=0,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
+- CCN_EVENT_MN(dvmop, "dir=0,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
++ CCN_EVENT_MN(eobarrier, "dir=1,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
++ CCN_EVENT_MN(ecbarrier, "dir=1,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
++ CCN_EVENT_MN(dvmop, "dir=1,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
+ CCN_EVENT_HNI(txdatflits, "dir=1,vc=3", CCN_IDX_MASK_ANY),
+ CCN_EVENT_HNI(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY),
+ CCN_EVENT_HNI(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY),
+@@ -757,6 +762,12 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
+
+ /* Validate node/xp vs topology */
+ switch (type) {
++ case CCN_TYPE_MN:
++ if (node_xp != ccn->mn_id) {
++ dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp);
++ return -EINVAL;
++ }
++ break;
+ case CCN_TYPE_XP:
+ if (node_xp >= ccn->num_xps) {
+ dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp);
+@@ -884,6 +895,10 @@ static void arm_ccn_pmu_xp_dt_config(struct perf_event *event, int enable)
+ struct arm_ccn_component *xp;
+ u32 val, dt_cfg;
+
++ /* Nothing to do for cycle counter */
++ if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER)
++ return;
++
+ if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP)
+ xp = &ccn->xp[CCN_CONFIG_XP(event->attr.config)];
+ else
+@@ -986,7 +1001,7 @@ static void arm_ccn_pmu_xp_watchpoint_config(struct perf_event *event)
+
+ /* Comparison values */
+ writel(cmp_l & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp));
+- writel((cmp_l >> 32) & 0xefffffff,
++ writel((cmp_l >> 32) & 0x7fffffff,
+ source->base + CCN_XP_DT_CMP_VAL_L(wp) + 4);
+ writel(cmp_h & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_H(wp));
+ writel((cmp_h >> 32) & 0x0fffffff,
+@@ -994,7 +1009,7 @@ static void arm_ccn_pmu_xp_watchpoint_config(struct perf_event *event)
+
+ /* Mask */
+ writel(mask_l & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp));
+- writel((mask_l >> 32) & 0xefffffff,
++ writel((mask_l >> 32) & 0x7fffffff,
+ source->base + CCN_XP_DT_CMP_MASK_L(wp) + 4);
+ writel(mask_h & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_H(wp));
+ writel((mask_h >> 32) & 0x0fffffff,
+@@ -1369,6 +1384,8 @@ static int arm_ccn_init_nodes(struct arm_ccn *ccn, int region,
+
+ switch (type) {
+ case CCN_TYPE_MN:
++ ccn->mn_id = id;
++ return 0;
+ case CCN_TYPE_DT:
+ return 0;
+ case CCN_TYPE_XP:
+diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c
+index 6f3719d73390..e84877a2cacc 100644
+--- a/drivers/clocksource/sun4i_timer.c
++++ b/drivers/clocksource/sun4i_timer.c
+@@ -123,12 +123,16 @@ static struct clock_event_device sun4i_clockevent = {
+ .set_next_event = sun4i_clkevt_next_event,
+ };
+
++static void sun4i_timer_clear_interrupt(void)
++{
++ writel(TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_ST_REG);
++}
+
+ static irqreturn_t sun4i_timer_interrupt(int irq, void *dev_id)
+ {
+ struct clock_event_device *evt = (struct clock_event_device *)dev_id;
+
+- writel(0x1, timer_base + TIMER_IRQ_ST_REG);
++ sun4i_timer_clear_interrupt();
+ evt->event_handler(evt);
+
+ return IRQ_HANDLED;
+@@ -193,6 +197,9 @@ static void __init sun4i_timer_init(struct device_node *node)
+ /* Make sure timer is stopped before playing with interrupts */
+ sun4i_clkevt_time_stop(0);
+
++ /* clear timer0 interrupt */
++ sun4i_timer_clear_interrupt();
++
+ sun4i_clockevent.cpumask = cpu_possible_mask;
+ sun4i_clockevent.irq = irq;
+
+diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
+index 3bd127f95315..aded10662020 100644
+--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
++++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
+@@ -41,6 +41,8 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE;
+ #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE
+ #endif
+
++#define EFI_MMAP_NR_SLACK_SLOTS 8
++
+ struct file_info {
+ efi_file_handle_t *handle;
+ u64 size;
+@@ -63,49 +65,62 @@ void efi_printk(efi_system_table_t *sys_table_arg, char *str)
+ }
+ }
+
++static inline bool mmap_has_headroom(unsigned long buff_size,
++ unsigned long map_size,
++ unsigned long desc_size)
++{
++ unsigned long slack = buff_size - map_size;
++
++ return slack / desc_size >= EFI_MMAP_NR_SLACK_SLOTS;
++}
++
+ efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
+- efi_memory_desc_t **map,
+- unsigned long *map_size,
+- unsigned long *desc_size,
+- u32 *desc_ver,
+- unsigned long *key_ptr)
++ struct efi_boot_memmap *map)
+ {
+ efi_memory_desc_t *m = NULL;
+ efi_status_t status;
+ unsigned long key;
+ u32 desc_version;
+
+- *map_size = sizeof(*m) * 32;
++ *map->desc_size = sizeof(*m);
++ *map->map_size = *map->desc_size * 32;
++ *map->buff_size = *map->map_size;
+ again:
+- /*
+- * Add an additional efi_memory_desc_t because we're doing an
+- * allocation which may be in a new descriptor region.
+- */
+- *map_size += sizeof(*m);
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+- *map_size, (void **)&m);
++ *map->map_size, (void **)&m);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+- *desc_size = 0;
++ *map->desc_size = 0;
+ key = 0;
+- status = efi_call_early(get_memory_map, map_size, m,
+- &key, desc_size, &desc_version);
+- if (status == EFI_BUFFER_TOO_SMALL) {
++ status = efi_call_early(get_memory_map, map->map_size, m,
++ &key, map->desc_size, &desc_version);
++ if (status == EFI_BUFFER_TOO_SMALL ||
++ !mmap_has_headroom(*map->buff_size, *map->map_size,
++ *map->desc_size)) {
+ efi_call_early(free_pool, m);
++ /*
++ * Make sure there are some entries of headroom so that the
++ * buffer can be reused for a new map after allocations are
++ * no longer permitted. It's unlikely that the map will grow
++ * to exceed this headroom once we are ready to trigger
++ * ExitBootServices().
++ */
++ *map->map_size += *map->desc_size * EFI_MMAP_NR_SLACK_SLOTS;
++ *map->buff_size = *map->map_size;
+ goto again;
+ }
+
+ if (status != EFI_SUCCESS)
+ efi_call_early(free_pool, m);
+
+- if (key_ptr && status == EFI_SUCCESS)
+- *key_ptr = key;
+- if (desc_ver && status == EFI_SUCCESS)
+- *desc_ver = desc_version;
++ if (map->key_ptr && status == EFI_SUCCESS)
++ *map->key_ptr = key;
++ if (map->desc_ver && status == EFI_SUCCESS)
++ *map->desc_ver = desc_version;
+
+ fail:
+- *map = m;
++ *map->map = m;
+ return status;
+ }
+
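mmap_has_headroom() asks whether the allocated buffer can hold at least
EFI_MMAP_NR_SLACK_SLOTS descriptors more than the firmware just reported,
so the same buffer can absorb map growth later. The check in isolation,
runnable with illustrative sizes:

    #include <stdio.h>
    #include <stdbool.h>

    #define EFI_MMAP_NR_SLACK_SLOTS 8

    static bool mmap_has_headroom(unsigned long buff_size,
                                  unsigned long map_size,
                                  unsigned long desc_size)
    {
        unsigned long slack = buff_size - map_size;

        return slack / desc_size >= EFI_MMAP_NR_SLACK_SLOTS;
    }

    int main(void)
    {
        /* e.g. 48-byte descriptors: 40 in the map, buffer sized for 48 */
        unsigned long desc = 48, map = 40 * desc, buff = 48 * desc;

        printf("headroom ok: %d\n", mmap_has_headroom(buff, map, desc));
        return 0;
    }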
+@@ -113,13 +128,20 @@ fail:
+ unsigned long get_dram_base(efi_system_table_t *sys_table_arg)
+ {
+ efi_status_t status;
+- unsigned long map_size;
++ unsigned long map_size, buff_size;
+ unsigned long membase = EFI_ERROR;
+ struct efi_memory_map map;
+ efi_memory_desc_t *md;
++ struct efi_boot_memmap boot_map;
+
+- status = efi_get_memory_map(sys_table_arg, (efi_memory_desc_t **)&map.map,
+- &map_size, &map.desc_size, NULL, NULL);
++ boot_map.map = (efi_memory_desc_t **)&map.map;
++ boot_map.map_size = &map_size;
++ boot_map.desc_size = &map.desc_size;
++ boot_map.desc_ver = NULL;
++ boot_map.key_ptr = NULL;
++ boot_map.buff_size = &buff_size;
++
++ status = efi_get_memory_map(sys_table_arg, &boot_map);
+ if (status != EFI_SUCCESS)
+ return membase;
+
+@@ -144,15 +166,22 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg,
+ unsigned long size, unsigned long align,
+ unsigned long *addr, unsigned long max)
+ {
+- unsigned long map_size, desc_size;
++ unsigned long map_size, desc_size, buff_size;
+ efi_memory_desc_t *map;
+ efi_status_t status;
+ unsigned long nr_pages;
+ u64 max_addr = 0;
+ int i;
++ struct efi_boot_memmap boot_map;
++
++ boot_map.map = &map;
++ boot_map.map_size = &map_size;
++ boot_map.desc_size = &desc_size;
++ boot_map.desc_ver = NULL;
++ boot_map.key_ptr = NULL;
++ boot_map.buff_size = &buff_size;
+
+- status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size,
+- NULL, NULL);
++ status = efi_get_memory_map(sys_table_arg, &boot_map);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+@@ -230,14 +259,21 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
+ unsigned long size, unsigned long align,
+ unsigned long *addr)
+ {
+- unsigned long map_size, desc_size;
++ unsigned long map_size, desc_size, buff_size;
+ efi_memory_desc_t *map;
+ efi_status_t status;
+ unsigned long nr_pages;
+ int i;
++ struct efi_boot_memmap boot_map;
+
+- status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size,
+- NULL, NULL);
++ boot_map.map = &map;
++ boot_map.map_size = &map_size;
++ boot_map.desc_size = &desc_size;
++ boot_map.desc_ver = NULL;
++ boot_map.key_ptr = NULL;
++ boot_map.buff_size = &buff_size;
++
++ status = efi_get_memory_map(sys_table_arg, &boot_map);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+@@ -704,3 +740,76 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
+ *cmd_line_len = options_bytes;
+ return (char *)cmdline_addr;
+ }
++
++/*
++ * Handle calling ExitBootServices according to the requirements set out by the
++ * spec. Obtains the current memory map, and returns that info after calling
++ * ExitBootServices. The client must specify a function to perform any
++ * processing of the memory map data prior to ExitBootServices. A client
++ * specific structure may be passed to the function via priv. The client
++ * function may be called multiple times.
++ */
++efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg,
++ void *handle,
++ struct efi_boot_memmap *map,
++ void *priv,
++ efi_exit_boot_map_processing priv_func)
++{
++ efi_status_t status;
++
++ status = efi_get_memory_map(sys_table_arg, map);
++
++ if (status != EFI_SUCCESS)
++ goto fail;
++
++ status = priv_func(sys_table_arg, map, priv);
++ if (status != EFI_SUCCESS)
++ goto free_map;
++
++ status = efi_call_early(exit_boot_services, handle, *map->key_ptr);
++
++ if (status == EFI_INVALID_PARAMETER) {
++ /*
++ * The memory map changed between efi_get_memory_map() and
++ * exit_boot_services(). Per the UEFI Spec v2.6, Section 6.4
++ * (EFI_BOOT_SERVICES.ExitBootServices), we need to get the
++ * updated map, and try again. The spec implies one retry
++ * should be sufficient, which is confirmed against the EDK2
++ * implementation. Per the spec, we can only invoke
++ * get_memory_map() and exit_boot_services() - we cannot alloc,
++ * so efi_get_memory_map() cannot be used, and we must reuse
++ * the buffer. For all practical purposes, the headroom in the
++ * buffer should account for any changes in the map so the call
++ * to get_memory_map() is expected to succeed here.
++ */
++ *map->map_size = *map->buff_size;
++ status = efi_call_early(get_memory_map,
++ map->map_size,
++ *map->map,
++ map->key_ptr,
++ map->desc_size,
++ map->desc_ver);
++
++ /* exit_boot_services() was called, thus cannot free */
++ if (status != EFI_SUCCESS)
++ goto fail;
++
++ status = priv_func(sys_table_arg, map, priv);
++ /* exit_boot_services() was called, thus cannot free */
++ if (status != EFI_SUCCESS)
++ goto fail;
++
++ status = efi_call_early(exit_boot_services, handle, *map->key_ptr);
++ }
++
++ /* exit_boot_services() was called, thus cannot free */
++ if (status != EFI_SUCCESS)
++ goto fail;
++
++ return EFI_SUCCESS;
++
++free_map:
++ efi_call_early(free_pool, *map->map);
++fail:
++ return status;
++}
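The EFI_INVALID_PARAMETER branch above is the spec's retry-once protocol:
refresh the map into the existing buffer (no allocations are permitted any
more) and call ExitBootServices() a second time. The control flow distilled
into a runnable sketch; the status codes and the fail-once stub are
assumptions for illustration, not firmware behaviour:

    #include <stdio.h>

    enum { EFI_SUCCESS = 0, EFI_INVALID_PARAMETER = 2 };

    static int calls;

    /* Stub: fail the first attempt, as if the memory-map key went stale. */
    static int exit_boot_services(void)
    {
        return ++calls == 1 ? EFI_INVALID_PARAMETER : EFI_SUCCESS;
    }

    static int get_memory_map(void) { return EFI_SUCCESS; }

    int main(void)
    {
        int status = exit_boot_services();

        if (status == EFI_INVALID_PARAMETER) {
            /* Reuse the buffer: only get_memory_map() and
             * exit_boot_services() may be invoked at this point. */
            status = get_memory_map();
            if (status == EFI_SUCCESS)
                status = exit_boot_services();
        }

        printf("status %d after %d attempt(s)\n", status, calls);
        return status;
    }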
+diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
+index e58abfa953cc..a6a93116a8f0 100644
+--- a/drivers/firmware/efi/libstub/fdt.c
++++ b/drivers/firmware/efi/libstub/fdt.c
+@@ -152,6 +152,27 @@ fdt_set_fail:
+ #define EFI_FDT_ALIGN EFI_PAGE_SIZE
+ #endif
+
++struct exit_boot_struct {
++ efi_memory_desc_t *runtime_map;
++ int *runtime_entry_count;
++};
++
++static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
++ struct efi_boot_memmap *map,
++ void *priv)
++{
++ struct exit_boot_struct *p = priv;
++ /*
++ * Update the memory map with virtual addresses. The function will also
++ * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME
++ * entries so that we can pass it straight to SetVirtualAddressMap()
++ */
++ efi_get_virtmap(*map->map, *map->map_size, *map->desc_size,
++ p->runtime_map, p->runtime_entry_count);
++
++ return EFI_SUCCESS;
++}
++
+ /*
+ * Allocate memory for a new FDT, then add EFI, commandline, and
+ * initrd related fields to the FDT. This routine increases the
+@@ -175,13 +196,22 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
+ unsigned long fdt_addr,
+ unsigned long fdt_size)
+ {
+- unsigned long map_size, desc_size;
++ unsigned long map_size, desc_size, buff_size;
+ u32 desc_ver;
+ unsigned long mmap_key;
+ efi_memory_desc_t *memory_map, *runtime_map;
+ unsigned long new_fdt_size;
+ efi_status_t status;
+ int runtime_entry_count = 0;
++ struct efi_boot_memmap map;
++ struct exit_boot_struct priv;
++
++ map.map = &runtime_map;
++ map.map_size = &map_size;
++ map.desc_size = &desc_size;
++ map.desc_ver = &desc_ver;
++ map.key_ptr = &mmap_key;
++ map.buff_size = &buff_size;
+
+ /*
+ * Get a copy of the current memory map that we will use to prepare
+@@ -189,8 +219,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
+ * subsequent allocations adding entries, since they could not affect
+ * the number of EFI_MEMORY_RUNTIME regions.
+ */
+- status = efi_get_memory_map(sys_table, &runtime_map, &map_size,
+- &desc_size, &desc_ver, &mmap_key);
++ status = efi_get_memory_map(sys_table, &map);
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n");
+ return status;
+@@ -199,6 +228,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
+ pr_efi(sys_table,
+ "Exiting boot services and installing virtual address map...\n");
+
++ map.map = &memory_map;
+ /*
+ * Estimate size of new FDT, and allocate memory for it. We
+ * will allocate a bigger buffer if this ends up being too
+@@ -218,8 +248,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
+ * we can get the memory map key needed for
+ * exit_boot_services().
+ */
+- status = efi_get_memory_map(sys_table, &memory_map, &map_size,
+- &desc_size, &desc_ver, &mmap_key);
++ status = efi_get_memory_map(sys_table, &map);
+ if (status != EFI_SUCCESS)
+ goto fail_free_new_fdt;
+
+@@ -250,16 +279,11 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
+ }
+ }
+
+- /*
+- * Update the memory map with virtual addresses. The function will also
+- * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME
+- * entries so that we can pass it straight into SetVirtualAddressMap()
+- */
+- efi_get_virtmap(memory_map, map_size, desc_size, runtime_map,
+- &runtime_entry_count);
+-
+- /* Now we are ready to exit_boot_services.*/
+- status = sys_table->boottime->exit_boot_services(handle, mmap_key);
++ sys_table->boottime->free_pool(memory_map);
++ priv.runtime_map = runtime_map;
++ priv.runtime_entry_count = &runtime_entry_count;
++ status = efi_exit_boot_services(sys_table, handle, &map, &priv,
++ exit_boot_func);
+
+ if (status == EFI_SUCCESS) {
+ efi_set_virtual_address_map_t *svam;
+diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
+index 53f6d3fe6d86..0c9f58c5ba50 100644
+--- a/drivers/firmware/efi/libstub/random.c
++++ b/drivers/firmware/efi/libstub/random.c
+@@ -73,12 +73,20 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
+ unsigned long random_seed)
+ {
+ unsigned long map_size, desc_size, total_slots = 0, target_slot;
++ unsigned long buff_size;
+ efi_status_t status;
+ efi_memory_desc_t *memory_map;
+ int map_offset;
++ struct efi_boot_memmap map;
+
+- status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size,
+- &desc_size, NULL, NULL);
++ map.map = &memory_map;
++ map.map_size = &map_size;
++ map.desc_size = &desc_size;
++ map.desc_ver = NULL;
++ map.key_ptr = NULL;
++ map.buff_size = &buff_size;
++
++ status = efi_get_memory_map(sys_table_arg, &map);
+ if (status != EFI_SUCCESS)
+ return status;
+
+diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+index 016c191221f3..52c527f6642a 100644
+--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
++++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+@@ -320,19 +320,19 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane,
+ u32 *coeff_tab = heo_upscaling_ycoef;
+ u32 max_memsize;
+
+- if (state->crtc_w < state->src_w)
++ if (state->crtc_h < state->src_h)
+ coeff_tab = heo_downscaling_ycoef;
+ for (i = 0; i < ARRAY_SIZE(heo_upscaling_ycoef); i++)
+ atmel_hlcdc_layer_update_cfg(&plane->layer,
+ 33 + i,
+ 0xffffffff,
+ coeff_tab[i]);
+- factor = ((8 * 256 * state->src_w) - (256 * 4)) /
+- state->crtc_w;
++ factor = ((8 * 256 * state->src_h) - (256 * 4)) /
++ state->crtc_h;
+ factor++;
+- max_memsize = ((factor * state->crtc_w) + (256 * 4)) /
++ max_memsize = ((factor * state->crtc_h) + (256 * 4)) /
+ 2048;
+- if (max_memsize > state->src_w)
++ if (max_memsize > state->src_h)
+ factor--;
+ factor_reg |= (factor << 16) | 0x80000000;
+ }
+diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
+index 57676f8d7ecf..a6289752be16 100644
+--- a/drivers/gpu/drm/drm_ioc32.c
++++ b/drivers/gpu/drm/drm_ioc32.c
+@@ -1015,6 +1015,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd,
+ return 0;
+ }
+
++#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+ typedef struct drm_mode_fb_cmd232 {
+ u32 fb_id;
+ u32 width;
+@@ -1071,6 +1072,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd,
+
+ return 0;
+ }
++#endif
+
+ static drm_ioctl_compat_t *drm_compat_ioctls[] = {
+ [DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version,
+@@ -1104,7 +1106,9 @@ static drm_ioctl_compat_t *drm_compat_ioctls[] = {
+ [DRM_IOCTL_NR(DRM_IOCTL_UPDATE_DRAW32)] = compat_drm_update_draw,
+ #endif
+ [DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank,
++#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+ [DRM_IOCTL_NR(DRM_IOCTL_MODE_ADDFB232)] = compat_drm_mode_addfb2,
++#endif
+ };
+
+ /**
+diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
+index 103546834b60..2a6e12956baf 100644
+--- a/drivers/gpu/drm/i915/i915_debugfs.c
++++ b/drivers/gpu/drm/i915/i915_debugfs.c
+@@ -2100,9 +2100,10 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
+ return ret;
+
+ list_for_each_entry(ctx, &dev_priv->context_list, link)
+- if (ctx != dev_priv->kernel_context)
++ if (ctx != dev_priv->kernel_context) {
+ for_each_engine(engine, dev_priv)
+ i915_dump_lrc_obj(m, ctx, engine);
++ }
+
+ mutex_unlock(&dev->struct_mutex);
+
+diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
+index 16e209d326b6..72842aafdcf6 100644
+--- a/drivers/gpu/drm/i915/intel_opregion.c
++++ b/drivers/gpu/drm/i915/intel_opregion.c
+@@ -1013,6 +1013,23 @@ err_out:
+ return err;
+ }
+
++static int intel_use_opregion_panel_type_callback(const struct dmi_system_id *id)
++{
++ DRM_INFO("Using panel type from OpRegion on %s\n", id->ident);
++ return 1;
++}
++
++static const struct dmi_system_id intel_use_opregion_panel_type[] = {
++ {
++ .callback = intel_use_opregion_panel_type_callback,
++ .ident = "Conrac GmbH IX45GM2",
++ .matches = {DMI_MATCH(DMI_SYS_VENDOR, "Conrac GmbH"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "IX45GM2"),
++ },
++ },
++ { }
++};
++
+ int
+ intel_opregion_get_panel_type(struct drm_device *dev)
+ {
+@@ -1039,6 +1056,16 @@ intel_opregion_get_panel_type(struct drm_device *dev)
+ }
+
+ /*
++ * So far we know that some machines must use it, others must not use it.
++ * There doesn't seem to be any way to determine which way to go, except
++ * via a quirk list :(
++ */
++ if (!dmi_check_system(intel_use_opregion_panel_type)) {
++ DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1);
++ return -ENODEV;
++ }
++
++ /*
+ * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us
+ * low vswing for eDP, whereas the VBT panel type (2) gives us normal
+ * vswing instead. Low vswing results in some display flickers, so
+diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
+index 5b2963f32291..16ae246f7cc2 100644
+--- a/drivers/gpu/drm/msm/msm_drv.h
++++ b/drivers/gpu/drm/msm/msm_drv.h
+@@ -148,6 +148,12 @@ struct msm_drm_private {
+ } vram;
+
+ struct msm_vblank_ctrl vblank_ctrl;
++
++ /* task holding struct_mutex; currently only used in submit path
++ * to detect and reject faults from copy_from_user() for submit
++ * ioctl.
++ */
++ struct task_struct *struct_mutex_task;
+ };
+
+ struct msm_format {
+diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
+index 69836f5685b1..46ffcbf2f30e 100644
+--- a/drivers/gpu/drm/msm/msm_gem.c
++++ b/drivers/gpu/drm/msm/msm_gem.c
+@@ -196,11 +196,20 @@ int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct drm_device *dev = obj->dev;
++ struct msm_drm_private *priv = dev->dev_private;
+ struct page **pages;
+ unsigned long pfn;
+ pgoff_t pgoff;
+ int ret;
+
++ /* This should only happen if userspace tries to pass an mmap'd
++ * but unfaulted gem bo vaddr into the submit ioctl, triggering
++ * a page fault while struct_mutex is already held. This is
++ * not a valid use case, so just bail.
++ */
++ if (priv->struct_mutex_task == current)
++ return VM_FAULT_SIGBUS;
++
+ /* Make sure we don't parallel update on a fault, nor move or remove
+ * something from beneath our feet
+ */
+diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
+index eb515f04eb9d..c1889d700f26 100644
+--- a/drivers/gpu/drm/msm/msm_gem_submit.c
++++ b/drivers/gpu/drm/msm/msm_gem_submit.c
+@@ -394,6 +394,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
+ return -ENOMEM;
+
+ mutex_lock(&dev->struct_mutex);
++ priv->struct_mutex_task = current;
+
+ ret = submit_lookup_objects(submit, args, file);
+ if (ret)
+@@ -479,6 +480,7 @@ out:
+ submit_cleanup(submit);
+ if (ret)
+ msm_gem_submit_free(submit);
++ priv->struct_mutex_task = NULL;
+ mutex_unlock(&dev->struct_mutex);
+ return ret;
+ }
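Recording the struct_mutex holder lets the GEM fault handler spot the
self-deadlock case - a submit ioctl faulting on its own mmap'd buffer - and
fail fast instead of blocking on a lock it already holds. The owner-task
pattern modelled with pthreads (names are illustrative, not the driver's):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_t owner;
    static int owner_valid;

    /* Fault path: refuse to recurse into a lock the caller holds. */
    static int fault_handler(void)
    {
        if (owner_valid && pthread_equal(owner, pthread_self()))
            return -1;                  /* like VM_FAULT_SIGBUS: bail out */

        pthread_mutex_lock(&lock);
        pthread_mutex_unlock(&lock);
        return 0;
    }

    int main(void)
    {
        pthread_mutex_lock(&lock);      /* the submit path takes the lock */
        owner = pthread_self();
        owner_valid = 1;

        printf("fault while holding lock: %d\n", fault_handler());

        owner_valid = 0;
        pthread_mutex_unlock(&lock);
        return 0;
    }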
+diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
+index 197e693e7e7b..3833c83a4065 100644
+--- a/drivers/iio/accel/bmc150-accel-core.c
++++ b/drivers/iio/accel/bmc150-accel-core.c
+@@ -67,6 +67,9 @@
+ #define BMC150_ACCEL_REG_PMU_BW 0x10
+ #define BMC150_ACCEL_DEF_BW 125
+
++#define BMC150_ACCEL_REG_RESET 0x14
++#define BMC150_ACCEL_RESET_VAL 0xB6
++
+ #define BMC150_ACCEL_REG_INT_MAP_0 0x19
+ #define BMC150_ACCEL_INT_MAP_0_BIT_SLOPE BIT(2)
+
+@@ -1497,6 +1500,14 @@ static int bmc150_accel_chip_init(struct bmc150_accel_data *data)
+ int ret, i;
+ unsigned int val;
+
++ /*
++ * Reset chip to get it in a known good state. A delay of 1.8ms after
++ * reset is required according to the data sheets of supported chips.
++ */
++ regmap_write(data->regmap, BMC150_ACCEL_REG_RESET,
++ BMC150_ACCEL_RESET_VAL);
++ usleep_range(1800, 2500);
++
+ ret = regmap_read(data->regmap, BMC150_ACCEL_REG_CHIP_ID, &val);
+ if (ret < 0) {
+ dev_err(dev, "Error: Reading chip id\n");
+diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c
+index 3a9f106787d2..9d72d4bcf5e9 100644
+--- a/drivers/iio/accel/kxsd9.c
++++ b/drivers/iio/accel/kxsd9.c
+@@ -160,11 +160,13 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev,
+ if (ret < 0)
+ goto error_ret;
+ *val = ret;
++ ret = IIO_VAL_INT;
+ break;
+ case IIO_CHAN_INFO_SCALE:
+ ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
+ if (ret < 0)
+ goto error_ret;
++ *val = 0;
+ *val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK];
+ ret = IIO_VAL_INT_PLUS_MICRO;
+ break;
+diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
+index 25378c5882e2..f7232185d191 100644
+--- a/drivers/iio/adc/Kconfig
++++ b/drivers/iio/adc/Kconfig
+@@ -377,6 +377,7 @@ config QCOM_SPMI_VADC
+ config ROCKCHIP_SARADC
+ tristate "Rockchip SARADC driver"
+ depends on ARCH_ROCKCHIP || (ARM && COMPILE_TEST)
++ depends on RESET_CONTROLLER
+ help
+ Say yes here to build support for the SARADC found in SoCs from
+ Rockchip.
+diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c
+index a3f5254f4e51..03962233501c 100644
+--- a/drivers/iio/adc/ad799x.c
++++ b/drivers/iio/adc/ad799x.c
+@@ -533,6 +533,7 @@ static struct attribute_group ad799x_event_attrs_group = {
+ static const struct iio_info ad7991_info = {
+ .read_raw = &ad799x_read_raw,
+ .driver_module = THIS_MODULE,
++ .update_scan_mode = ad799x_update_scan_mode,
+ };
+
+ static const struct iio_info ad7993_4_7_8_noirq_info = {
+diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c
+index 52430ba171f3..0438c68015e8 100644
+--- a/drivers/iio/adc/at91_adc.c
++++ b/drivers/iio/adc/at91_adc.c
+@@ -381,8 +381,8 @@ static irqreturn_t at91_adc_rl_interrupt(int irq, void *private)
+ st->ts_bufferedmeasure = false;
+ input_report_key(st->ts_input, BTN_TOUCH, 0);
+ input_sync(st->ts_input);
+- } else if (status & AT91_ADC_EOC(3)) {
+- /* Conversion finished */
++ } else if (status & AT91_ADC_EOC(3) && st->ts_input) {
++ /* Conversion finished and we have a touchscreen */
+ if (st->ts_bufferedmeasure) {
+ /*
+ * Last measurement is always discarded, since it can
+diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
+index f9ad6c2d6821..85d701291654 100644
+--- a/drivers/iio/adc/rockchip_saradc.c
++++ b/drivers/iio/adc/rockchip_saradc.c
+@@ -21,6 +21,8 @@
+ #include <linux/of_device.h>
+ #include <linux/clk.h>
+ #include <linux/completion.h>
++#include <linux/delay.h>
++#include <linux/reset.h>
+ #include <linux/regulator/consumer.h>
+ #include <linux/iio/iio.h>
+
+@@ -53,6 +55,7 @@ struct rockchip_saradc {
+ struct clk *clk;
+ struct completion completion;
+ struct regulator *vref;
++ struct reset_control *reset;
+ const struct rockchip_saradc_data *data;
+ u16 last_val;
+ };
+@@ -190,6 +193,16 @@ static const struct of_device_id rockchip_saradc_match[] = {
+ };
+ MODULE_DEVICE_TABLE(of, rockchip_saradc_match);
+
++/*
++ * Reset the SARADC controller.
++ */
++static void rockchip_saradc_reset_controller(struct reset_control *reset)
++{
++ reset_control_assert(reset);
++ usleep_range(10, 20);
++ reset_control_deassert(reset);
++}
++
+ static int rockchip_saradc_probe(struct platform_device *pdev)
+ {
+ struct rockchip_saradc *info = NULL;
+@@ -218,6 +231,20 @@ static int rockchip_saradc_probe(struct platform_device *pdev)
+ if (IS_ERR(info->regs))
+ return PTR_ERR(info->regs);
+
++ /*
++ * The reset should be an optional property, as it should work
++ * with old devicetrees as well
++ */
++ info->reset = devm_reset_control_get(&pdev->dev, "saradc-apb");
++ if (IS_ERR(info->reset)) {
++ ret = PTR_ERR(info->reset);
++ if (ret != -ENOENT)
++ return ret;
++
++ dev_dbg(&pdev->dev, "no reset control found\n");
++ info->reset = NULL;
++ }
++
+ init_completion(&info->completion);
+
+ irq = platform_get_irq(pdev, 0);
+@@ -252,6 +279,9 @@ static int rockchip_saradc_probe(struct platform_device *pdev)
+ return PTR_ERR(info->vref);
+ }
+
++ if (info->reset)
++ rockchip_saradc_reset_controller(info->reset);
++
+ /*
+ * Use a default value for the converter clock.
+ * This may become user-configurable in the future.
+diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c
+index 73cbf0b54e54..fe96af6059d5 100644
+--- a/drivers/iio/adc/ti-ads1015.c
++++ b/drivers/iio/adc/ti-ads1015.c
+@@ -403,7 +403,8 @@ static const struct iio_info ads1015_info = {
+ #ifdef CONFIG_OF
+ static int ads1015_get_channels_config_of(struct i2c_client *client)
+ {
+- struct ads1015_data *data = i2c_get_clientdata(client);
++ struct iio_dev *indio_dev = i2c_get_clientdata(client);
++ struct ads1015_data *data = iio_priv(indio_dev);
+ struct device_node *node;
+
+ if (!client->dev.of_node ||
+diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
+index c1e05532d437..0470fc843d4e 100644
+--- a/drivers/iio/adc/ti_am335x_adc.c
++++ b/drivers/iio/adc/ti_am335x_adc.c
+@@ -32,6 +32,7 @@
+
+ struct tiadc_device {
+ struct ti_tscadc_dev *mfd_tscadc;
++ struct mutex fifo1_lock; /* to protect fifo access */
+ int channels;
+ u8 channel_line[8];
+ u8 channel_step[8];
+@@ -360,6 +361,7 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
+ int *val, int *val2, long mask)
+ {
+ struct tiadc_device *adc_dev = iio_priv(indio_dev);
++ int ret = IIO_VAL_INT;
+ int i, map_val;
+ unsigned int fifo1count, read, stepid;
+ bool found = false;
+@@ -373,13 +375,14 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
+ if (!step_en)
+ return -EINVAL;
+
++ mutex_lock(&adc_dev->fifo1_lock);
+ fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
+ while (fifo1count--)
+ tiadc_readl(adc_dev, REG_FIFO1);
+
+ am335x_tsc_se_set_once(adc_dev->mfd_tscadc, step_en);
+
+- timeout = jiffies + usecs_to_jiffies
++ timeout = jiffies + msecs_to_jiffies
+ (IDLE_TIMEOUT * adc_dev->channels);
+ /* Wait for Fifo threshold interrupt */
+ while (1) {
+@@ -389,7 +392,8 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
+
+ if (time_after(jiffies, timeout)) {
+ am335x_tsc_se_adc_done(adc_dev->mfd_tscadc);
+- return -EAGAIN;
++ ret = -EAGAIN;
++ goto err_unlock;
+ }
+ }
+ map_val = adc_dev->channel_step[chan->scan_index];
+@@ -415,8 +419,11 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
+ am335x_tsc_se_adc_done(adc_dev->mfd_tscadc);
+
+ if (found == false)
+- return -EBUSY;
+- return IIO_VAL_INT;
++ ret = -EBUSY;
++
++err_unlock:
++ mutex_unlock(&adc_dev->fifo1_lock);
++ return ret;
+ }
+
+ static const struct iio_info tiadc_info = {
+@@ -485,6 +492,7 @@ static int tiadc_probe(struct platform_device *pdev)
+
+ tiadc_step_config(indio_dev);
+ tiadc_writel(adc_dev, REG_FIFO1THR, FIFO1_THRESHOLD);
++ mutex_init(&adc_dev->fifo1_lock);
+
+ err = tiadc_channel_init(indio_dev, adc_dev->channels);
+ if (err < 0)
+diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+index e81f434760f4..dc33c1dd5191 100644
+--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
++++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+@@ -56,8 +56,8 @@ static struct {
+ {HID_USAGE_SENSOR_ALS, 0, 1, 0},
+ {HID_USAGE_SENSOR_ALS, HID_USAGE_SENSOR_UNITS_LUX, 1, 0},
+
+- {HID_USAGE_SENSOR_PRESSURE, 0, 100000, 0},
+- {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 1, 0},
++ {HID_USAGE_SENSOR_PRESSURE, 0, 100, 0},
++ {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 0, 1000},
+ };
+
+ static int pow_10(unsigned power)
+diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c
+index 11535911a5c6..0ebced5570af 100644
+--- a/drivers/iio/humidity/am2315.c
++++ b/drivers/iio/humidity/am2315.c
+@@ -244,7 +244,7 @@ static int am2315_probe(struct i2c_client *client,
+ indio_dev->channels = am2315_channels;
+ indio_dev->num_channels = ARRAY_SIZE(am2315_channels);
+
+- ret = iio_triggered_buffer_setup(indio_dev, NULL,
++ ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time,
+ am2315_trigger_handler, NULL);
+ if (ret < 0) {
+ dev_err(&client->dev, "iio triggered buffer setup failed\n");
+diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c
+index a03832a5fc95..e0c9c70c2a4a 100644
+--- a/drivers/iio/humidity/hdc100x.c
++++ b/drivers/iio/humidity/hdc100x.c
+@@ -142,7 +142,7 @@ static int hdc100x_get_measurement(struct hdc100x_data *data,
+ struct i2c_client *client = data->client;
+ int delay = data->adc_int_us[chan->address];
+ int ret;
+- int val;
++ __be16 val;
+
+ /* start measurement */
+ ret = i2c_smbus_write_byte(client, chan->address);
+@@ -154,26 +154,13 @@ static int hdc100x_get_measurement(struct hdc100x_data *data,
+ /* wait for integration time to pass */
+ usleep_range(delay, delay + 1000);
+
+- /*
+- * i2c_smbus_read_word_data cannot() be used here due to the command
+- * value not being understood and causes NAKs preventing any reading
+- * from being accessed.
+- */
+- ret = i2c_smbus_read_byte(client);
++ /* read measurement */
++ ret = i2c_master_recv(data->client, (char *)&val, sizeof(val));
+ if (ret < 0) {
+- dev_err(&client->dev, "cannot read high byte measurement");
++ dev_err(&client->dev, "cannot read sensor data\n");
+ return ret;
+ }
+- val = ret << 8;
+-
+- ret = i2c_smbus_read_byte(client);
+- if (ret < 0) {
+- dev_err(&client->dev, "cannot read low byte measurement");
+- return ret;
+- }
+- val |= ret;
+-
+- return val;
++ return be16_to_cpu(val);
+ }
+
+ static int hdc100x_get_heater_status(struct hdc100x_data *data)
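The two SMBus byte reads are replaced by a single i2c_master_recv() of the
16-bit big-endian result, decoded with be16_to_cpu(). The byte-order
conversion on its own, runnable:

    #include <stdio.h>
    #include <stdint.h>

    /* Decode a big-endian 16-bit value byte by byte, which is what
     * be16_to_cpu() amounts to on a little-endian host. */
    static uint16_t be16_decode(const uint8_t *buf)
    {
        return (uint16_t)((buf[0] << 8) | buf[1]);
    }

    int main(void)
    {
        uint8_t wire[2] = { 0x64, 0x1e };   /* as received from the sensor */

        printf("raw = %#x\n", be16_decode(wire));   /* prints 0x641e */
        return 0;
    }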
+@@ -272,8 +259,8 @@ static int hdc100x_probe(struct i2c_client *client,
+ struct iio_dev *indio_dev;
+ struct hdc100x_data *data;
+
+- if (!i2c_check_functionality(client->adapter,
+- I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_BYTE))
++ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA |
++ I2C_FUNC_SMBUS_BYTE | I2C_FUNC_I2C))
+ return -EOPNOTSUPP;
+
+ indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data));
+diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
+index 49bf9c59f117..158aaf44dd95 100644
+--- a/drivers/iio/industrialio-buffer.c
++++ b/drivers/iio/industrialio-buffer.c
+@@ -110,7 +110,7 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf,
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ size_t datum_size;
+ size_t to_wait;
+- int ret;
++ int ret = 0;
+
+ if (!indio_dev->info)
+ return -ENODEV;
+@@ -153,7 +153,7 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf,
+ ret = rb->access->read_first_n(rb, n, buf);
+ if (ret == 0 && (filp->f_flags & O_NONBLOCK))
+ ret = -EAGAIN;
+- } while (ret == 0);
++ } while (ret == 0);
+ remove_wait_queue(&rb->pollq, &wait);
+
+ return ret;
+diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
+index e6319a9346b2..2e6a427588e1 100644
+--- a/drivers/iio/industrialio-core.c
++++ b/drivers/iio/industrialio-core.c
+@@ -532,9 +532,8 @@ ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals)
+ return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
+ case IIO_VAL_FRACTIONAL:
+ tmp = div_s64((s64)vals[0] * 1000000000LL, vals[1]);
+- vals[1] = do_div(tmp, 1000000000LL);
+- vals[0] = tmp;
+- return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
++ vals[0] = (int)div_s64_rem(tmp, 1000000000, &vals[1]);
++ return sprintf(buf, "%d.%09u\n", vals[0], abs(vals[1]));
+ case IIO_VAL_FRACTIONAL_LOG2:
+ tmp = (s64)vals[0] * 1000000000LL >> vals[1];
+ vals[1] = do_div(tmp, 1000000000LL);
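+
do_div() is only defined for unsigned dividends, so a negative
IIO_VAL_FRACTIONAL value came out mangled; div_s64_rem() keeps the sign in
the quotient and abs() tidies the remainder for printing. The difference
for a negative value, shown in plain C:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdlib.h>

    int main(void)
    {
        int64_t tmp = -1500000000;   /* -1.5, scaled by 1e9 */

        /* do_div()-style: the dividend is reinterpreted as unsigned,
         * which corrupts negative values. */
        uint64_t u = (uint64_t)tmp;
        printf("unsigned: %llu.%09llu\n",
               (unsigned long long)(u / 1000000000ULL),
               (unsigned long long)(u % 1000000000ULL));

        /* div_s64_rem()-style: signed quotient, abs() on the remainder. */
        long long quot = tmp / 1000000000LL;
        long long rem  = tmp % 1000000000LL;
        printf("signed:   %lld.%09llu\n", quot,
               (unsigned long long)llabs(rem));
        return 0;
    }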
+diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c
+index e2f926cdcad2..a0aedda7dfd7 100644
+--- a/drivers/iio/proximity/as3935.c
++++ b/drivers/iio/proximity/as3935.c
+@@ -392,7 +392,7 @@ static int as3935_probe(struct spi_device *spi)
+ return ret;
+ }
+
+- ret = iio_triggered_buffer_setup(indio_dev, NULL,
++ ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time,
+ &as3935_trigger_handler, NULL);
+
+ if (ret) {
+diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
+index 612ccfd39bf9..9245e55debed 100644
+--- a/drivers/infiniband/core/uverbs.h
++++ b/drivers/infiniband/core/uverbs.h
+@@ -116,6 +116,7 @@ struct ib_uverbs_event_file {
+ struct ib_uverbs_file {
+ struct kref ref;
+ struct mutex mutex;
++ struct mutex cleanup_mutex; /* protect cleanup */
+ struct ib_uverbs_device *device;
+ struct ib_ucontext *ucontext;
+ struct ib_event_handler event_handler;
+diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
+index 31f422a70623..09d515763ad6 100644
+--- a/drivers/infiniband/core/uverbs_main.c
++++ b/drivers/infiniband/core/uverbs_main.c
+@@ -931,6 +931,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
+ file->async_file = NULL;
+ kref_init(&file->ref);
+ mutex_init(&file->mutex);
++ mutex_init(&file->cleanup_mutex);
+
+ filp->private_data = file;
+ kobject_get(&dev->kobj);
+@@ -956,18 +957,20 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
+ {
+ struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_device *dev = file->device;
+- struct ib_ucontext *ucontext = NULL;
++
++ mutex_lock(&file->cleanup_mutex);
++ if (file->ucontext) {
++ ib_uverbs_cleanup_ucontext(file, file->ucontext);
++ file->ucontext = NULL;
++ }
++ mutex_unlock(&file->cleanup_mutex);
+
+ mutex_lock(&file->device->lists_mutex);
+- ucontext = file->ucontext;
+- file->ucontext = NULL;
+ if (!file->is_closed) {
+ list_del(&file->list);
+ file->is_closed = 1;
+ }
+ mutex_unlock(&file->device->lists_mutex);
+- if (ucontext)
+- ib_uverbs_cleanup_ucontext(file, ucontext);
+
+ if (file->async_file)
+ kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+@@ -1181,22 +1184,30 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
+ mutex_lock(&uverbs_dev->lists_mutex);
+ while (!list_empty(&uverbs_dev->uverbs_file_list)) {
+ struct ib_ucontext *ucontext;
+-
+ file = list_first_entry(&uverbs_dev->uverbs_file_list,
+ struct ib_uverbs_file, list);
+ file->is_closed = 1;
+- ucontext = file->ucontext;
+ list_del(&file->list);
+- file->ucontext = NULL;
+ kref_get(&file->ref);
+ mutex_unlock(&uverbs_dev->lists_mutex);
+- /* We must release the mutex before going ahead and calling
+- * disassociate_ucontext. disassociate_ucontext might end up
+- * indirectly calling uverbs_close, for example due to freeing
+- * the resources (e.g mmput).
+- */
++
+ ib_uverbs_event_handler(&file->event_handler, &event);
++
++ mutex_lock(&file->cleanup_mutex);
++ ucontext = file->ucontext;
++ file->ucontext = NULL;
++ mutex_unlock(&file->cleanup_mutex);
++
++ /* At this point ib_uverbs_close cannot be running
++ * ib_uverbs_cleanup_ucontext
++ */
+ if (ucontext) {
++ /* We must release the mutex before going ahead and
++ * calling disassociate_ucontext. disassociate_ucontext
++ * might end up indirectly calling uverbs_close,
++ * for example due to freeing the resources
++ * (e.g. mmput).
++ */
+ ib_dev->disassociate_ucontext(ucontext);
+ ib_uverbs_cleanup_ucontext(file, ucontext);
+ }
+diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
+index dbab9d9cc288..a49cc88f08a2 100644
+--- a/drivers/infiniband/hw/hfi1/debugfs.c
++++ b/drivers/infiniband/hw/hfi1/debugfs.c
+@@ -223,28 +223,32 @@ DEBUGFS_SEQ_FILE_OPEN(ctx_stats)
+ DEBUGFS_FILE_OPS(ctx_stats);
+
+ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
+-__acquires(RCU)
++ __acquires(RCU)
+ {
+ struct qp_iter *iter;
+ loff_t n = *pos;
+
+- rcu_read_lock();
+ iter = qp_iter_init(s->private);
++
++ /* stop calls rcu_read_unlock */
++ rcu_read_lock();
++
+ if (!iter)
+ return NULL;
+
+- while (n--) {
++ do {
+ if (qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+- }
++ } while (n--);
+
+ return iter;
+ }
+
+ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
+ loff_t *pos)
++ __must_hold(RCU)
+ {
+ struct qp_iter *iter = iter_ptr;
+
+@@ -259,7 +263,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
+ }
+
+ static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
+-__releases(RCU)
++ __releases(RCU)
+ {
+ rcu_read_unlock();
+ }
+diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
+index 03df9322f862..1d09f767b680 100644
+--- a/drivers/infiniband/hw/hfi1/platform.c
++++ b/drivers/infiniband/hw/hfi1/platform.c
+@@ -638,9 +638,13 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
+ if (ret)
+ return ret;
+
++ /*
++ * We'll change the QSFP memory contents from here on out, thus we set a
++ * flag here to remind ourselves to reset the QSFP module. This prevents
++ * reuse of stale settings established in our previous pass through.
++ */
+ if (ppd->qsfp_info.reset_needed) {
+ reset_qsfp(ppd);
+- ppd->qsfp_info.reset_needed = 0;
+ refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+ } else {
+ ppd->qsfp_info.reset_needed = 1;
+diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
+index 1a942ffba4cb..995c897669b2 100644
+--- a/drivers/infiniband/hw/hfi1/qp.c
++++ b/drivers/infiniband/hw/hfi1/qp.c
+@@ -595,10 +595,6 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
+
+ iter->dev = dev;
+ iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
+- if (qp_iter_next(iter)) {
+- kfree(iter);
+- return NULL;
+- }
+
+ return iter;
+ }
+diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c
+index 5e75b43c596b..5bad8e3b40bb 100644
+--- a/drivers/infiniband/hw/qib/qib_debugfs.c
++++ b/drivers/infiniband/hw/qib/qib_debugfs.c
+@@ -189,27 +189,32 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v)
+ DEBUGFS_FILE(ctx_stats)
+
+ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
++ __acquires(RCU)
+ {
+ struct qib_qp_iter *iter;
+ loff_t n = *pos;
+
+- rcu_read_lock();
+ iter = qib_qp_iter_init(s->private);
++
++ /* stop calls rcu_read_unlock */
++ rcu_read_lock();
++
+ if (!iter)
+ return NULL;
+
+- while (n--) {
++ do {
+ if (qib_qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+- }
++ } while (n--);
+
+ return iter;
+ }
+
+ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
+ loff_t *pos)
++ __must_hold(RCU)
+ {
+ struct qib_qp_iter *iter = iter_ptr;
+
+@@ -224,6 +229,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
+ }
+
+ static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
++ __releases(RCU)
+ {
+ rcu_read_unlock();
+ }
+diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
+index 575b737d9ef3..7119a7da289f 100644
+--- a/drivers/infiniband/hw/qib/qib_qp.c
++++ b/drivers/infiniband/hw/qib/qib_qp.c
+@@ -530,10 +530,6 @@ struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev)
+ return NULL;
+
+ iter->dev = dev;
+- if (qib_qp_iter_next(iter)) {
+- kfree(iter);
+- return NULL;
+- }
+
+ return iter;
+ }
+diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
+index 6a86b5d1defa..7330a66e2b7e 100644
+--- a/drivers/iommu/dmar.c
++++ b/drivers/iommu/dmar.c
+@@ -1871,10 +1871,11 @@ static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
+ /*
+ * All PCI devices managed by this unit should have been destroyed.
+ */
+- if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt)
++ if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
+ for_each_active_dev_scope(dmaru->devices,
+ dmaru->devices_cnt, i, dev)
+ return -EBUSY;
++ }
+
+ ret = dmar_ir_hotplug(dmaru, false);
+ if (ret == 0)
+diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
+index d416242c4ab8..50b639ba3daa 100644
+--- a/drivers/iommu/intel-iommu.c
++++ b/drivers/iommu/intel-iommu.c
+@@ -4272,10 +4272,11 @@ int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
+ if (!atsru)
+ return 0;
+
+- if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
++ if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
+ for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
+ i, dev)
+ return -EBUSY;
++ }
+
+ return 0;
+ }
+diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c
+index 112e17c2768b..37f952dd9fc9 100644
+--- a/drivers/irqchip/irq-atmel-aic.c
++++ b/drivers/irqchip/irq-atmel-aic.c
+@@ -176,6 +176,7 @@ static int aic_irq_domain_xlate(struct irq_domain *d,
+ {
+ struct irq_domain_chip_generic *dgc = d->gc;
+ struct irq_chip_generic *gc;
++ unsigned long flags;
+ unsigned smr;
+ int idx;
+ int ret;
+@@ -194,11 +195,11 @@ static int aic_irq_domain_xlate(struct irq_domain *d,
+
+ gc = dgc->gc[idx];
+
+- irq_gc_lock(gc);
++ irq_gc_lock_irqsave(gc, flags);
+ smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq));
+ aic_common_set_priority(intspec[2], &smr);
+ irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq));
+- irq_gc_unlock(gc);
++ irq_gc_unlock_irqrestore(gc, flags);
+
+ return ret;
+ }
+diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c
+index 4f0d068e1abe..2a624d87a035 100644
+--- a/drivers/irqchip/irq-atmel-aic5.c
++++ b/drivers/irqchip/irq-atmel-aic5.c
+@@ -258,6 +258,7 @@ static int aic5_irq_domain_xlate(struct irq_domain *d,
+ unsigned int *out_type)
+ {
+ struct irq_chip_generic *bgc = irq_get_domain_generic_chip(d, 0);
++ unsigned long flags;
+ unsigned smr;
+ int ret;
+
+@@ -269,12 +270,12 @@ static int aic5_irq_domain_xlate(struct irq_domain *d,
+ if (ret)
+ return ret;
+
+- irq_gc_lock(bgc);
++ irq_gc_lock_irqsave(bgc, flags);
+ irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR);
+ smr = irq_reg_readl(bgc, AT91_AIC5_SMR);
+ aic_common_set_priority(intspec[2], &smr);
+ irq_reg_writel(bgc, smr, AT91_AIC5_SMR);
+- irq_gc_unlock(bgc);
++ irq_gc_unlock_irqrestore(bgc, flags);
+
+ return ret;
+ }
+diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
+index 4f3cb3554944..cfe28cf66ce4 100644
+--- a/drivers/md/dm-crypt.c
++++ b/drivers/md/dm-crypt.c
+@@ -1453,7 +1453,7 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode)
+ unsigned i;
+ int err;
+
+- cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_skcipher *),
++ cc->tfms = kzalloc(cc->tfms_count * sizeof(struct crypto_skcipher *),
+ GFP_KERNEL);
+ if (!cc->tfms)
+ return -ENOMEM;
+@@ -1923,6 +1923,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
+ return DM_MAPIO_REMAPPED;
+ }
+
++ /*
++ * Check if the bio is too large and split it as needed.
++ */
++ if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) &&
++ bio_data_dir(bio) == WRITE)
++ dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
++
+ io = dm_per_bio_data(bio, cc->per_bio_data_size);
+ crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
+ io->ctx.req = (struct skcipher_request *)(io + 1);
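+
The split threshold is one bio's worth of pages, expressed in sectors for
dm_accept_partial_bio(). With the usual x86 constants (assumed here: 4 KiB
pages, 512-byte sectors, BIO_MAX_PAGES of 256) the arithmetic works out to
a 1 MiB cap:

    #include <stdio.h>

    #define BIO_MAX_PAGES 256
    #define PAGE_SHIFT    12    /* 4 KiB pages */
    #define SECTOR_SHIFT  9     /* 512-byte sectors */

    int main(void)
    {
        unsigned long bytes   = (unsigned long)BIO_MAX_PAGES << PAGE_SHIFT;
        unsigned long sectors = bytes >> SECTOR_SHIFT;

        /* 1048576 bytes = 2048 sectors per accepted chunk */
        printf("max bio: %lu bytes = %lu sectors\n", bytes, sectors);
        return 0;
    }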
+diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
+index 4bb49cd602e9..4eb5c67b1309 100644
+--- a/drivers/md/dm-flakey.c
++++ b/drivers/md/dm-flakey.c
+@@ -289,15 +289,13 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
+ pb->bio_submitted = true;
+
+ /*
+- * Map reads as normal only if corrupt_bio_byte set.
++ * Error reads if neither corrupt_bio_byte nor drop_writes is set.
++ * Otherwise, flakey_end_io() will decide if the reads should be modified.
+ */
+ if (bio_data_dir(bio) == READ) {
+- /* If flags were specified, only corrupt those that match. */
+- if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
+- all_corrupt_bio_flags_match(bio, fc))
+- goto map_bio;
+- else
++ if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags))
+ return -EIO;
++ goto map_bio;
+ }
+
+ /*
+@@ -334,14 +332,21 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
+ struct flakey_c *fc = ti->private;
+ struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
+
+- /*
+- * Corrupt successful READs while in down state.
+- */
+ if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+- if (fc->corrupt_bio_byte)
++ if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
++ all_corrupt_bio_flags_match(bio, fc)) {
++ /*
++ * Corrupt successful matching READs while in down state.
++ */
+ corrupt_bio_data(bio, fc);
+- else
++
++ } else if (!test_bit(DROP_WRITES, &fc->flags)) {
++ /*
++ * Error the read during the down_interval if drop_writes
++ * wasn't configured.
++ */
+ return -EIO;
++ }
+ }
+
+ return error;
+diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
+index 608302e222af..d8f8cc85f96c 100644
+--- a/drivers/md/dm-log-writes.c
++++ b/drivers/md/dm-log-writes.c
+@@ -258,12 +258,12 @@ static int log_one_block(struct log_writes_c *lc,
+ goto out;
+ sector++;
+
++ atomic_inc(&lc->io_blocks);
+ bio = bio_alloc(GFP_KERNEL, block->vec_cnt);
+ if (!bio) {
+ DMERR("Couldn't alloc log bio");
+ goto error;
+ }
+- atomic_inc(&lc->io_blocks);
+ bio->bi_iter.bi_size = 0;
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_bdev = lc->logdev->bdev;
+@@ -456,9 +456,9 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+ goto bad;
+ }
+
+- ret = -EINVAL;
+ lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write");
+- if (!lc->log_kthread) {
++ if (IS_ERR(lc->log_kthread)) {
++ ret = PTR_ERR(lc->log_kthread);
+ ti->error = "Couldn't alloc kthread";
+ dm_put_device(ti, lc->dev);
+ dm_put_device(ti, lc->logdev);
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index 866825f10b4c..0678a0a95761 100644
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -7599,16 +7599,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations);
+
+ int md_setup_cluster(struct mddev *mddev, int nodes)
+ {
+- int err;
+-
+- err = request_module("md-cluster");
+- if (err) {
+- pr_err("md-cluster module not found.\n");
+- return -ENOENT;
+- }
+-
++ if (!md_cluster_ops)
++ request_module("md-cluster");
+ spin_lock(&pers_lock);
++ /* ensure module won't be unloaded */
+ if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
++ pr_err("can't find md-cluster module or get it's reference.\n");
+ spin_unlock(&pers_lock);
+ return -ENOENT;
+ }
+diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
+index 15508df24e5d..73ec3200191e 100644
+--- a/drivers/memory/omap-gpmc.c
++++ b/drivers/memory/omap-gpmc.c
+@@ -2250,7 +2250,7 @@ static int gpmc_probe_dt(struct platform_device *pdev)
+ return 0;
+ }
+
+-static int gpmc_probe_dt_children(struct platform_device *pdev)
++static void gpmc_probe_dt_children(struct platform_device *pdev)
+ {
+ int ret;
+ struct device_node *child;
+@@ -2265,11 +2265,11 @@ static int gpmc_probe_dt_children(struct platform_device *pdev)
+ else
+ ret = gpmc_probe_generic_child(pdev, child);
+
+- if (ret)
+- return ret;
++ if (ret) {
++ dev_err(&pdev->dev, "failed to probe DT child '%s': %d\n",
++ child->name, ret);
++ }
+ }
+-
+- return 0;
+ }
+ #else
+ static int gpmc_probe_dt(struct platform_device *pdev)
+@@ -2277,9 +2277,8 @@ static int gpmc_probe_dt(struct platform_device *pdev)
+ return 0;
+ }
+
+-static int gpmc_probe_dt_children(struct platform_device *pdev)
++static void gpmc_probe_dt_children(struct platform_device *pdev)
+ {
+- return 0;
+ }
+ #endif
+
+@@ -2372,16 +2371,10 @@ static int gpmc_probe(struct platform_device *pdev)
+ goto setup_irq_failed;
+ }
+
+- rc = gpmc_probe_dt_children(pdev);
+- if (rc < 0) {
+- dev_err(gpmc->dev, "failed to probe DT children\n");
+- goto dt_children_failed;
+- }
++ gpmc_probe_dt_children(pdev);
+
+ return 0;
+
+-dt_children_failed:
+- gpmc_free_irq(gpmc);
+ setup_irq_failed:
+ gpmc_gpio_exit(gpmc);
+ gpio_init_failed:
+diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
+index 320e1c2f8853..b7eaecfdd796 100644
+--- a/drivers/mmc/host/sdhci-st.c
++++ b/drivers/mmc/host/sdhci-st.c
+@@ -28,6 +28,7 @@
+
+ struct st_mmc_platform_data {
+ struct reset_control *rstc;
++ struct clk *icnclk;
+ void __iomem *top_ioaddr;
+ };
+
+@@ -353,7 +354,7 @@ static int sdhci_st_probe(struct platform_device *pdev)
+ struct sdhci_host *host;
+ struct st_mmc_platform_data *pdata;
+ struct sdhci_pltfm_host *pltfm_host;
+- struct clk *clk;
++ struct clk *clk, *icnclk;
+ int ret = 0;
+ u16 host_version;
+ struct resource *res;
+@@ -365,6 +366,11 @@ static int sdhci_st_probe(struct platform_device *pdev)
+ return PTR_ERR(clk);
+ }
+
++ /* ICN clock isn't compulsory, but use it if it's provided. */
++ icnclk = devm_clk_get(&pdev->dev, "icn");
++ if (IS_ERR(icnclk))
++ icnclk = NULL;
++
+ rstc = devm_reset_control_get(&pdev->dev, NULL);
+ if (IS_ERR(rstc))
+ rstc = NULL;
+@@ -389,6 +395,7 @@ static int sdhci_st_probe(struct platform_device *pdev)
+ }
+
+ clk_prepare_enable(clk);
++ clk_prepare_enable(icnclk);
+
+ /* Configure the FlashSS Top registers for setting eMMC TX/RX delay */
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+@@ -400,6 +407,7 @@ static int sdhci_st_probe(struct platform_device *pdev)
+ }
+
+ pltfm_host->clk = clk;
++ pdata->icnclk = icnclk;
+
+ /* Configure the Arasan HC inside the flashSS */
+ st_mmcss_cconfig(np, host);
+@@ -422,6 +430,7 @@ static int sdhci_st_probe(struct platform_device *pdev)
+ return 0;
+
+ err_out:
++ clk_disable_unprepare(icnclk);
+ clk_disable_unprepare(clk);
+ err_of:
+ sdhci_pltfm_free(pdev);
+@@ -442,6 +451,8 @@ static int sdhci_st_remove(struct platform_device *pdev)
+
+ ret = sdhci_pltfm_unregister(pdev);
+
++ clk_disable_unprepare(pdata->icnclk);
++
+ if (rstc)
+ reset_control_assert(rstc);
+
+@@ -462,6 +473,7 @@ static int sdhci_st_suspend(struct device *dev)
+ if (pdata->rstc)
+ reset_control_assert(pdata->rstc);
+
++ clk_disable_unprepare(pdata->icnclk);
+ clk_disable_unprepare(pltfm_host->clk);
+ out:
+ return ret;
+@@ -475,6 +487,7 @@ static int sdhci_st_resume(struct device *dev)
+ struct device_node *np = dev->of_node;
+
+ clk_prepare_enable(pltfm_host->clk);
++ clk_prepare_enable(pdata->icnclk);
+
+ if (pdata->rstc)
+ reset_control_deassert(pdata->rstc);
+diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
+index 4d7981946f79..70dac7302d51 100644
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -1341,9 +1341,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
+ slave_dev->name);
+ }
+
+- /* already enslaved */
+- if (slave_dev->flags & IFF_SLAVE) {
+- netdev_dbg(bond_dev, "Error: Device was already enslaved\n");
++ /* already in-use? */
++ if (netdev_is_rx_handler_busy(slave_dev)) {
++ netdev_err(bond_dev,
++ "Error: Device is in use and cannot be enslaved\n");
+ return -EBUSY;
+ }
+
+diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
+index 200b1f5fdb56..71b1e529812e 100644
+--- a/drivers/net/dsa/bcm_sf2.h
++++ b/drivers/net/dsa/bcm_sf2.h
+@@ -189,8 +189,8 @@ static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val, \
+ static inline void intrl2_##which##_mask_clear(struct bcm_sf2_priv *priv, \
+ u32 mask) \
+ { \
+- intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \
+ priv->irq##which##_mask &= ~(mask); \
++ intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \
+ } \
+ static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \
+ u32 mask) \
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index c777cde85ce4..e655b76e8f31 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -293,8 +293,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ push_len = (length + sizeof(*tx_push) + 7) / 8;
+ if (push_len > 16) {
+ __iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16);
+- __iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
+- push_len - 16);
++ __iowrite32_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
++ (push_len - 16) << 1);
+ } else {
+ __iowrite64_copy(txr->tx_doorbell, tx_push_buf,
+ push_len);
+diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
+index 8a13824ef802..644743c9ca82 100644
+--- a/drivers/net/ethernet/cadence/macb.h
++++ b/drivers/net/ethernet/cadence/macb.h
+@@ -403,11 +403,11 @@
+ #define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII 0x00000004
+ #define MACB_CAPS_NO_GIGABIT_HALF 0x00000008
+ #define MACB_CAPS_USRIO_DISABLED 0x00000010
++#define MACB_CAPS_JUMBO 0x00000020
+ #define MACB_CAPS_FIFO_MODE 0x10000000
+ #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000
+ #define MACB_CAPS_SG_DISABLED 0x40000000
+ #define MACB_CAPS_MACB_IS_GEM 0x80000000
+-#define MACB_CAPS_JUMBO 0x00000010
+
+ /* Bit manipulation macros */
+ #define MACB_BIT(name) \
+diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h
+index afb10e326b4f..fab35a593898 100644
+--- a/drivers/net/ethernet/cavium/thunder/nic_reg.h
++++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h
+@@ -170,7 +170,6 @@
+ #define NIC_QSET_SQ_0_7_DOOR (0x010838)
+ #define NIC_QSET_SQ_0_7_STATUS (0x010840)
+ #define NIC_QSET_SQ_0_7_DEBUG (0x010848)
+-#define NIC_QSET_SQ_0_7_CNM_CHG (0x010860)
+ #define NIC_QSET_SQ_0_7_STAT_0_1 (0x010900)
+
+ #define NIC_QSET_RBDR_0_1_CFG (0x010C00)
+diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+index d2d8ef270142..ad4fddb55421 100644
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+@@ -382,7 +382,10 @@ static void nicvf_get_regs(struct net_device *dev,
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DOOR, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS, q);
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DEBUG, q);
+- p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CNM_CHG, q);
++ /* Padding, was NIC_QSET_SQ_0_7_CNM_CHG, which
++ * produces bus errors when read
++ */
++ p[i++] = 0;
+ p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1, q);
+ reg_offset = NIC_QSET_SQ_0_7_STAT_0_1 | (1 << 3);
+ p[i++] = nicvf_queue_reg_read(nic, reg_offset, q);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+index d6e2a1cae19a..c2ec01a22d55 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -143,13 +143,14 @@ static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
+ return cmd->cmd_buf + (idx << cmd->log_stride);
+ }
+
+-static u8 xor8_buf(void *buf, int len)
++static u8 xor8_buf(void *buf, size_t offset, int len)
+ {
+ u8 *ptr = buf;
+ u8 sum = 0;
+ int i;
++ int end = len + offset;
+
+- for (i = 0; i < len; i++)
++ for (i = offset; i < end; i++)
+ sum ^= ptr[i];
+
+ return sum;
+@@ -157,41 +158,49 @@ static u8 xor8_buf(void *buf, int len)
+
+ static int verify_block_sig(struct mlx5_cmd_prot_block *block)
+ {
+- if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff)
++ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
++ int xor_len = sizeof(*block) - sizeof(block->data) - 1;
++
++ if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
+ return -EINVAL;
+
+- if (xor8_buf(block, sizeof(*block)) != 0xff)
++ if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
+ return -EINVAL;
+
+ return 0;
+ }
+
+-static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token,
+- int csum)
++static void calc_block_sig(struct mlx5_cmd_prot_block *block)
+ {
+- block->token = token;
+- if (csum) {
+- block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) -
+- sizeof(block->data) - 2);
+- block->sig = ~xor8_buf(block, sizeof(*block) - 1);
+- }
++ int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2;
++ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
++
++ block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len);
++ block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1);
+ }
+
+-static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum)
++static void calc_chain_sig(struct mlx5_cmd_msg *msg)
+ {
+ struct mlx5_cmd_mailbox *next = msg->next;
+-
+- while (next) {
+- calc_block_sig(next->buf, token, csum);
++ int size = msg->len;
++ int blen = size - min_t(int, sizeof(msg->first.data), size);
++ int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
++ / MLX5_CMD_DATA_BLOCK_SIZE;
++ int i = 0;
++
++ for (i = 0; i < n && next; i++) {
++ calc_block_sig(next->buf);
+ next = next->next;
+ }
+ }
+
+ static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
+ {
+- ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay));
+- calc_chain_sig(ent->in, ent->token, csum);
+- calc_chain_sig(ent->out, ent->token, csum);
++ ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay));
++ if (csum) {
++ calc_chain_sig(ent->in);
++ calc_chain_sig(ent->out);
++ }
+ }
+
+ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
+@@ -222,12 +231,17 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent)
+ struct mlx5_cmd_mailbox *next = ent->out->next;
+ int err;
+ u8 sig;
++ int size = ent->out->len;
++ int blen = size - min_t(int, sizeof(ent->out->first.data), size);
++ int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
++ / MLX5_CMD_DATA_BLOCK_SIZE;
++ int i = 0;
+
+- sig = xor8_buf(ent->lay, sizeof(*ent->lay));
++ sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
+ if (sig != 0xff)
+ return -EINVAL;
+
+- while (next) {
++ for (i = 0; i < n && next; i++) {
+ err = verify_block_sig(next->buf);
+ if (err)
+ return err;
+@@ -656,7 +670,6 @@ static void cmd_work_handler(struct work_struct *work)
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+ }
+
+- ent->token = alloc_token(cmd);
+ cmd->ent_arr[ent->idx] = ent;
+ lay = get_inst(cmd, ent->idx);
+ ent->lay = lay;
+@@ -766,7 +779,8 @@ static u8 *get_status_ptr(struct mlx5_outbox_hdr *out)
+ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out, void *uout, int uout_size,
+ mlx5_cmd_cbk_t callback,
+- void *context, int page_queue, u8 *status)
++ void *context, int page_queue, u8 *status,
++ u8 token)
+ {
+ struct mlx5_cmd *cmd = &dev->cmd;
+ struct mlx5_cmd_work_ent *ent;
+@@ -783,6 +797,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
++ ent->token = token;
++
+ if (!callback)
+ init_completion(&ent->done);
+
+@@ -854,7 +870,8 @@ static const struct file_operations fops = {
+ .write = dbg_write,
+ };
+
+-static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
++static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size,
++ u8 token)
+ {
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_mailbox *next;
+@@ -880,6 +897,7 @@ static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
+ memcpy(block->data, from, copy);
+ from += copy;
+ size -= copy;
++ block->token = token;
+ next = next->next;
+ }
+
+@@ -949,7 +967,8 @@ static void free_cmd_box(struct mlx5_core_dev *dev,
+ }
+
+ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
+- gfp_t flags, int size)
++ gfp_t flags, int size,
++ u8 token)
+ {
+ struct mlx5_cmd_mailbox *tmp, *head = NULL;
+ struct mlx5_cmd_prot_block *block;
+@@ -978,6 +997,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
+ tmp->next = head;
+ block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
+ block->block_num = cpu_to_be32(n - i - 1);
++ block->token = token;
+ head = tmp;
+ }
+ msg->next = head;
+@@ -1352,7 +1372,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
+ }
+
+ if (IS_ERR(msg))
+- msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
++ msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
+
+ return msg;
+ }
+@@ -1377,6 +1397,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ int err;
+ u8 status = 0;
+ u32 drv_synd;
++ u8 token;
+
+ if (pci_channel_offline(dev->pdev) ||
+ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+@@ -1395,20 +1416,22 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ return err;
+ }
+
+- err = mlx5_copy_to_msg(inb, in, in_size);
++ token = alloc_token(&dev->cmd);
++
++ err = mlx5_copy_to_msg(inb, in, in_size, token);
+ if (err) {
+ mlx5_core_warn(dev, "err %d\n", err);
+ goto out_in;
+ }
+
+- outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
++ outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token);
+ if (IS_ERR(outb)) {
+ err = PTR_ERR(outb);
+ goto out_in;
+ }
+
+ err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
+- pages_queue, &status);
++ pages_queue, &status, token);
+ if (err)
+ goto out_out;
+
+@@ -1476,7 +1499,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev)
+ INIT_LIST_HEAD(&cmd->cache.med.head);
+
+ for (i = 0; i < NUM_LONG_LISTS; i++) {
+- msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE);
++ msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0);
+ if (IS_ERR(msg)) {
+ err = PTR_ERR(msg);
+ goto ex_err;
+@@ -1486,7 +1509,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev)
+ }
+
+ for (i = 0; i < NUM_MED_LISTS; i++) {
+- msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE);
++ msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0);
+ if (IS_ERR(msg)) {
+ err = PTR_ERR(msg);
+ goto ex_err;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+index 9f2a16a507e0..e41a06675ee5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -648,24 +648,32 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
+ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt)
+ {
+- struct ethhdr *eth = (struct ethhdr *)(skb->data);
+- struct iphdr *ipv4 = (struct iphdr *)(skb->data + ETH_HLEN);
+- struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + ETH_HLEN);
++ struct ethhdr *eth = (struct ethhdr *)(skb->data);
++ struct iphdr *ipv4;
++ struct ipv6hdr *ipv6;
+ struct tcphdr *tcp;
++ int network_depth = 0;
++ __be16 proto;
++ u16 tot_len;
+
+ u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+ int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) ||
+ (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));
+
+- u16 tot_len = cqe_bcnt - ETH_HLEN;
++ skb->mac_len = ETH_HLEN;
++ proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
+
+- if (eth->h_proto == htons(ETH_P_IP)) {
+- tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
++ ipv4 = (struct iphdr *)(skb->data + network_depth);
++ ipv6 = (struct ipv6hdr *)(skb->data + network_depth);
++ tot_len = cqe_bcnt - network_depth;
++
++ if (proto == htons(ETH_P_IP)) {
++ tcp = (struct tcphdr *)(skb->data + network_depth +
+ sizeof(struct iphdr));
+ ipv6 = NULL;
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+ } else {
+- tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
++ tcp = (struct tcphdr *)(skb->data + network_depth +
+ sizeof(struct ipv6hdr));
+ ipv4 = NULL;
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index 704c3d30493e..0db51cc3949b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -150,7 +150,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+- FLOW_DISSECTOR_KEY_BASIC,
++ FLOW_DISSECTOR_KEY_CONTROL,
+ f->key);
+ addr_type = key->addr_type;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 6695893ddd2d..e782d0fde09e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1392,36 +1392,12 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
+ dev_info(&pdev->dev, "%s was called\n", __func__);
+ mlx5_enter_error_state(dev);
+ mlx5_unload_one(dev, priv);
++ pci_save_state(pdev);
+ mlx5_pci_disable_device(dev);
+ return state == pci_channel_io_perm_failure ?
+ PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+ }
+
+-static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+-{
+- struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+- int err = 0;
+-
+- dev_info(&pdev->dev, "%s was called\n", __func__);
+-
+- err = mlx5_pci_enable_device(dev);
+- if (err) {
+- dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
+- , __func__, err);
+- return PCI_ERS_RESULT_DISCONNECT;
+- }
+- pci_set_master(pdev);
+- pci_set_power_state(pdev, PCI_D0);
+- pci_restore_state(pdev);
+-
+- return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+-}
+-
+-void mlx5_disable_device(struct mlx5_core_dev *dev)
+-{
+- mlx5_pci_err_detected(dev->pdev, 0);
+-}
+-
+ /* wait for the device to show vital signs by waiting
+ * for the health counter to start counting.
+ */
+@@ -1449,21 +1425,44 @@ static int wait_vital(struct pci_dev *pdev)
+ return -ETIMEDOUT;
+ }
+
+-static void mlx5_pci_resume(struct pci_dev *pdev)
++static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+ {
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+- struct mlx5_priv *priv = &dev->priv;
+ int err;
+
+ dev_info(&pdev->dev, "%s was called\n", __func__);
+
+- pci_save_state(pdev);
+- err = wait_vital(pdev);
++ err = mlx5_pci_enable_device(dev);
+ if (err) {
++ dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
++ , __func__, err);
++ return PCI_ERS_RESULT_DISCONNECT;
++ }
++
++ pci_set_master(pdev);
++ pci_restore_state(pdev);
++
++ if (wait_vital(pdev)) {
+ dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+- return;
++ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
++ return PCI_ERS_RESULT_RECOVERED;
++}
++
++void mlx5_disable_device(struct mlx5_core_dev *dev)
++{
++ mlx5_pci_err_detected(dev->pdev, 0);
++}
++
++static void mlx5_pci_resume(struct pci_dev *pdev)
++{
++ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
++ struct mlx5_priv *priv = &dev->priv;
++ int err;
++
++ dev_info(&pdev->dev, "%s was called\n", __func__);
++
+ err = mlx5_load_one(dev, priv);
+ if (err)
+ dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
+diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
+index 18ac52ded696..b69d0e1e8daa 100644
+--- a/drivers/net/ethernet/smsc/smc91x.c
++++ b/drivers/net/ethernet/smsc/smc91x.c
+@@ -2269,6 +2269,13 @@ static int smc_drv_probe(struct platform_device *pdev)
+ if (pd) {
+ memcpy(&lp->cfg, pd, sizeof(lp->cfg));
+ lp->io_shift = SMC91X_IO_SHIFT(lp->cfg.flags);
++
++ if (!SMC_8BIT(lp) && !SMC_16BIT(lp)) {
++ dev_err(&pdev->dev,
++ "at least one of 8-bit or 16-bit access support is required.\n");
++ ret = -ENXIO;
++ goto out_free_netdev;
++ }
+ }
+
+ #if IS_BUILTIN(CONFIG_OF)
+diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h
+index 1a55c7976df0..e17671c9d1b0 100644
+--- a/drivers/net/ethernet/smsc/smc91x.h
++++ b/drivers/net/ethernet/smsc/smc91x.h
+@@ -37,6 +37,27 @@
+ #include <linux/smc91x.h>
+
+ /*
++ * Any 16-bit access is performed with two 8-bit accesses if the hardware
++ * can't do it directly. Most registers are 16-bit so those are mandatory.
++ */
++#define SMC_outw_b(x, a, r) \
++ do { \
++ unsigned int __val16 = (x); \
++ unsigned int __reg = (r); \
++ SMC_outb(__val16, a, __reg); \
++ SMC_outb(__val16 >> 8, a, __reg + (1 << SMC_IO_SHIFT)); \
++ } while (0)
++
++#define SMC_inw_b(a, r) \
++ ({ \
++ unsigned int __val16; \
++ unsigned int __reg = r; \
++ __val16 = SMC_inb(a, __reg); \
++ __val16 |= SMC_inb(a, __reg + (1 << SMC_IO_SHIFT)) << 8; \
++ __val16; \
++ })
++
++/*
+ * Define your architecture specific bus configuration parameters here.
+ */
+
+@@ -55,10 +76,30 @@
+ #define SMC_IO_SHIFT (lp->io_shift)
+
+ #define SMC_inb(a, r) readb((a) + (r))
+-#define SMC_inw(a, r) readw((a) + (r))
++#define SMC_inw(a, r) \
++ ({ \
++ unsigned int __smc_r = r; \
++ SMC_16BIT(lp) ? readw((a) + __smc_r) : \
++ SMC_8BIT(lp) ? SMC_inw_b(a, __smc_r) : \
++ ({ BUG(); 0; }); \
++ })
++
+ #define SMC_inl(a, r) readl((a) + (r))
+ #define SMC_outb(v, a, r) writeb(v, (a) + (r))
++#define SMC_outw(v, a, r) \
++ do { \
++ unsigned int __v = v, __smc_r = r; \
++ if (SMC_16BIT(lp)) \
++ __SMC_outw(__v, a, __smc_r); \
++ else if (SMC_8BIT(lp)) \
++ SMC_outw_b(__v, a, __smc_r); \
++ else \
++ BUG(); \
++ } while (0)
++
+ #define SMC_outl(v, a, r) writel(v, (a) + (r))
++#define SMC_insb(a, r, p, l) readsb((a) + (r), p, l)
++#define SMC_outsb(a, r, p, l) writesb((a) + (r), p, l)
+ #define SMC_insw(a, r, p, l) readsw((a) + (r), p, l)
+ #define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l)
+ #define SMC_insl(a, r, p, l) readsl((a) + (r), p, l)
+@@ -66,7 +107,7 @@
+ #define SMC_IRQ_FLAGS (-1) /* from resource */
+
+ /* We actually can't write halfwords properly if not word aligned */
+-static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg)
++static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
+ {
+ if ((machine_is_mainstone() || machine_is_stargate2() ||
+ machine_is_pxa_idp()) && reg & 2) {
+@@ -416,24 +457,8 @@ smc_pxa_dma_insw(void __iomem *ioaddr, struct smc_local *lp, int reg, int dma,
+
+ #if ! SMC_CAN_USE_16BIT
+
+-/*
+- * Any 16-bit access is performed with two 8-bit accesses if the hardware
+- * can't do it directly. Most registers are 16-bit so those are mandatory.
+- */
+-#define SMC_outw(x, ioaddr, reg) \
+- do { \
+- unsigned int __val16 = (x); \
+- SMC_outb( __val16, ioaddr, reg ); \
+- SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\
+- } while (0)
+-#define SMC_inw(ioaddr, reg) \
+- ({ \
+- unsigned int __val16; \
+- __val16 = SMC_inb( ioaddr, reg ); \
+- __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \
+- __val16; \
+- })
+-
++#define SMC_outw(x, ioaddr, reg) SMC_outw_b(x, ioaddr, reg)
++#define SMC_inw(ioaddr, reg) SMC_inw_b(ioaddr, reg)
+ #define SMC_insw(a, r, p, l) BUG()
+ #define SMC_outsw(a, r, p, l) BUG()
+
+diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
+index c5dc2c363f96..c6f66832a1a6 100644
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -722,8 +722,10 @@ phy_err:
+ int phy_start_interrupts(struct phy_device *phydev)
+ {
+ atomic_set(&phydev->irq_disable, 0);
+- if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt",
+- phydev) < 0) {
++ if (request_irq(phydev->irq, phy_interrupt,
++ IRQF_SHARED,
++ "phy_interrupt",
++ phydev) < 0) {
+ pr_warn("%s: Can't get IRQ %d (PHY)\n",
+ phydev->mdio.bus->name, phydev->irq);
+ phydev->irq = PHY_POLL;
+diff --git a/drivers/net/tun.c b/drivers/net/tun.c
+index e16487cc6a9a..34259bd0a3b7 100644
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -878,11 +878,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+ if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+ goto drop;
+
+- if (skb->sk && sk_fullsock(skb->sk)) {
+- sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags,
+- &skb_shinfo(skb)->tx_flags);
+- sw_tx_timestamp(skb);
+- }
++ skb_tx_timestamp(skb);
+
+ /* Orphan the skb - required as we might hang on to it
+ * for indefinite time.
+diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
+index 8b6398850657..4b59a4c1552d 100644
+--- a/drivers/net/wireless/ath/ath9k/main.c
++++ b/drivers/net/wireless/ath/ath9k/main.c
+@@ -718,9 +718,12 @@ static int ath9k_start(struct ieee80211_hw *hw)
+ if (!ath_complete_reset(sc, false))
+ ah->reset_power_on = false;
+
+- if (ah->led_pin >= 0)
++ if (ah->led_pin >= 0) {
+ ath9k_hw_set_gpio(ah, ah->led_pin,
+ (ah->config.led_active_high) ? 1 : 0);
++ ath9k_hw_gpio_request_out(ah, ah->led_pin, NULL,
++ AR_GPIO_OUTPUT_MUX_AS_OUTPUT);
++ }
+
+ /*
+ * Reset key cache to sane defaults (all entries cleared) instead of
+@@ -864,9 +867,11 @@ static void ath9k_stop(struct ieee80211_hw *hw)
+
+ spin_lock_bh(&sc->sc_pcu_lock);
+
+- if (ah->led_pin >= 0)
++ if (ah->led_pin >= 0) {
+ ath9k_hw_set_gpio(ah, ah->led_pin,
+ (ah->config.led_active_high) ? 0 : 1);
++ ath9k_hw_gpio_request_in(ah, ah->led_pin, NULL);
++ }
+
+ ath_prepare_reset(sc);
+
+@@ -1552,13 +1557,13 @@ static int ath9k_sta_state(struct ieee80211_hw *hw,
+ struct ath_common *common = ath9k_hw_common(sc->sc_ah);
+ int ret = 0;
+
+- if (old_state == IEEE80211_STA_AUTH &&
+- new_state == IEEE80211_STA_ASSOC) {
++ if (old_state == IEEE80211_STA_NOTEXIST &&
++ new_state == IEEE80211_STA_NONE) {
+ ret = ath9k_sta_add(hw, vif, sta);
+ ath_dbg(common, CONFIG,
+ "Add station: %pM\n", sta->addr);
+- } else if (old_state == IEEE80211_STA_ASSOC &&
+- new_state == IEEE80211_STA_AUTH) {
++ } else if (old_state == IEEE80211_STA_NONE &&
++ new_state == IEEE80211_STA_NOTEXIST) {
+ ret = ath9k_sta_remove(hw, vif, sta);
+ ath_dbg(common, CONFIG,
+ "Remove station: %pM\n", sta->addr);
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+index 62f475e31077..121baba7acb1 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+@@ -4467,7 +4467,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
+ (u8 *)&settings->beacon.head[ie_offset],
+ settings->beacon.head_len - ie_offset,
+ WLAN_EID_SSID);
+- if (!ssid_ie)
++ if (!ssid_ie || ssid_ie->len > IEEE80211_MAX_SSID_LEN)
+ return -EINVAL;
+
+ memcpy(ssid_le.SSID, ssid_ie->data, ssid_ie->len);
+diff --git a/drivers/net/wireless/intel/iwlegacy/3945.c b/drivers/net/wireless/intel/iwlegacy/3945.c
+index 7bcedbb53d94..209dc9988455 100644
+--- a/drivers/net/wireless/intel/iwlegacy/3945.c
++++ b/drivers/net/wireless/intel/iwlegacy/3945.c
+@@ -1019,12 +1019,13 @@ il3945_hw_txq_ctx_free(struct il_priv *il)
+ int txq_id;
+
+ /* Tx queues */
+- if (il->txq)
++ if (il->txq) {
+ for (txq_id = 0; txq_id < il->hw_params.max_txq_num; txq_id++)
+ if (txq_id == IL39_CMD_QUEUE_NUM)
+ il_cmd_queue_free(il);
+ else
+ il_tx_queue_free(il, txq_id);
++ }
+
+ /* free tx queue structure */
+ il_free_txq_mem(il);
+diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c
+index c6d410ef8de0..5bf8e78e0f47 100644
+--- a/drivers/pinctrl/pinctrl-pistachio.c
++++ b/drivers/pinctrl/pinctrl-pistachio.c
+@@ -809,17 +809,17 @@ static const struct pistachio_pin_group pistachio_groups[] = {
+ PADS_FUNCTION_SELECT2, 12, 0x3),
+ MFIO_MUX_PIN_GROUP(83, MIPS_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG,
+ PADS_FUNCTION_SELECT2, 14, 0x3),
+- MFIO_MUX_PIN_GROUP(84, SYS_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG,
++ MFIO_MUX_PIN_GROUP(84, AUDIO_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG,
+ PADS_FUNCTION_SELECT2, 16, 0x3),
+- MFIO_MUX_PIN_GROUP(85, WIFI_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
++ MFIO_MUX_PIN_GROUP(85, RPU_V_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
+ PADS_FUNCTION_SELECT2, 18, 0x3),
+- MFIO_MUX_PIN_GROUP(86, BT_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
++ MFIO_MUX_PIN_GROUP(86, RPU_L_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
+ PADS_FUNCTION_SELECT2, 20, 0x3),
+- MFIO_MUX_PIN_GROUP(87, RPU_V_PLL_LOCK, DREQ2, SOCIF_DEBUG,
++ MFIO_MUX_PIN_GROUP(87, SYS_PLL_LOCK, DREQ2, SOCIF_DEBUG,
+ PADS_FUNCTION_SELECT2, 22, 0x3),
+- MFIO_MUX_PIN_GROUP(88, RPU_L_PLL_LOCK, DREQ3, SOCIF_DEBUG,
++ MFIO_MUX_PIN_GROUP(88, WIFI_PLL_LOCK, DREQ3, SOCIF_DEBUG,
+ PADS_FUNCTION_SELECT2, 24, 0x3),
+- MFIO_MUX_PIN_GROUP(89, AUDIO_PLL_LOCK, DREQ4, DREQ5,
++ MFIO_MUX_PIN_GROUP(89, BT_PLL_LOCK, DREQ4, DREQ5,
+ PADS_FUNCTION_SELECT2, 26, 0x3),
+ PIN_GROUP(TCK, "tck"),
+ PIN_GROUP(TRSTN, "trstn"),
+diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c
+index 55083d278bb1..51fbf85301be 100644
+--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c
++++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c
+@@ -485,12 +485,12 @@ static const struct sunxi_desc_pin sun8i_a23_pins[] = {
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 8),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+- SUNXI_FUNCTION(0x2, "uart2"), /* RTS */
++ SUNXI_FUNCTION(0x2, "uart1"), /* RTS */
+ SUNXI_FUNCTION_IRQ_BANK(0x4, 2, 8)), /* PG_EINT8 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 9),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+- SUNXI_FUNCTION(0x2, "uart2"), /* CTS */
++ SUNXI_FUNCTION(0x2, "uart1"), /* CTS */
+ SUNXI_FUNCTION_IRQ_BANK(0x4, 2, 9)), /* PG_EINT9 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 10),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
+index 8b381d69df86..584cdedea7a4 100644
+--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
++++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
+@@ -407,12 +407,12 @@ static const struct sunxi_desc_pin sun8i_a33_pins[] = {
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 8),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+- SUNXI_FUNCTION(0x2, "uart2"), /* RTS */
++ SUNXI_FUNCTION(0x2, "uart1"), /* RTS */
+ SUNXI_FUNCTION_IRQ_BANK(0x4, 1, 8)), /* PG_EINT8 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 9),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+- SUNXI_FUNCTION(0x2, "uart2"), /* CTS */
++ SUNXI_FUNCTION(0x2, "uart1"), /* CTS */
+ SUNXI_FUNCTION_IRQ_BANK(0x4, 1, 9)), /* PG_EINT9 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 10),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
+index b5b455614f8a..68d2bae00892 100644
+--- a/drivers/rapidio/devices/tsi721.c
++++ b/drivers/rapidio/devices/tsi721.c
+@@ -1148,7 +1148,7 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
+ } else if (ibw_start < (ib_win->rstart + ib_win->size) &&
+ (ibw_start + ibw_size) > ib_win->rstart) {
+ /* Return error if address translation involved */
+- if (direct && ib_win->xlat) {
++ if (!direct || ib_win->xlat) {
+ ret = -EFAULT;
+ break;
+ }
+diff --git a/drivers/tty/serial/8250/8250_mid.c b/drivers/tty/serial/8250/8250_mid.c
+index 86379a79a6a3..0f50a3f5e05d 100644
+--- a/drivers/tty/serial/8250/8250_mid.c
++++ b/drivers/tty/serial/8250/8250_mid.c
+@@ -154,6 +154,9 @@ static void mid8250_set_termios(struct uart_port *p,
+ unsigned long w = BIT(24) - 1;
+ unsigned long mul, div;
+
++ /* Gracefully handle the B0 case: fall back to B9600 */
++ fuart = fuart ? fuart : 9600 * 16;
++
+ if (mid->board->freq < fuart) {
+ /* Find prescaler value that satisfies Fuart < Fref */
+ if (mid->board->freq > baud)
+diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
+index 8dd250fbd367..e67a46301862 100644
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -1939,6 +1939,43 @@ pci_wch_ch38x_setup(struct serial_private *priv,
+ #define PCI_DEVICE_ID_PERICOM_PI7C9X7954 0x7954
+ #define PCI_DEVICE_ID_PERICOM_PI7C9X7958 0x7958
+
++#define PCI_VENDOR_ID_ACCESIO 0x494f
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB 0x1051
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S 0x1053
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB 0x105C
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S 0x105E
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB 0x1091
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2 0x1093
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB 0x1099
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4 0x109B
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB 0x10D1
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM 0x10D3
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB 0x10DA
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM 0x10DC
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1 0x1108
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2 0x1110
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2 0x1111
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4 0x1118
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4 0x1119
++#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S 0x1152
++#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S 0x115A
++#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2 0x1190
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2 0x1191
++#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4 0x1198
++#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4 0x1199
++#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM 0x11D0
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4 0x105A
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4 0x105B
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8 0x106A
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM485_8 0x106B
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4 0x1098
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8 0x10A9
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM 0x10D9
++#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM 0x10E9
++#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM 0x11D8
++
++
++
+ /* Unknown vendors/cards - this should not be in linux/pci_ids.h */
+ #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584 0x1584
+ #define PCI_SUBDEVICE_ID_UNKNOWN_0x1588 0x1588
+@@ -5093,6 +5130,108 @@ static struct pci_device_id serial_pci_tbl[] = {
+ 0,
+ 0, pbn_pericom_PI7C9X7958 },
+ /*
++ * ACCES I/O Products quad
++ */
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7954 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7958 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7958 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7958 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_8,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7958 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7958 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7958 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7958 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7958 },
++ { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM,
++ PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++ pbn_pericom_PI7C9X7958 },
++ /*
+ * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke)
+ */
+ { PCI_VENDOR_ID_TOPIC, PCI_DEVICE_ID_TOPIC_TP560,
+diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
+index dfec5a176315..b93356834bb5 100644
+--- a/drivers/usb/chipidea/udc.c
++++ b/drivers/usb/chipidea/udc.c
+@@ -949,6 +949,15 @@ static int isr_setup_status_phase(struct ci_hdrc *ci)
+ int retval;
+ struct ci_hw_ep *hwep;
+
++ /*
++ * Unexpected USB controller behavior, caused by bad signal integrity
++ * or ground reference problems, can lead to isr_setup_status_phase
++ * being called with ci->status equal to NULL.
++ * If this situation occurs, you should review your USB hardware design.
++ */
++ if (WARN_ON_ONCE(!ci->status))
++ return -EPIPE;
++
+ hwep = (ci->ep0_dir == TX) ? ci->ep0out : ci->ep0in;
+ ci->status->context = ci;
+ ci->status->complete = isr_setup_status_complete;
+diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
+index 15ce4ab11688..a2d90aca779f 100644
+--- a/drivers/usb/core/config.c
++++ b/drivers/usb/core/config.c
+@@ -240,8 +240,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
+ memcpy(&endpoint->desc, d, n);
+ INIT_LIST_HEAD(&endpoint->urb_list);
+
+- /* Fix up bInterval values outside the legal range. Use 32 ms if no
+- * proper value can be guessed. */
++ /*
++ * Fix up bInterval values outside the legal range.
++ * Use 10 or 8 ms if no proper value can be guessed.
++ */
+ i = 0; /* i = min, j = max, n = default */
+ j = 255;
+ if (usb_endpoint_xfer_int(d)) {
+@@ -250,13 +252,15 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
+ case USB_SPEED_SUPER_PLUS:
+ case USB_SPEED_SUPER:
+ case USB_SPEED_HIGH:
+- /* Many device manufacturers are using full-speed
++ /*
++ * Many device manufacturers are using full-speed
+ * bInterval values in high-speed interrupt endpoint
+- * descriptors. Try to fix those and fall back to a
+- * 32 ms default value otherwise. */
++ * descriptors. Try to fix those and fall back to an
++ * 8-ms default value otherwise.
++ */
+ n = fls(d->bInterval*8);
+ if (n == 0)
+- n = 9; /* 32 ms = 2^(9-1) uframes */
++ n = 7; /* 8 ms = 2^(7-1) uframes */
+ j = 16;
+
+ /*
+@@ -271,10 +275,12 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
+ }
+ break;
+ default: /* USB_SPEED_FULL or _LOW */
+- /* For low-speed, 10 ms is the official minimum.
++ /*
++ * For low-speed, 10 ms is the official minimum.
+ * But some "overclocked" devices might want faster
+- * polling so we'll allow it. */
+- n = 32;
++ * polling so we'll allow it.
++ */
++ n = 10;
+ break;
+ }
+ } else if (usb_endpoint_xfer_isoc(d)) {
+@@ -282,10 +288,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
+ j = 16;
+ switch (to_usb_device(ddev)->speed) {
+ case USB_SPEED_HIGH:
+- n = 9; /* 32 ms = 2^(9-1) uframes */
++ n = 7; /* 8 ms = 2^(7-1) uframes */
+ break;
+ default: /* USB_SPEED_FULL */
+- n = 6; /* 32 ms = 2^(6-1) frames */
++ n = 4; /* 8 ms = 2^(4-1) frames */
+ break;
+ }
+ }
+diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
+index 93a3bec81df7..fb8fc34827ab 100644
+--- a/drivers/usb/gadget/udc/renesas_usb3.c
++++ b/drivers/usb/gadget/udc/renesas_usb3.c
+@@ -106,6 +106,7 @@
+
+ /* DRD_CON */
+ #define DRD_CON_PERI_CON BIT(24)
++#define DRD_CON_VBOUT BIT(0)
+
+ /* USB_INT_ENA_1 and USB_INT_STA_1 */
+ #define USB_INT_1_B3_PLLWKUP BIT(31)
+@@ -363,6 +364,7 @@ static void usb3_init_epc_registers(struct renesas_usb3 *usb3)
+ {
+ /* FIXME: How to change host / peripheral mode as well? */
+ usb3_set_bit(usb3, DRD_CON_PERI_CON, USB3_DRD_CON);
++ usb3_clear_bit(usb3, DRD_CON_VBOUT, USB3_DRD_CON);
+
+ usb3_write(usb3, ~0, USB3_USB_INT_STA_1);
+ usb3_enable_irq_1(usb3, USB_INT_1_VBUS_CNG);
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index bc17bcf57bbd..e262cccbcdb2 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -840,6 +840,10 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
+ spin_lock_irqsave(&xhci->lock, flags);
+
+ ep->stop_cmds_pending--;
++ if (xhci->xhc_state & XHCI_STATE_REMOVING) {
++ spin_unlock_irqrestore(&xhci->lock, flags);
++ return;
++ }
+ if (xhci->xhc_state & XHCI_STATE_DYING) {
+ xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
+ "Stop EP timer ran, but another timer marked "
+@@ -893,7 +897,7 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
+ "Calling usb_hc_died()");
+- usb_hc_died(xhci_to_hcd(xhci)->primary_hcd);
++ usb_hc_died(xhci_to_hcd(xhci));
+ xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
+ "xHCI host controller is dead.");
+ }
+diff --git a/drivers/usb/renesas_usbhs/mod.c b/drivers/usb/renesas_usbhs/mod.c
+index d4be5d594896..28965ef4f824 100644
+--- a/drivers/usb/renesas_usbhs/mod.c
++++ b/drivers/usb/renesas_usbhs/mod.c
+@@ -282,9 +282,16 @@ static irqreturn_t usbhs_interrupt(int irq, void *data)
+ if (usbhs_mod_is_host(priv))
+ usbhs_write(priv, INTSTS1, ~irq_state.intsts1 & INTSTS1_MAGIC);
+
+- usbhs_write(priv, BRDYSTS, ~irq_state.brdysts);
++ /*
++ * The driver should not clear the xxxSTS after the line of
++ * "call irq callback functions" below, because each "if" statement
++ * may call the callback function; this ordering avoids any side effects.
++ */
++ if (irq_state.intsts0 & BRDY)
++ usbhs_write(priv, BRDYSTS, ~irq_state.brdysts);
+ usbhs_write(priv, NRDYSTS, ~irq_state.nrdysts);
+- usbhs_write(priv, BEMPSTS, ~irq_state.bempsts);
++ if (irq_state.intsts0 & BEMP)
++ usbhs_write(priv, BEMPSTS, ~irq_state.bempsts);
+
+ /*
+ * call irq callback functions
+diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c
+index a204782ae530..e98b6e57b703 100644
+--- a/drivers/usb/serial/usb-serial-simple.c
++++ b/drivers/usb/serial/usb-serial-simple.c
+@@ -54,7 +54,8 @@ DEVICE(funsoft, FUNSOFT_IDS);
+ /* Infineon Flashloader driver */
+ #define FLASHLOADER_IDS() \
+ { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \
+- { USB_DEVICE(0x8087, 0x0716) }
++ { USB_DEVICE(0x8087, 0x0716) }, \
++ { USB_DEVICE(0x8087, 0x0801) }
+ DEVICE(flashloader, FLASHLOADER_IDS);
+
+ /* Google Serial USB SubClass */
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index c05f69a8ec42..542379f8feea 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -2851,6 +2851,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+
+ if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
+ blk_finish_plug(&plug);
++ list_del_init(&root_log_ctx.list);
+ mutex_unlock(&log_root_tree->log_mutex);
+ ret = root_log_ctx.log_ret;
+ goto out;
+diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
+index 37c134a132c7..cc543fea5d1e 100644
+--- a/fs/devpts/inode.c
++++ b/fs/devpts/inode.c
+@@ -584,7 +584,8 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
+ */
+ void *devpts_get_priv(struct dentry *dentry)
+ {
+- WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC);
++ if (dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC)
++ return NULL;
+ return dentry->d_fsdata;
+ }
+
+diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
+index 28cc412852af..64eddc5289eb 100644
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -782,7 +782,13 @@ resizefs_out:
+ goto encryption_policy_out;
+ }
+
++ err = mnt_want_write_file(filp);
++ if (err)
++ goto encryption_policy_out;
++
+ err = ext4_process_policy(&policy, inode);
++
++ mnt_drop_write_file(filp);
+ encryption_policy_out:
+ return err;
+ #else
+diff --git a/fs/fuse/file.c b/fs/fuse/file.c
+index 6cac3dc33521..76962a349d57 100644
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -540,13 +540,13 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
+ req->out.args[0].size = count;
+ }
+
+-static void fuse_release_user_pages(struct fuse_req *req, int write)
++static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
+ {
+ unsigned i;
+
+ for (i = 0; i < req->num_pages; i++) {
+ struct page *page = req->pages[i];
+- if (write)
++ if (should_dirty)
+ set_page_dirty_lock(page);
+ put_page(page);
+ }
+@@ -1331,6 +1331,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
+ loff_t *ppos, int flags)
+ {
+ int write = flags & FUSE_DIO_WRITE;
++ bool should_dirty = !write && iter_is_iovec(iter);
+ int cuse = flags & FUSE_DIO_CUSE;
+ struct file *file = io->file;
+ struct inode *inode = file->f_mapping->host;
+@@ -1374,7 +1375,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
+ nres = fuse_send_read(req, io, pos, nbytes, owner);
+
+ if (!io->async)
+- fuse_release_user_pages(req, !write);
++ fuse_release_user_pages(req, should_dirty);
+ if (req->out.h.error) {
+ err = req->out.h.error;
+ break;
+diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
+index e1574008adc9..2bcb86e6e6ca 100644
+--- a/fs/kernfs/file.c
++++ b/fs/kernfs/file.c
+@@ -840,21 +840,35 @@ repeat:
+ mutex_lock(&kernfs_mutex);
+
+ list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
++ struct kernfs_node *parent;
+ struct inode *inode;
+- struct dentry *dentry;
+
++ /*
++ * We want fsnotify_modify() on @kn but, as the
++ * modifications aren't originating from userland, we don't
++ * have the matching @file available. Look up the inodes
++ * and generate the events manually.
++ */
+ inode = ilookup(info->sb, kn->ino);
+ if (!inode)
+ continue;
+
+- dentry = d_find_any_alias(inode);
+- if (dentry) {
+- fsnotify_parent(NULL, dentry, FS_MODIFY);
+- fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
+- NULL, 0);
+- dput(dentry);
++ parent = kernfs_get_parent(kn);
++ if (parent) {
++ struct inode *p_inode;
++
++ p_inode = ilookup(info->sb, parent->ino);
++ if (p_inode) {
++ fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD,
++ inode, FSNOTIFY_EVENT_INODE, kn->name, 0);
++ iput(p_inode);
++ }
++
++ kernfs_put(parent);
+ }
+
++ fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
++ kn->name, 0);
+ iput(inode);
+ }
+
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index a7f2e6e33305..52a28311e2a4 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -275,6 +275,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
+ err_socks:
+ svc_rpcb_cleanup(serv, net);
+ err_bind:
++ nn->cb_users[minorversion]--;
+ dprintk("NFS: Couldn't create callback socket: err = %d; "
+ "net = %p\n", ret, net);
+ return ret;
+diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
+index aaa2e8d3df6f..8cfa21f40acd 100644
+--- a/fs/nfs/callback_proc.c
++++ b/fs/nfs/callback_proc.c
+@@ -430,11 +430,8 @@ static bool referring_call_exists(struct nfs_client *clp,
+ ((u32 *)&rclist->rcl_sessionid.data)[3],
+ ref->rc_sequenceid, ref->rc_slotid);
+
+- spin_lock(&tbl->slot_tbl_lock);
+- status = (test_bit(ref->rc_slotid, tbl->used_slots) &&
+- tbl->slots[ref->rc_slotid].seq_nr ==
++ status = nfs4_slot_seqid_in_use(tbl, ref->rc_slotid,
+ ref->rc_sequenceid);
+- spin_unlock(&tbl->slot_tbl_lock);
+ if (status)
+ goto out;
+ }
+diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
+index 0e8018bc9880..6da14aedde2b 100644
+--- a/fs/nfs/flexfilelayout/flexfilelayout.c
++++ b/fs/nfs/flexfilelayout/flexfilelayout.c
+@@ -806,11 +806,14 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
+ {
+ struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
+ struct nfs4_pnfs_ds *ds;
++ bool fail_return = false;
+ int idx;
+
+ /* mirrors are sorted by efficiency */
+ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
+- ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
++ if (idx+1 == fls->mirror_array_cnt)
++ fail_return = true;
++ ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return);
+ if (ds) {
+ *best_idx = idx;
+ return ds;
+@@ -859,6 +862,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
+ struct nfs4_pnfs_ds *ds;
+ int ds_idx;
+
++retry:
+ /* Use full layout for now */
+ if (!pgio->pg_lseg)
+ ff_layout_pg_get_read(pgio, req, false);
+@@ -871,10 +875,13 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
+
+ ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
+ if (!ds) {
+- if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
+- goto out_pnfs;
+- else
++ if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
+ goto out_mds;
++ pnfs_put_lseg(pgio->pg_lseg);
++ pgio->pg_lseg = NULL;
++ /* Sleep for 1 second before retrying */
++ ssleep(1);
++ goto retry;
+ }
+
+ mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
+@@ -890,12 +897,6 @@ out_mds:
+ pnfs_put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ nfs_pageio_reset_read_mds(pgio);
+- return;
+-
+-out_pnfs:
+- pnfs_set_lo_fail(pgio->pg_lseg);
+- pnfs_put_lseg(pgio->pg_lseg);
+- pgio->pg_lseg = NULL;
+ }
+
+ static void
+@@ -909,6 +910,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
+ int i;
+ int status;
+
++retry:
+ if (!pgio->pg_lseg) {
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+@@ -940,10 +942,13 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
+ for (i = 0; i < pgio->pg_mirror_count; i++) {
+ ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
+ if (!ds) {
+- if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
+- goto out_pnfs;
+- else
++ if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
+ goto out_mds;
++ pnfs_put_lseg(pgio->pg_lseg);
++ pgio->pg_lseg = NULL;
++ /* Sleep for 1 second before retrying */
++ ssleep(1);
++ goto retry;
+ }
+ pgm = &pgio->pg_mirrors[i];
+ mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
+@@ -956,12 +961,6 @@ out_mds:
+ pnfs_put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ nfs_pageio_reset_write_mds(pgio);
+- return;
+-
+-out_pnfs:
+- pnfs_set_lo_fail(pgio->pg_lseg);
+- pnfs_put_lseg(pgio->pg_lseg);
+- pgio->pg_lseg = NULL;
+ }
+
+ static unsigned int
+diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+index 0aa36be71fce..ae5e15fd1258 100644
+--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
++++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+@@ -379,7 +379,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
+
+ devid = &mirror->mirror_ds->id_node;
+ if (ff_layout_test_devid_unavailable(devid))
+- goto out;
++ goto out_fail;
+
+ ds = mirror->mirror_ds->ds;
+ /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
+@@ -405,15 +405,16 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
+ mirror->mirror_ds->ds_versions[0].rsize = max_payload;
+ if (mirror->mirror_ds->ds_versions[0].wsize > max_payload)
+ mirror->mirror_ds->ds_versions[0].wsize = max_payload;
+- } else {
+- ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
+- mirror, lseg->pls_range.offset,
+- lseg->pls_range.length, NFS4ERR_NXIO,
+- OP_ILLEGAL, GFP_NOIO);
+- if (fail_return || !ff_layout_has_available_ds(lseg))
+- pnfs_error_mark_layout_for_return(ino, lseg);
+- ds = NULL;
++ goto out;
+ }
++ ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
++ mirror, lseg->pls_range.offset,
++ lseg->pls_range.length, NFS4ERR_NXIO,
++ OP_ILLEGAL, GFP_NOIO);
++out_fail:
++ if (fail_return || !ff_layout_has_available_ds(lseg))
++ pnfs_error_mark_layout_for_return(ino, lseg);
++ ds = NULL;
+ out:
+ return ds;
+ }
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index 7796beacdefb..e2320c643107 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -7509,12 +7509,20 @@ static int _nfs4_proc_create_session(struct nfs_client *clp,
+ status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ trace_nfs4_create_session(clp, status);
+
++ switch (status) {
++ case -NFS4ERR_STALE_CLIENTID:
++ case -NFS4ERR_DELAY:
++ case -ETIMEDOUT:
++ case -EACCES:
++ case -EAGAIN:
++ goto out;
++ };
++
++ clp->cl_seqid++;
+ if (!status) {
+ /* Verify the session's negotiated channel_attrs values */
+ status = nfs4_verify_channel_attrs(&args, &res);
+ /* Increment the clientid slot sequence id */
+- if (clp->cl_seqid == res.seqid)
+- clp->cl_seqid++;
+ if (status)
+ goto out;
+ nfs4_update_session(session, &res);
+diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
+index 332d06e64fa9..c1f4c208f38a 100644
+--- a/fs/nfs/nfs4session.c
++++ b/fs/nfs/nfs4session.c
+@@ -172,6 +172,39 @@ struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid)
+ return ERR_PTR(-E2BIG);
+ }
+
++static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid,
++ u32 *seq_nr)
++ __must_hold(&tbl->slot_tbl_lock)
++{
++ struct nfs4_slot *slot;
++
++ slot = nfs4_lookup_slot(tbl, slotid);
++ if (IS_ERR(slot))
++ return PTR_ERR(slot);
++ *seq_nr = slot->seq_nr;
++ return 0;
++}
++
++/*
++ * nfs4_slot_seqid_in_use - test if a slot sequence id is still in use
++ *
++ * Given a slot table, slot id and sequence number, determine if the
++ * RPC call in question is still in flight. This function is mainly
++ * intended for use by the callback channel.
++ */
++bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr)
++{
++ u32 cur_seq;
++ bool ret = false;
++
++ spin_lock(&tbl->slot_tbl_lock);
++ if (nfs4_slot_get_seqid(tbl, slotid, &cur_seq) == 0 &&
++ cur_seq == seq_nr && test_bit(slotid, tbl->used_slots))
++ ret = true;
++ spin_unlock(&tbl->slot_tbl_lock);
++ return ret;
++}
++
+ /*
+ * nfs4_alloc_slot - efficiently look for a free slot
+ *
+diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
+index 5b51298d1d03..33cace62b50b 100644
+--- a/fs/nfs/nfs4session.h
++++ b/fs/nfs/nfs4session.h
+@@ -78,6 +78,7 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
+ extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
+ extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
+ extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid);
++extern bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr);
+ extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
+ extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
+ extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
+diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
+index 7d992362ff04..229fa6139e0a 100644
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -876,6 +876,9 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
+ static bool
+ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+ {
++ /* Serialise LAYOUTGET/LAYOUTRETURN */
++ if (atomic_read(&lo->plh_outstanding) != 0)
++ return false;
+ if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+ return false;
+ lo->plh_return_iomode = 0;
+@@ -1527,6 +1530,7 @@ pnfs_update_layout(struct inode *ino,
+ }
+
+ lookup_again:
++ nfs4_client_recover_expired_lease(clp);
+ first = false;
+ spin_lock(&ino->i_lock);
+ lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 806eda192d1c..6a230984086b 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -1200,27 +1200,6 @@ free_ol_stateid_reaplist(struct list_head *reaplist)
+ }
+ }
+
+-static void release_lockowner(struct nfs4_lockowner *lo)
+-{
+- struct nfs4_client *clp = lo->lo_owner.so_client;
+- struct nfs4_ol_stateid *stp;
+- struct list_head reaplist;
+-
+- INIT_LIST_HEAD(&reaplist);
+-
+- spin_lock(&clp->cl_lock);
+- unhash_lockowner_locked(lo);
+- while (!list_empty(&lo->lo_owner.so_stateids)) {
+- stp = list_first_entry(&lo->lo_owner.so_stateids,
+- struct nfs4_ol_stateid, st_perstateowner);
+- WARN_ON(!unhash_lock_stateid(stp));
+- put_ol_stateid_locked(stp, &reaplist);
+- }
+- spin_unlock(&clp->cl_lock);
+- free_ol_stateid_reaplist(&reaplist);
+- nfs4_put_stateowner(&lo->lo_owner);
+-}
+-
+ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
+ struct list_head *reaplist)
+ {
+@@ -5976,6 +5955,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ __be32 status;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfs4_client *clp;
++ LIST_HEAD(reaplist);
+
+ dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
+ clid->cl_boot, clid->cl_id);
+@@ -6006,9 +5986,23 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ nfs4_get_stateowner(sop);
+ break;
+ }
++ if (!lo) {
++ spin_unlock(&clp->cl_lock);
++ return status;
++ }
++
++ unhash_lockowner_locked(lo);
++ while (!list_empty(&lo->lo_owner.so_stateids)) {
++ stp = list_first_entry(&lo->lo_owner.so_stateids,
++ struct nfs4_ol_stateid,
++ st_perstateowner);
++ WARN_ON(!unhash_lock_stateid(stp));
++ put_ol_stateid_locked(stp, &reaplist);
++ }
+ spin_unlock(&clp->cl_lock);
+- if (lo)
+- release_lockowner(lo);
++ free_ol_stateid_reaplist(&reaplist);
++ nfs4_put_stateowner(&lo->lo_owner);
++
+ return status;
+ }
+
+diff --git a/fs/proc/base.c b/fs/proc/base.c
+index a11eb7196ec8..7583df74d0c8 100644
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -1552,18 +1552,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
+ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
+ {
+ struct task_struct *task;
+- struct mm_struct *mm;
+ struct file *exe_file;
+
+ task = get_proc_task(d_inode(dentry));
+ if (!task)
+ return -ENOENT;
+- mm = get_task_mm(task);
++ exe_file = get_task_exe_file(task);
+ put_task_struct(task);
+- if (!mm)
+- return -ENOENT;
+- exe_file = get_mm_exe_file(mm);
+- mmput(mm);
+ if (exe_file) {
+ *exe_path = exe_file->f_path;
+ path_get(&exe_file->f_path);
+diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
+index 1bfa602958f2..32901d11f8c4 100644
+--- a/include/asm-generic/uaccess.h
++++ b/include/asm-generic/uaccess.h
+@@ -230,14 +230,18 @@ extern int __put_user_bad(void) __attribute__((noreturn));
+ might_fault(); \
+ access_ok(VERIFY_READ, __p, sizeof(*ptr)) ? \
+ __get_user((x), (__typeof__(*(ptr)) *)__p) : \
+- -EFAULT; \
++ ((x) = (__typeof__(*(ptr)))0,-EFAULT); \
+ })
+
+ #ifndef __get_user_fn
+ static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
+ {
+- size = __copy_from_user(x, ptr, size);
+- return size ? -EFAULT : size;
++ size_t n = __copy_from_user(x, ptr, size);
++ if (unlikely(n)) {
++ memset(x + (size - n), 0, n);
++ return -EFAULT;
++ }
++ return 0;
+ }
+
+ #define __get_user_fn(sz, u, k) __get_user_fn(sz, u, k)
+@@ -257,11 +261,13 @@ extern int __get_user_bad(void) __attribute__((noreturn));
+ static inline long copy_from_user(void *to,
+ const void __user * from, unsigned long n)
+ {
++ unsigned long res = n;
+ might_fault();
+- if (access_ok(VERIFY_READ, from, n))
+- return __copy_from_user(to, from, n);
+- else
+- return n;
++ if (likely(access_ok(VERIFY_READ, from, n)))
++ res = __copy_from_user(to, from, n);
++ if (unlikely(res))
++ memset(to + (n - res), 0, res);
++ return res;
+ }
+
+ static inline long copy_to_user(void __user *to,
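The asm-generic uaccess hunk above changes the failure path of __get_user(), __get_user_fn() and copy_from_user(): any bytes that could not be copied from userspace are now zeroed in the kernel-side buffer, so a partial fault can no longer leak stale kernel memory to the caller. A minimal standalone userspace sketch of that contract follows -- it is not part of the patch, the helper names are invented, and the fault is simulated by copying fewer bytes than requested:

#include <stdio.h>
#include <string.h>

/* Pretend only 'ok' of 'n' bytes could be copied; return bytes NOT copied,
 * mirroring what __copy_from_user() reports on a fault. */
static unsigned long fake_copy(void *to, const void *from,
                               unsigned long n, unsigned long ok)
{
        memcpy(to, from, ok < n ? ok : n);
        return ok < n ? n - ok : 0;
}

static long sketch_copy_from_user(void *to, const void *from,
                                  unsigned long n, unsigned long ok)
{
        unsigned long res = fake_copy(to, from, n, ok);

        if (res)                /* as in the patched helpers: */
                memset((char *)to + (n - res), 0, res); /* zero the tail */
        return res;
}

int main(void)
{
        char src[8] = "secrets";
        char dst[8];
        unsigned int i;

        memset(dst, 0xAA, sizeof(dst));  /* stale "kernel" data */
        sketch_copy_from_user(dst, src, sizeof(dst), 3); /* fault after 3 */
        for (i = 0; i < sizeof(dst); i++)
                printf("%02x ", (unsigned char)dst[i]);
        printf("\n");
        return 0;
}

Compiled and run, this prints 73 65 63 00 00 00 00 00: the copied prefix is preserved and the faulted tail reads back as zeroes instead of the stale 0xaa fill.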
+diff --git a/include/linux/efi.h b/include/linux/efi.h
+index f196dd0b0f2f..17fd2c5bf81f 100644
+--- a/include/linux/efi.h
++++ b/include/linux/efi.h
+@@ -118,6 +118,15 @@ typedef struct {
+ u32 imagesize;
+ } efi_capsule_header_t;
+
++struct efi_boot_memmap {
++ efi_memory_desc_t **map;
++ unsigned long *map_size;
++ unsigned long *desc_size;
++ u32 *desc_ver;
++ unsigned long *key_ptr;
++ unsigned long *buff_size;
++};
++
+ /*
+ * EFI capsule flags
+ */
+@@ -1005,7 +1014,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm,
+ /* Iterate through an efi_memory_map */
+ #define for_each_efi_memory_desc_in_map(m, md) \
+ for ((md) = (m)->map; \
+- ((void *)(md) + (m)->desc_size) <= (m)->map_end; \
++ (md) && ((void *)(md) + (m)->desc_size) <= (m)->map_end; \
+ (md) = (void *)(md) + (m)->desc_size)
+
+ /**
+@@ -1430,11 +1439,7 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
+ efi_loaded_image_t *image, int *cmd_line_len);
+
+ efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
+- efi_memory_desc_t **map,
+- unsigned long *map_size,
+- unsigned long *desc_size,
+- u32 *desc_ver,
+- unsigned long *key_ptr);
++ struct efi_boot_memmap *map);
+
+ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
+ unsigned long size, unsigned long align,
+@@ -1465,4 +1470,15 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
+ unsigned long size);
+
+ bool efi_runtime_disabled(void);
++
++typedef efi_status_t (*efi_exit_boot_map_processing)(
++ efi_system_table_t *sys_table_arg,
++ struct efi_boot_memmap *map,
++ void *priv);
++
++efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table,
++ void *handle,
++ struct efi_boot_memmap *map,
++ void *priv,
++ efi_exit_boot_map_processing priv_func);
+ #endif /* _LINUX_EFI_H */
+diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h
+index 5198f8ed08a4..c97eab67558f 100644
+--- a/include/linux/iio/sw_trigger.h
++++ b/include/linux/iio/sw_trigger.h
+@@ -62,7 +62,7 @@ void iio_swt_group_init_type_name(struct iio_sw_trigger *t,
+ const char *name,
+ struct config_item_type *type)
+ {
+-#ifdef CONFIG_CONFIGFS_FS
++#if IS_ENABLED(CONFIG_CONFIGFS_FS)
+ config_group_init_type_name(&t->group, name, type);
+ #endif
+ }
+diff --git a/include/linux/irq.h b/include/linux/irq.h
+index 4d758a7c604a..cbb5a2c5dcb0 100644
+--- a/include/linux/irq.h
++++ b/include/linux/irq.h
+@@ -933,6 +933,16 @@ static inline void irq_gc_lock(struct irq_chip_generic *gc) { }
+ static inline void irq_gc_unlock(struct irq_chip_generic *gc) { }
+ #endif
+
++/*
++ * The irqsave variants are for usage in non-interrupt code. Do not use
++ * them in irq_chip callbacks. Use irq_gc_lock() instead.
++ */
++#define irq_gc_lock_irqsave(gc, flags) \
++ raw_spin_lock_irqsave(&(gc)->lock, flags)
++
++#define irq_gc_unlock_irqrestore(gc, flags) \
++ raw_spin_unlock_irqrestore(&(gc)->lock, flags)
++
+ static inline void irq_reg_writel(struct irq_chip_generic *gc,
+ u32 val, int reg_offset)
+ {
+diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
+index 4429d255c8ab..5e5b2969d931 100644
+--- a/include/linux/mempolicy.h
++++ b/include/linux/mempolicy.h
+@@ -195,6 +195,7 @@ static inline bool vma_migratable(struct vm_area_struct *vma)
+ }
+
+ extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
++extern void mpol_put_task_policy(struct task_struct *);
+
+ #else
+
+@@ -297,5 +298,8 @@ static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+ return -1; /* no node preference */
+ }
+
++static inline void mpol_put_task_policy(struct task_struct *task)
++{
++}
+ #endif /* CONFIG_NUMA */
+ #endif
+diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
+index 1fd50dcfe47c..175c82699e9d 100644
+--- a/include/linux/mfd/ti_am335x_tscadc.h
++++ b/include/linux/mfd/ti_am335x_tscadc.h
+@@ -138,16 +138,16 @@
+ /*
+ * time in us for processing a single channel, calculated as follows:
+ *
+- * num cycles = open delay + (sample delay + conv time) * averaging
++ * max num cycles = open delay + (sample delay + conv time) * averaging
+ *
+- * num cycles: 152 + (1 + 13) * 16 = 376
++ * max num cycles: 262143 + (255 + 13) * 16 = 266431
+ *
+ * clock frequency: 26MHz / 8 = 3.25MHz
+ * clock period: 1 / 3.25MHz = 308ns
+ *
+- * processing time: 376 * 308ns = 116us
++ * max processing time: 266431 * 308ns = 83ms(approx)
+ */
+-#define IDLE_TIMEOUT 116 /* microsec */
++#define IDLE_TIMEOUT 83 /* milliseconds */
+
+ #define TSCADC_CELLS 2
+
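The revised comment above derives the worst case from the register maxima (open delay 262143 cycles, sample delay 255, a 13-cycle conversion, 16x averaging) at 26 MHz / 8. A standalone re-check of that arithmetic, with the constants taken straight from the comment:

#include <stdio.h>

int main(void)
{
        unsigned long open_delay = 262143;   /* max open delay, per comment */
        unsigned long sample_delay = 255;    /* max sample delay, per comment */
        unsigned long conv = 13, avg = 16;
        unsigned long cycles = open_delay + (sample_delay + conv) * avg;
        double period_ns = 1e9 / (26e6 / 8.0);  /* 26 MHz / 8 => ~308 ns */

        printf("cycles=%lu time=%.1f ms\n", cycles,
               cycles * period_ns / 1e6);    /* 266431 cycles, ~82 ms */
        return 0;
}

266431 cycles at roughly 308 ns per cycle comes out just above 82 ms, which the comment rounds up to the 83 ms now used for IDLE_TIMEOUT.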
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index ece042dfe23c..317564b11dc7 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1975,6 +1975,7 @@ extern void mm_drop_all_locks(struct mm_struct *mm);
+
+ extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
+ extern struct file *get_mm_exe_file(struct mm_struct *mm);
++extern struct file *get_task_exe_file(struct task_struct *task);
+
+ extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages);
+ extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages);
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index da4b33bea982..4f0e6fb39a36 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3225,6 +3225,7 @@ static inline void napi_free_frags(struct napi_struct *napi)
+ napi->skb = NULL;
+ }
+
++bool netdev_is_rx_handler_busy(struct net_device *dev);
+ int netdev_rx_handler_register(struct net_device *dev,
+ rx_handler_func_t *rx_handler,
+ void *rx_handler_data);
+diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h
+index 76199b75d584..e302c447e057 100644
+--- a/include/linux/smc91x.h
++++ b/include/linux/smc91x.h
+@@ -1,6 +1,16 @@
+ #ifndef __SMC91X_H__
+ #define __SMC91X_H__
+
++/*
++ * These bits define which access sizes a platform can support, rather
++ * than the maximal access size. So, if your platform can do 16-bit
++ * and 32-bit accesses to the SMC91x device, but not 8-bit, set both
++ * SMC91X_USE_16BIT and SMC91X_USE_32BIT.
++ *
++ * The SMC91x driver requires at least one of SMC91X_USE_8BIT or
++ * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is
++ * an invalid configuration.
++ */
+ #define SMC91X_USE_8BIT (1 << 0)
+ #define SMC91X_USE_16BIT (1 << 1)
+ #define SMC91X_USE_32BIT (1 << 2)
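The new comment above states the rule these flags encode: they list every access size the platform supports, and 32-bit alone is not a valid configuration. A small sketch of that rule as a check, reusing the same bit definitions (smc91x_cfg_valid() is a made-up name, not a driver API):

#include <stdbool.h>
#include <stdio.h>

#define SMC91X_USE_8BIT  (1 << 0)
#define SMC91X_USE_16BIT (1 << 1)
#define SMC91X_USE_32BIT (1 << 2)

static bool smc91x_cfg_valid(unsigned int flags)
{
        /* 32-bit alone is invalid: the driver needs 8-bit or 16-bit too. */
        return flags & (SMC91X_USE_8BIT | SMC91X_USE_16BIT);
}

int main(void)
{
        printf("%d\n", smc91x_cfg_valid(SMC91X_USE_16BIT | SMC91X_USE_32BIT)); /* 1 */
        printf("%d\n", smc91x_cfg_valid(SMC91X_USE_32BIT));                    /* 0 */
        return 0;
}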
+diff --git a/include/linux/uio.h b/include/linux/uio.h
+index 1b5d1cd796e2..75b4aaf31a9d 100644
+--- a/include/linux/uio.h
++++ b/include/linux/uio.h
+@@ -76,7 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
+ struct iov_iter *i, unsigned long offset, size_t bytes);
+ void iov_iter_advance(struct iov_iter *i, size_t bytes);
+ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
+-int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes);
++#define iov_iter_fault_in_multipages_readable iov_iter_fault_in_readable
+ size_t iov_iter_single_seg_count(const struct iov_iter *i);
+ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+ struct iov_iter *i);
+diff --git a/include/net/af_unix.h b/include/net/af_unix.h
+index 9b4c418bebd8..fd60eccb59a6 100644
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -52,7 +52,7 @@ struct unix_sock {
+ struct sock sk;
+ struct unix_address *addr;
+ struct path path;
+- struct mutex readlock;
++ struct mutex iolock, bindlock;
+ struct sock *peer;
+ struct list_head link;
+ atomic_long_t inflight;
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 0bcc70f4e1fb..725405170f0e 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1522,6 +1522,8 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli
+ {
+ if (sk->sk_send_head == skb_unlinked)
+ sk->sk_send_head = NULL;
++ if (tcp_sk(sk)->highest_sack == skb_unlinked)
++ tcp_sk(sk)->highest_sack = NULL;
+ }
+
+ static inline void tcp_init_send_head(struct sock *sk)
+diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
+index d6709eb70970..0d302a87f21b 100644
+--- a/kernel/audit_watch.c
++++ b/kernel/audit_watch.c
+@@ -19,6 +19,7 @@
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
++#include <linux/file.h>
+ #include <linux/kernel.h>
+ #include <linux/audit.h>
+ #include <linux/kthread.h>
+@@ -544,10 +545,11 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
+ unsigned long ino;
+ dev_t dev;
+
+- rcu_read_lock();
+- exe_file = rcu_dereference(tsk->mm->exe_file);
++ exe_file = get_task_exe_file(tsk);
++ if (!exe_file)
++ return 0;
+ ino = exe_file->f_inode->i_ino;
+ dev = exe_file->f_inode->i_sb->s_dev;
+- rcu_read_unlock();
++ fput(exe_file);
+ return audit_mark_compare(mark, ino, dev);
+ }
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index eec9f90ba030..6d011c693f67 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -194,6 +194,7 @@ struct verifier_env {
+ struct verifier_state_list **explored_states; /* search pruning optimization */
+ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
+ u32 used_map_cnt; /* number of used maps */
++ u32 id_gen; /* used to generate unique reg IDs */
+ bool allow_ptr_leaks;
+ };
+
+@@ -1277,7 +1278,7 @@ add_imm:
+ /* dst_reg stays as pkt_ptr type and since some positive
+ * integer value was added to the pointer, increment its 'id'
+ */
+- dst_reg->id++;
++ dst_reg->id = ++env->id_gen;
+
+ /* something was added to pkt_ptr, set range and off to zero */
+ dst_reg->off = 0;
+diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
+index c2de56ab0fce..7fa0c4ae6394 100644
+--- a/kernel/configs/tiny.config
++++ b/kernel/configs/tiny.config
+@@ -1,4 +1,12 @@
++# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set
+ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
++# CONFIG_KERNEL_GZIP is not set
++# CONFIG_KERNEL_BZIP2 is not set
++# CONFIG_KERNEL_LZMA is not set
+ CONFIG_KERNEL_XZ=y
++# CONFIG_KERNEL_LZO is not set
++# CONFIG_KERNEL_LZ4 is not set
+ CONFIG_OPTIMIZE_INLINING=y
++# CONFIG_SLAB is not set
++# CONFIG_SLUB is not set
+ CONFIG_SLOB=y
+diff --git a/kernel/cpuset.c b/kernel/cpuset.c
+index 73e93e53884d..40b6ed559448 100644
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -2078,6 +2078,20 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
+ mutex_unlock(&cpuset_mutex);
+ }
+
++/*
++ * Make sure the new task conforms to the current state of its parent,
++ * which could have been changed by cpuset just after it inherits the
++ * state from the parent and before it sits on the cgroup's task list.
++ */
++void cpuset_fork(struct task_struct *task)
++{
++ if (task_css_is_root(task, cpuset_cgrp_id))
++ return;
++
++ set_cpus_allowed_ptr(task, &current->cpus_allowed);
++ task->mems_allowed = current->mems_allowed;
++}
++
+ struct cgroup_subsys cpuset_cgrp_subsys = {
+ .css_alloc = cpuset_css_alloc,
+ .css_online = cpuset_css_online,
+@@ -2088,6 +2102,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
+ .attach = cpuset_attach,
+ .post_attach = cpuset_post_attach,
+ .bind = cpuset_bind,
++ .fork = cpuset_fork,
+ .legacy_cftypes = files,
+ .early_init = true,
+ };
+diff --git a/kernel/exit.c b/kernel/exit.c
+index 9e6e1356e6bb..26a766a7e07e 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -768,12 +768,7 @@ void do_exit(long code)
+ TASKS_RCU(preempt_enable());
+ exit_notify(tsk, group_dead);
+ proc_exit_connector(tsk);
+-#ifdef CONFIG_NUMA
+- task_lock(tsk);
+- mpol_put(tsk->mempolicy);
+- tsk->mempolicy = NULL;
+- task_unlock(tsk);
+-#endif
++ mpol_put_task_policy(tsk);
+ #ifdef CONFIG_FUTEX
+ if (unlikely(current->pi_state_cache))
+ kfree(current->pi_state_cache);
+diff --git a/kernel/fork.c b/kernel/fork.c
+index aea4f4da3836..74fd39079031 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -801,6 +801,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm)
+ EXPORT_SYMBOL(get_mm_exe_file);
+
+ /**
++ * get_task_exe_file - acquire a reference to the task's executable file
++ *
++ * Returns %NULL if task's mm (if any) has no associated executable file or
++ * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
++ * User must release file via fput().
++ */
++struct file *get_task_exe_file(struct task_struct *task)
++{
++ struct file *exe_file = NULL;
++ struct mm_struct *mm;
++
++ task_lock(task);
++ mm = task->mm;
++ if (mm) {
++ if (!(task->flags & PF_KTHREAD))
++ exe_file = get_mm_exe_file(mm);
++ }
++ task_unlock(task);
++ return exe_file;
++}
++EXPORT_SYMBOL(get_task_exe_file);
++
++/**
+ * get_task_mm - acquire a reference to the task's mm
+ *
+ * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
+diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
+index 0afe671f1770..6143b2f64b95 100644
+--- a/kernel/irq/msi.c
++++ b/kernel/irq/msi.c
+@@ -352,6 +352,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
+ ops->msi_finish(&arg, 0);
+
+ for_each_msi_entry(desc, dev) {
++ virq = desc->irq;
+ if (desc->nvec_used == 1)
+ dev_dbg(dev, "irq %d for MSI\n", virq);
+ else
+diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
+index 503bc2d348e5..037c321c5618 100644
+--- a/kernel/kexec_file.c
++++ b/kernel/kexec_file.c
+@@ -887,7 +887,10 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min,
+ return 0;
+ out:
+ vfree(pi->sechdrs);
++ pi->sechdrs = NULL;
++
+ vfree(pi->purgatory_buf);
++ pi->purgatory_buf = NULL;
+ return ret;
+ }
+
+diff --git a/kernel/memremap.c b/kernel/memremap.c
+index 017532193fb1..c2eb3a057764 100644
+--- a/kernel/memremap.c
++++ b/kernel/memremap.c
+@@ -253,6 +253,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
+ align_start = res->start & ~(SECTION_SIZE - 1);
+ align_size = ALIGN(resource_size(res), SECTION_SIZE);
+ arch_remove_memory(align_start, align_size);
++ untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ pgmap_radix_release(res);
+ dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
+ "%s: failed to free all reserved pages\n", __func__);
+@@ -288,6 +289,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
+ struct percpu_ref *ref, struct vmem_altmap *altmap)
+ {
+ resource_size_t key, align_start, align_size, align_end;
++ pgprot_t pgprot = PAGE_KERNEL;
+ struct dev_pagemap *pgmap;
+ struct page_map *page_map;
+ int error, nid, is_ram;
+@@ -363,6 +365,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
+ if (nid < 0)
+ nid = numa_mem_id();
+
++ error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
++ align_size);
++ if (error)
++ goto err_pfn_remap;
++
+ error = arch_add_memory(nid, align_start, align_size, true);
+ if (error)
+ goto err_add_memory;
+@@ -383,6 +390,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
+ return __va(res->start);
+
+ err_add_memory:
++ untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
++ err_pfn_remap:
+ err_radix:
+ pgmap_radix_release(res);
+ devres_free(page_map);
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 97ee9ac7e97c..38eacc323fdd 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2015,6 +2015,28 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+ success = 1; /* we're going to change ->state */
+ cpu = task_cpu(p);
+
++ /*
++ * Ensure we load p->on_rq _after_ p->state, otherwise it would
++ * be possible to, falsely, observe p->on_rq == 0 and get stuck
++ * in smp_cond_load_acquire() below.
++ *
++ * sched_ttwu_pending() try_to_wake_up()
++ * [S] p->on_rq = 1; [L] P->state
++ * UNLOCK rq->lock -----.
++ * \
++ * +--- RMB
++ * schedule() /
++ * LOCK rq->lock -----'
++ * UNLOCK rq->lock
++ *
++ * [task p]
++ * [S] p->state = UNINTERRUPTIBLE [L] p->on_rq
++ *
++ * Pairs with the UNLOCK+LOCK on rq->lock from the
++ * last wakeup of our task and the schedule that got our task
++ * current.
++ */
++ smp_rmb();
+ if (p->on_rq && ttwu_remote(p, wake_flags))
+ goto stat;
+
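The comment added above explains why try_to_wake_up() needs a read barrier: the load of p->on_rq must not be reordered before the load of p->state, or the waker can falsely observe p->on_rq == 0. A compilable, deliberately single-threaded C11 analogue of the required ordering, with an acquire fence standing in for the kernel's smp_rmb() (names and values are illustrative only, not kernel code):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int state;   /* 0 = RUNNING, 1 = UNINTERRUPTIBLE */
static atomic_int on_rq;

static void sleeper(void)
{
        atomic_store_explicit(&on_rq, 1, memory_order_release); /* [S] on_rq */
        atomic_store_explicit(&state, 1, memory_order_release); /* [S] state */
}

static int waker(void)
{
        if (!atomic_load_explicit(&state, memory_order_relaxed)) /* [L] state */
                return 0;
        atomic_thread_fence(memory_order_acquire); /* plays smp_rmb()'s role */
        return atomic_load_explicit(&on_rq, memory_order_relaxed); /* [L] on_rq */
}

int main(void)
{
        sleeper();
        printf("woken: %d\n", waker()); /* with the fence ordering, prints 1 */
        return 0;
}

Without the fence between the two loads, nothing would stop the on_rq load from being satisfied before the state load on a weakly ordered machine, which is exactly the race the diagram in the comment describes.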
+diff --git a/lib/iov_iter.c b/lib/iov_iter.c
+index 0cd522753ff5..eaaf73032441 100644
+--- a/lib/iov_iter.c
++++ b/lib/iov_iter.c
+@@ -302,33 +302,13 @@ done:
+ }
+
+ /*
+- * Fault in the first iovec of the given iov_iter, to a maximum length
+- * of bytes. Returns 0 on success, or non-zero if the memory could not be
+- * accessed (ie. because it is an invalid address).
+- *
+- * writev-intensive code may want this to prefault several iovecs -- that
+- * would be possible (callers must not rely on the fact that _only_ the
+- * first iovec will be faulted with the current implementation).
+- */
+-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
+-{
+- if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
+- char __user *buf = i->iov->iov_base + i->iov_offset;
+- bytes = min(bytes, i->iov->iov_len - i->iov_offset);
+- return fault_in_pages_readable(buf, bytes);
+- }
+- return 0;
+-}
+-EXPORT_SYMBOL(iov_iter_fault_in_readable);
+-
+-/*
+ * Fault in one or more iovecs of the given iov_iter, to a maximum length of
+ * bytes. For each iovec, fault in each page that constitutes the iovec.
+ *
+ * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
+ * because it is an invalid address).
+ */
+-int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
++int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
+ {
+ size_t skip = i->iov_offset;
+ const struct iovec *iov;
+@@ -345,7 +325,7 @@ int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
+ }
+ return 0;
+ }
+-EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable);
++EXPORT_SYMBOL(iov_iter_fault_in_readable);
+
+ void iov_iter_init(struct iov_iter *i, int direction,
+ const struct iovec *iov, unsigned long nr_segs,
+diff --git a/mm/mempolicy.c b/mm/mempolicy.c
+index 297d6854f849..e682861215b0 100644
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -2334,6 +2334,23 @@ out:
+ return ret;
+ }
+
++/*
++ * Drop the (possibly final) reference to task->mempolicy. It needs to be
++ * dropped after task->mempolicy is set to NULL so that any allocation done as
++ * part of its kmem_cache_free(), such as by KASAN, doesn't reference a freed
++ * policy.
++ */
++void mpol_put_task_policy(struct task_struct *task)
++{
++ struct mempolicy *pol;
++
++ task_lock(task);
++ pol = task->mempolicy;
++ task->mempolicy = NULL;
++ task_unlock(task);
++ mpol_put(pol);
++}
++
+ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
+ {
+ pr_debug("deleting %lx-l%lx\n", n->start, n->end);
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 8b3e1341b754..6e354199151b 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3254,53 +3254,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
+ return NULL;
+ }
+
+-static inline bool
+-should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
+- enum compact_result compact_result, enum migrate_mode *migrate_mode,
+- int compaction_retries)
+-{
+- int max_retries = MAX_COMPACT_RETRIES;
+-
+- if (!order)
+- return false;
+-
+- /*
+- * compaction considers all the zone as desperately out of memory
+- * so it doesn't really make much sense to retry except when the
+- * failure could be caused by weak migration mode.
+- */
+- if (compaction_failed(compact_result)) {
+- if (*migrate_mode == MIGRATE_ASYNC) {
+- *migrate_mode = MIGRATE_SYNC_LIGHT;
+- return true;
+- }
+- return false;
+- }
+-
+- /*
+- * make sure the compaction wasn't deferred or didn't bail out early
+- * due to locks contention before we declare that we should give up.
+- * But do not retry if the given zonelist is not suitable for
+- * compaction.
+- */
+- if (compaction_withdrawn(compact_result))
+- return compaction_zonelist_suitable(ac, order, alloc_flags);
+-
+- /*
+- * !costly requests are much more important than __GFP_REPEAT
+- * costly ones because they are de facto nofail and invoke OOM
+- * killer to move on while costly can fail and users are ready
+- * to cope with that. 1/4 retries is rather arbitrary but we
+- * would need much more detailed feedback from compaction to
+- * make a better decision.
+- */
+- if (order > PAGE_ALLOC_COSTLY_ORDER)
+- max_retries /= 4;
+- if (compaction_retries <= max_retries)
+- return true;
+-
+- return false;
+-}
+ #else
+ static inline struct page *
+ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
+@@ -3311,6 +3264,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
+ return NULL;
+ }
+
++#endif /* CONFIG_COMPACTION */
++
+ static inline bool
+ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
+ enum compact_result compact_result,
+@@ -3337,7 +3292,6 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
+ }
+ return false;
+ }
+-#endif /* CONFIG_COMPACTION */
+
+ /* Perform direct synchronous page reclaim */
+ static int
+diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
+index 43844144c9c4..d3abdaefe392 100644
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -1121,7 +1121,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
+ } else {
+ err = br_ip6_multicast_add_group(br, port,
+ &grec->grec_mca, vid);
+- if (!err)
++ if (err)
+ break;
+ }
+ }
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 904ff431d570..97fb3da5093a 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3979,6 +3979,22 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ }
+
+ /**
++ * netdev_is_rx_handler_busy - check if receive handler is registered
++ * @dev: device to check
++ *
++ * Check if a receive handler is already registered for a given device.
++ * Return true if there is one.
++ *
++ * The caller must hold the rtnl_mutex.
++ */
++bool netdev_is_rx_handler_busy(struct net_device *dev)
++{
++ ASSERT_RTNL();
++ return dev && rtnl_dereference(dev->rx_handler);
++}
++EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
++
++/**
+ * netdev_rx_handler_register - register receive handler
+ * @dev: device to register a handler for
+ * @rx_handler: receive handler to register
+diff --git a/net/core/filter.c b/net/core/filter.c
+index e759d90e8cef..bca32d63ab43 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -1353,54 +1353,33 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
+ {
+ int err;
+
+- if (!skb_cloned(skb))
+- return 0;
+- if (skb_clone_writable(skb, write_len))
+- return 0;
+- err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+- if (!err)
+- bpf_compute_data_end(skb);
++ err = skb_ensure_writable(skb, write_len);
++ bpf_compute_data_end(skb);
++
+ return err;
+ }
+
+ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
+ {
+- struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+- int offset = (int) r2;
++ unsigned int offset = (unsigned int) r2;
+ void *from = (void *) (long) r3;
+ unsigned int len = (unsigned int) r4;
+ void *ptr;
+
+ if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
+ return -EINVAL;
+-
+- /* bpf verifier guarantees that:
+- * 'from' pointer points to bpf program stack
+- * 'len' bytes of it were initialized
+- * 'len' > 0
+- * 'skb' is a valid pointer to 'struct sk_buff'
+- *
+- * so check for invalid 'offset' and too large 'len'
+- */
+- if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
++ if (unlikely(offset > 0xffff))
+ return -EFAULT;
+ if (unlikely(bpf_try_make_writable(skb, offset + len)))
+ return -EFAULT;
+
+- ptr = skb_header_pointer(skb, offset, len, sp->buff);
+- if (unlikely(!ptr))
+- return -EFAULT;
+-
++ ptr = skb->data + offset;
+ if (flags & BPF_F_RECOMPUTE_CSUM)
+ skb_postpull_rcsum(skb, ptr, len);
+
+ memcpy(ptr, from, len);
+
+- if (ptr == sp->buff)
+- /* skb_store_bits cannot return -EFAULT here */
+- skb_store_bits(skb, offset, ptr, len);
+-
+ if (flags & BPF_F_RECOMPUTE_CSUM)
+ skb_postpush_rcsum(skb, ptr, len);
+ if (flags & BPF_F_INVALIDATE_HASH)
+@@ -1423,12 +1402,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
+ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+ {
+ const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
+- int offset = (int) r2;
++ unsigned int offset = (unsigned int) r2;
+ void *to = (void *)(unsigned long) r3;
+ unsigned int len = (unsigned int) r4;
+ void *ptr;
+
+- if (unlikely((u32) offset > 0xffff))
++ if (unlikely(offset > 0xffff))
+ goto err_clear;
+
+ ptr = skb_header_pointer(skb, offset, len, to);
+@@ -1456,20 +1435,17 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
+ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+ {
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+- int offset = (int) r2;
+- __sum16 sum, *ptr;
++ unsigned int offset = (unsigned int) r2;
++ __sum16 *ptr;
+
+ if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
+ return -EINVAL;
+- if (unlikely((u32) offset > 0xffff))
+- return -EFAULT;
+- if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
++ if (unlikely(offset > 0xffff || offset & 1))
+ return -EFAULT;
+-
+- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+- if (unlikely(!ptr))
++ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
+ return -EFAULT;
+
++ ptr = (__sum16 *)(skb->data + offset);
+ switch (flags & BPF_F_HDR_FIELD_MASK) {
+ case 0:
+ if (unlikely(from != 0))
+@@ -1487,10 +1463,6 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+ return -EINVAL;
+ }
+
+- if (ptr == &sum)
+- /* skb_store_bits guaranteed to not return -EFAULT here */
+- skb_store_bits(skb, offset, ptr, sizeof(sum));
+-
+ return 0;
+ }
+
+@@ -1510,20 +1482,18 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
+ bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
+- int offset = (int) r2;
+- __sum16 sum, *ptr;
++ unsigned int offset = (unsigned int) r2;
++ __sum16 *ptr;
+
+ if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
+ BPF_F_HDR_FIELD_MASK)))
+ return -EINVAL;
+- if (unlikely((u32) offset > 0xffff))
++ if (unlikely(offset > 0xffff || offset & 1))
+ return -EFAULT;
+- if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
++ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
+ return -EFAULT;
+
+- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+- if (unlikely(!ptr))
+- return -EFAULT;
++ ptr = (__sum16 *)(skb->data + offset);
+ if (is_mmzero && !*ptr)
+ return 0;
+
+@@ -1546,10 +1516,6 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+
+ if (is_mmzero && !*ptr)
+ *ptr = CSUM_MANGLED_0;
+- if (ptr == &sum)
+- /* skb_store_bits guaranteed to not return -EFAULT here */
+- skb_store_bits(skb, offset, ptr, sizeof(sum));
+-
+ return 0;
+ }
+
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index d07fc076bea0..febca0f1008c 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -2452,9 +2452,7 @@ struct fib_route_iter {
+ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
+ loff_t pos)
+ {
+- struct fib_table *tb = iter->main_tb;
+ struct key_vector *l, **tp = &iter->tnode;
+- struct trie *t;
+ t_key key;
+
+ /* use cache location of next-to-find key */
+@@ -2462,8 +2460,6 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
+ pos -= iter->pos;
+ key = iter->key;
+ } else {
+- t = (struct trie *)tb->tb_data;
+- iter->tnode = t->kv;
+ iter->pos = 0;
+ key = 0;
+ }
+@@ -2504,12 +2500,12 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
+ return NULL;
+
+ iter->main_tb = tb;
++ t = (struct trie *)tb->tb_data;
++ iter->tnode = t->kv;
+
+ if (*pos != 0)
+ return fib_route_get_idx(iter, *pos);
+
+- t = (struct trie *)tb->tb_data;
+- iter->tnode = t->kv;
+ iter->pos = 0;
+ iter->key = 0;
+
+diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
+index a917903d5e97..cc701fa70b12 100644
+--- a/net/ipv4/ip_vti.c
++++ b/net/ipv4/ip_vti.c
+@@ -557,6 +557,33 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
+ .get_link_net = ip_tunnel_get_link_net,
+ };
+
++static bool is_vti_tunnel(const struct net_device *dev)
++{
++ return dev->netdev_ops == &vti_netdev_ops;
++}
++
++static int vti_device_event(struct notifier_block *unused,
++ unsigned long event, void *ptr)
++{
++ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++ struct ip_tunnel *tunnel = netdev_priv(dev);
++
++ if (!is_vti_tunnel(dev))
++ return NOTIFY_DONE;
++
++ switch (event) {
++ case NETDEV_DOWN:
++ if (!net_eq(tunnel->net, dev_net(dev)))
++ xfrm_garbage_collect(tunnel->net);
++ break;
++ }
++ return NOTIFY_DONE;
++}
++
++static struct notifier_block vti_notifier_block __read_mostly = {
++ .notifier_call = vti_device_event,
++};
++
+ static int __init vti_init(void)
+ {
+ const char *msg;
+@@ -564,6 +591,8 @@ static int __init vti_init(void)
+
+ pr_info("IPv4 over IPsec tunneling driver\n");
+
++ register_netdevice_notifier(&vti_notifier_block);
++
+ msg = "tunnel device";
+ err = register_pernet_device(&vti_net_ops);
+ if (err < 0)
+@@ -596,6 +625,7 @@ xfrm_proto_ah_failed:
+ xfrm_proto_esp_failed:
+ unregister_pernet_device(&vti_net_ops);
+ pernet_dev_failed:
++ unregister_netdevice_notifier(&vti_notifier_block);
+ pr_err("vti init: failed to register %s\n", msg);
+ return err;
+ }
+@@ -607,6 +637,7 @@ static void __exit vti_fini(void)
+ xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti_net_ops);
++ unregister_netdevice_notifier(&vti_notifier_block);
+ }
+
+ module_init(vti_init);
+diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
+index 54d9f9b0120f..478114b366d8 100644
+--- a/net/ipv4/tcp_fastopen.c
++++ b/net/ipv4/tcp_fastopen.c
+@@ -150,6 +150,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
+ tp->segs_in = 0;
+ tcp_segs_in(tp, skb);
+ __skb_pull(skb, tcp_hdrlen(skb));
++ sk_forced_mem_schedule(sk, skb->truesize);
+ skb_set_owner_r(skb, sk);
+
+ TCP_SKB_CB(skb)->seq++;
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 3708de2a6683..ba7ce3ffa0e3 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -814,8 +814,14 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+ tcp_sk(sk)->snd_nxt;
+
++ /* RFC 7323 2.3
++ * The window field (SEG.WND) of every outgoing segment, with the
++ * exception of <SYN> segments, MUST be right-shifted by
++ * Rcv.Wind.Shift bits:
++ */
+ tcp_v4_send_ack(sock_net(sk), skb, seq,
+- tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
++ tcp_rsk(req)->rcv_nxt,
++ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_time_stamp,
+ req->ts_recent,
+ 0,
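Per the RFC 7323 comment above (the IPv6 side below receives the same fix), the 16-bit window field of a non-SYN segment must carry the receive window right-shifted by the negotiated scale. A quick numeric sketch with invented example values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t rsk_rcv_wnd = 1u << 20;  /* 1 MiB receive window (example) */
        uint8_t  rcv_wscale  = 7;         /* negotiated shift (example) */
        uint16_t seg_wnd     = rsk_rcv_wnd >> rcv_wscale;

        /* Without the shift, 1 MiB cannot even be represented in the 16-bit
         * field; the peer reconstructs it as seg_wnd << rcv_wscale. */
        printf("on-wire window = %u (decodes to %u bytes)\n",
               seg_wnd, (uint32_t)seg_wnd << rcv_wscale);
        return 0;
}

Here the 1 MiB window goes on the wire as 8192 and decodes back to 1048576 bytes; advertising the unshifted value, as the old code did, would grossly overstate the window once scaling is in effect.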
+diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
+index 028eb046ea40..9c5fc973267f 100644
+--- a/net/ipv4/tcp_yeah.c
++++ b/net/ipv4/tcp_yeah.c
+@@ -76,7 +76,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+ if (!tcp_is_cwnd_limited(sk))
+ return;
+
+- if (tp->snd_cwnd <= tp->snd_ssthresh)
++ if (tcp_in_slow_start(tp))
+ tcp_slow_start(tp, acked);
+
+ else if (!yeah->doing_reno_now) {
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index e61f7cd65d08..00d18c57c83c 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1182,13 +1182,13 @@ out:
+ * @sk: socket
+ *
+ * Drops all bad checksum frames, until a valid one is found.
+- * Returns the length of found skb, or 0 if none is found.
++ * Returns the length of found skb, or -1 if none is found.
+ */
+-static unsigned int first_packet_length(struct sock *sk)
++static int first_packet_length(struct sock *sk)
+ {
+ struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
+ struct sk_buff *skb;
+- unsigned int res;
++ int res;
+
+ __skb_queue_head_init(&list_kill);
+
+@@ -1203,7 +1203,7 @@ static unsigned int first_packet_length(struct sock *sk)
+ __skb_unlink(skb, rcvq);
+ __skb_queue_tail(&list_kill, skb);
+ }
+- res = skb ? skb->len : 0;
++ res = skb ? skb->len : -1;
+ spin_unlock_bh(&rcvq->lock);
+
+ if (!skb_queue_empty(&list_kill)) {
+@@ -1232,7 +1232,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+
+ case SIOCINQ:
+ {
+- unsigned int amount = first_packet_length(sk);
++ int amount = max_t(int, 0, first_packet_length(sk));
+
+ return put_user(amount, (int __user *)arg);
+ }
+@@ -2184,7 +2184,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
+
+ /* Check for false positives due to checksum errors */
+ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+- !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
++ !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
+ mask &= ~(POLLIN | POLLRDNORM);
+
+ return mask;
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index 047c75a798b1..82e367b9e685 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -1906,6 +1906,7 @@ errdad:
+ spin_unlock_bh(&ifp->lock);
+
+ addrconf_mod_dad_work(ifp, 0);
++ in6_ifa_put(ifp);
+ }
+
+ /* Join to solicited addr multicast group.
+@@ -3469,7 +3470,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
+ /* combine the user config with event to determine if permanent
+ * addresses are to be removed from address hash table
+ */
+- keep_addr = !(how || _keep_addr <= 0);
++ keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
+
+ /* Step 2: clear hash table */
+ for (i = 0; i < IN6_ADDR_HSIZE; i++) {
+@@ -3525,7 +3526,7 @@ restart:
+ /* re-combine the user config with event to determine if permanent
+ * addresses are to be removed from the interface list
+ */
+- keep_addr = (!how && _keep_addr > 0);
++ keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
+
+ INIT_LIST_HEAD(&del_list);
+ list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
+@@ -3771,6 +3772,7 @@ static void addrconf_dad_work(struct work_struct *w)
+ addrconf_dad_begin(ifp);
+ goto out;
+ } else if (action == DAD_ABORT) {
++ in6_ifa_hold(ifp);
+ addrconf_dad_stop(ifp, 1);
+ goto out;
+ }
+diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
+index 7b0481e3738f..888543debe4e 100644
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1174,6 +1174,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
++ fl6.flowi6_proto = IPPROTO_IPIP;
+
+ dsfield = ipv4_get_dsfield(iph);
+
+@@ -1233,6 +1234,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
++ fl6.flowi6_proto = IPPROTO_IPV6;
+
+ dsfield = ipv6_get_dsfield(ipv6h);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
+index 3ee3e444a66b..408660477ba6 100644
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -122,8 +122,10 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+ rt = (struct rt6_info *) dst;
+
+ np = inet6_sk(sk);
+- if (!np)
+- return -EBADF;
++ if (!np) {
++ err = -EBADF;
++ goto dst_err_out;
++ }
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+@@ -160,6 +162,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+ }
+ release_sock(sk);
+
++dst_err_out:
++ dst_release(dst);
++
+ if (err)
+ return err;
+
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 2255d2bf5f6b..889acc471720 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -937,9 +937,15 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+ * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+ */
++ /* RFC 7323 2.3
++ * The window field (SEG.WND) of every outgoing segment, with the
++ * exception of <SYN> segments, MUST be right-shifted by
++ * Rcv.Wind.Shift bits:
++ */
+ tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
+ tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+- tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
++ tcp_rsk(req)->rcv_nxt,
++ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+ 0, 0);
+diff --git a/net/irda/iriap.c b/net/irda/iriap.c
+index 4a7ae32afa09..1138eaf5c682 100644
+--- a/net/irda/iriap.c
++++ b/net/irda/iriap.c
+@@ -185,8 +185,12 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv,
+
+ self->magic = IAS_MAGIC;
+ self->mode = mode;
+- if (mode == IAS_CLIENT)
+- iriap_register_lsap(self, slsap_sel, mode);
++ if (mode == IAS_CLIENT) {
++ if (iriap_register_lsap(self, slsap_sel, mode)) {
++ kfree(self);
++ return NULL;
++ }
++ }
+
+ self->confirm = callback;
+ self->priv = priv;
+diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
+index 0b68ba730a06..48613f5dd952 100644
+--- a/net/kcm/kcmsock.c
++++ b/net/kcm/kcmsock.c
+@@ -13,6 +13,7 @@
+ #include <linux/socket.h>
+ #include <linux/uaccess.h>
+ #include <linux/workqueue.h>
++#include <linux/syscalls.h>
+ #include <net/kcm.h>
+ #include <net/netns/generic.h>
+ #include <net/sock.h>
+@@ -2035,7 +2036,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+ if (copy_to_user((void __user *)arg, &info,
+ sizeof(info))) {
+ err = -EFAULT;
+- sock_release(newsock);
++ sys_close(info.fd);
+ }
+ }
+
+diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
+index ea4a2fef1b71..5c4cdea216fd 100644
+--- a/net/sched/act_ife.c
++++ b/net/sched/act_ife.c
+@@ -52,7 +52,7 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
+ u32 *tlv = (u32 *)(skbdata);
+ u16 totlen = nla_total_size(dlen); /*alignment + hdr */
+ char *dptr = (char *)tlv + NLA_HDRLEN;
+- u32 htlv = attrtype << 16 | totlen;
++ u32 htlv = attrtype << 16 | dlen;
+
+ *tlv = htonl(htlv);
+ memset(dptr, 0, totlen - NLA_HDRLEN);
+@@ -134,7 +134,7 @@ EXPORT_SYMBOL_GPL(ife_release_meta_gen);
+
+ int ife_validate_meta_u32(void *val, int len)
+ {
+- if (len == 4)
++ if (len == sizeof(u32))
+ return 0;
+
+ return -EINVAL;
+@@ -143,8 +143,8 @@ EXPORT_SYMBOL_GPL(ife_validate_meta_u32);
+
+ int ife_validate_meta_u16(void *val, int len)
+ {
+- /* length will include padding */
+- if (len == NLA_ALIGN(2))
++ /* length will not include padding */
++ if (len == sizeof(u16))
+ return 0;
+
+ return -EINVAL;
+@@ -652,12 +652,14 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
+ u8 *tlvdata = (u8 *)tlv;
+ u16 mtype = tlv->type;
+ u16 mlen = tlv->len;
++ u16 alen;
+
+ mtype = ntohs(mtype);
+ mlen = ntohs(mlen);
++ alen = NLA_ALIGN(mlen);
+
+- if (find_decode_metaid(skb, ife, mtype, (mlen - 4),
+- (void *)(tlvdata + 4))) {
++ if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN),
++ (void *)(tlvdata + NLA_HDRLEN))) {
+ /* abuse overlimits to count when we receive metadata
+ * but dont have an ops for it
+ */
+@@ -666,8 +668,8 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
+ ife->tcf_qstats.overlimits++;
+ }
+
+- tlvdata += mlen;
+- ifehdrln -= mlen;
++ tlvdata += alen;
++ ifehdrln -= alen;
+ tlv = (struct meta_tlvhdr *)tlvdata;
+ }
+
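The act_ife hunks above switch the TLV walk from the raw metadata length to its NLA_ALIGN()ed length, so each subsequent header stays 4-byte aligned. A standalone illustration of the stepping (the TLV lengths are invented examples):

#include <stdio.h>

#define NLA_ALIGNTO 4
#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))

int main(void)
{
        unsigned int off = 0;
        unsigned int mlen[] = { 6, 7, 8 };  /* raw TLV lengths (examples) */
        int i;

        for (i = 0; i < 3; i++) {
                printf("tlv at %u, raw len %u, step %u\n",
                       off, mlen[i], NLA_ALIGN(mlen[i]));
                off += NLA_ALIGN(mlen[i]);  /* old code stepped by mlen[i] */
        }
        printf("next tlv would start at %u (aligned)\n", off);
        return 0;
}

Stepping by the raw lengths would land the second and third headers at offsets 6 and 13; the aligned walk keeps them at 8 and 16.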
+diff --git a/net/sctp/proc.c b/net/sctp/proc.c
+index 4cb5aedfe3ee..ef8ba77a5bea 100644
+--- a/net/sctp/proc.c
++++ b/net/sctp/proc.c
+@@ -293,6 +293,7 @@ static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
+ return ERR_PTR(err);
+ }
+
++ iter->start_fail = 0;
+ return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
+ }
+
+diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
+index f69edcf219e5..10bae2201c6f 100644
+--- a/net/sctp/sctp_diag.c
++++ b/net/sctp/sctp_diag.c
+@@ -418,11 +418,13 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb,
+ paddr.v4.sin_family = AF_INET;
+ } else {
+ laddr.v6.sin6_port = req->id.idiag_sport;
+- memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64);
++ memcpy(&laddr.v6.sin6_addr, req->id.idiag_src,
++ sizeof(laddr.v6.sin6_addr));
+ laddr.v6.sin6_family = AF_INET6;
+
+ paddr.v6.sin6_port = req->id.idiag_dport;
+- memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64);
++ memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst,
++ sizeof(paddr.v6.sin6_addr));
+ paddr.v6.sin6_family = AF_INET6;
+ }
+
+diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
+index e085f5ae1548..4605dc73def6 100644
+--- a/net/sunrpc/auth_gss/svcauth_gss.c
++++ b/net/sunrpc/auth_gss/svcauth_gss.c
+@@ -569,9 +569,10 @@ gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle)
+ struct rsc *found;
+
+ memset(&rsci, 0, sizeof(rsci));
+- rsci.handle.data = handle->data;
+- rsci.handle.len = handle->len;
++ if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
++ return NULL;
+ found = rsc_lookup(cd, &rsci);
++ rsc_free(&rsci);
+ if (!found)
+ return NULL;
+ if (cache_check(cd, &found->h, NULL))
+diff --git a/net/tipc/socket.c b/net/tipc/socket.c
+index c49b8df438cb..f9f5f3c3dab5 100644
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -2180,7 +2180,8 @@ restart:
+ TIPC_CONN_MSG, SHORT_H_SIZE,
+ 0, dnode, onode, dport, oport,
+ TIPC_CONN_SHUTDOWN);
+- tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
++ if (skb)
++ tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+ }
+ tsk->connected = 0;
+ sock->state = SS_DISCONNECTING;
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 735362c26c8e..e444fa47ea46 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock *sk, int val)
+ {
+ struct unix_sock *u = unix_sk(sk);
+
+- if (mutex_lock_interruptible(&u->readlock))
++ if (mutex_lock_interruptible(&u->iolock))
+ return -EINTR;
+
+ sk->sk_peek_off = val;
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+
+ return 0;
+ }
+@@ -778,7 +778,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
+ spin_lock_init(&u->lock);
+ atomic_long_set(&u->inflight, 0);
+ INIT_LIST_HEAD(&u->link);
+- mutex_init(&u->readlock); /* single task reading lock */
++ mutex_init(&u->iolock); /* single task reading lock */
++ mutex_init(&u->bindlock); /* single task binding lock */
+ init_waitqueue_head(&u->peer_wait);
+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
+ unix_insert_socket(unix_sockets_unbound(sk), sk);
+@@ -847,7 +848,7 @@ static int unix_autobind(struct socket *sock)
+ int err;
+ unsigned int retries = 0;
+
+- err = mutex_lock_interruptible(&u->readlock);
++ err = mutex_lock_interruptible(&u->bindlock);
+ if (err)
+ return err;
+
+@@ -894,7 +895,7 @@ retry:
+ spin_unlock(&unix_table_lock);
+ err = 0;
+
+-out: mutex_unlock(&u->readlock);
++out: mutex_unlock(&u->bindlock);
+ return err;
+ }
+
+@@ -953,20 +954,32 @@ fail:
+ return NULL;
+ }
+
+-static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode,
+- struct path *res)
++static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+ {
+- int err;
++ struct dentry *dentry;
++ struct path path;
++ int err = 0;
++ /*
++ * Get the parent directory, calculate the hash for last
++ * component.
++ */
++ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
++ err = PTR_ERR(dentry);
++ if (IS_ERR(dentry))
++ return err;
+
+- err = security_path_mknod(path, dentry, mode, 0);
++ /*
++ * All right, let's create it.
++ */
++ err = security_path_mknod(&path, dentry, mode, 0);
+ if (!err) {
+- err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
++ err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+ if (!err) {
+- res->mnt = mntget(path->mnt);
++ res->mnt = mntget(path.mnt);
+ res->dentry = dget(dentry);
+ }
+ }
+-
++ done_path_create(&path, dentry);
+ return err;
+ }
+
+@@ -977,12 +990,10 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ struct unix_sock *u = unix_sk(sk);
+ struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
+ char *sun_path = sunaddr->sun_path;
+- int err, name_err;
++ int err;
+ unsigned int hash;
+ struct unix_address *addr;
+ struct hlist_head *list;
+- struct path path;
+- struct dentry *dentry;
+
+ err = -EINVAL;
+ if (sunaddr->sun_family != AF_UNIX)
+@@ -998,34 +1009,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ goto out;
+ addr_len = err;
+
+- name_err = 0;
+- dentry = NULL;
+- if (sun_path[0]) {
+- /* Get the parent directory, calculate the hash for last
+- * component.
+- */
+- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+-
+- if (IS_ERR(dentry)) {
+- /* delay report until after 'already bound' check */
+- name_err = PTR_ERR(dentry);
+- dentry = NULL;
+- }
+- }
+-
+- err = mutex_lock_interruptible(&u->readlock);
++ err = mutex_lock_interruptible(&u->bindlock);
+ if (err)
+- goto out_path;
++ goto out;
+
+ err = -EINVAL;
+ if (u->addr)
+ goto out_up;
+
+- if (name_err) {
+- err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+- goto out_up;
+- }
+-
+ err = -ENOMEM;
+ addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
+ if (!addr)
+@@ -1036,11 +1027,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ addr->hash = hash ^ sk->sk_type;
+ atomic_set(&addr->refcnt, 1);
+
+- if (dentry) {
+- struct path u_path;
++ if (sun_path[0]) {
++ struct path path;
+ umode_t mode = S_IFSOCK |
+ (SOCK_INODE(sock)->i_mode & ~current_umask());
+- err = unix_mknod(dentry, &path, mode, &u_path);
++ err = unix_mknod(sun_path, mode, &path);
+ if (err) {
+ if (err == -EEXIST)
+ err = -EADDRINUSE;
+@@ -1048,9 +1039,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ goto out_up;
+ }
+ addr->hash = UNIX_HASH_SIZE;
+- hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
++ hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+ spin_lock(&unix_table_lock);
+- u->path = u_path;
++ u->path = path;
+ list = &unix_socket_table[hash];
+ } else {
+ spin_lock(&unix_table_lock);
+@@ -1072,11 +1063,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ out_unlock:
+ spin_unlock(&unix_table_lock);
+ out_up:
+- mutex_unlock(&u->readlock);
+-out_path:
+- if (dentry)
+- done_path_create(&path, dentry);
+-
++ mutex_unlock(&u->bindlock);
+ out:
+ return err;
+ }
+@@ -1968,17 +1955,17 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
+ if (false) {
+ alloc_skb:
+ unix_state_unlock(other);
+- mutex_unlock(&unix_sk(other)->readlock);
++ mutex_unlock(&unix_sk(other)->iolock);
+ newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
+ &err, 0);
+ if (!newskb)
+ goto err;
+ }
+
+- /* we must acquire readlock as we modify already present
++ /* we must acquire iolock as we modify already present
+ * skbs in the sk_receive_queue and mess with skb->len
+ */
+- err = mutex_lock_interruptible(&unix_sk(other)->readlock);
++ err = mutex_lock_interruptible(&unix_sk(other)->iolock);
+ if (err) {
+ err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
+ goto err;
+@@ -2045,7 +2032,7 @@ alloc_skb:
+ }
+
+ unix_state_unlock(other);
+- mutex_unlock(&unix_sk(other)->readlock);
++ mutex_unlock(&unix_sk(other)->iolock);
+
+ other->sk_data_ready(other);
+ scm_destroy(&scm);
+@@ -2054,7 +2041,7 @@ alloc_skb:
+ err_state_unlock:
+ unix_state_unlock(other);
+ err_unlock:
+- mutex_unlock(&unix_sk(other)->readlock);
++ mutex_unlock(&unix_sk(other)->iolock);
+ err:
+ kfree_skb(newskb);
+ if (send_sigpipe && !(flags & MSG_NOSIGNAL))
+@@ -2122,7 +2109,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+ do {
+- mutex_lock(&u->readlock);
++ mutex_lock(&u->iolock);
+
+ skip = sk_peek_offset(sk, flags);
+ skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
+@@ -2130,14 +2117,14 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+ if (skb)
+ break;
+
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+
+ if (err != -EAGAIN)
+ break;
+ } while (timeo &&
+ !__skb_wait_for_more_packets(sk, &err, &timeo, last));
+
+- if (!skb) { /* implies readlock unlocked */
++ if (!skb) { /* implies iolock unlocked */
+ unix_state_lock(sk);
+ /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
+ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
+@@ -2202,7 +2189,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+
+ out_free:
+ skb_free_datagram(sk, skb);
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+ out:
+ return err;
+ }
+@@ -2297,7 +2284,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
+ /* Lock the socket to prevent queue disordering
+ * while sleeps in memcpy_tomsg
+ */
+- mutex_lock(&u->readlock);
++ mutex_lock(&u->iolock);
+
+ if (flags & MSG_PEEK)
+ skip = sk_peek_offset(sk, flags);
+@@ -2339,7 +2326,7 @@ again:
+ break;
+ }
+
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+
+ timeo = unix_stream_data_wait(sk, timeo, last,
+ last_len);
+@@ -2350,7 +2337,7 @@ again:
+ goto out;
+ }
+
+- mutex_lock(&u->readlock);
++ mutex_lock(&u->iolock);
+ goto redo;
+ unlock:
+ unix_state_unlock(sk);
+@@ -2453,7 +2440,7 @@ unlock:
+ }
+ } while (size);
+
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+ if (state->msg)
+ scm_recv(sock, state->msg, &scm, flags);
+ else
+@@ -2494,9 +2481,9 @@ static ssize_t skb_unix_socket_splice(struct sock *sk,
+ int ret;
+ struct unix_sock *u = unix_sk(sk);
+
+- mutex_unlock(&u->readlock);
++ mutex_unlock(&u->iolock);
+ ret = splice_to_pipe(pipe, spd);
+- mutex_lock(&u->readlock);
++ mutex_lock(&u->iolock);
+
+ return ret;
+ }
+diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
+index dbb2738e356a..6250b1cfcde5 100644
+--- a/net/wireless/wext-core.c
++++ b/net/wireless/wext-core.c
+@@ -958,29 +958,8 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
+ return private(dev, iwr, cmd, info, handler);
+ }
+ /* Old driver API : call driver ioctl handler */
+- if (dev->netdev_ops->ndo_do_ioctl) {
+-#ifdef CONFIG_COMPAT
+- if (info->flags & IW_REQUEST_FLAG_COMPAT) {
+- int ret = 0;
+- struct iwreq iwr_lcl;
+- struct compat_iw_point *iwp_compat = (void *) &iwr->u.data;
+-
+- memcpy(&iwr_lcl, iwr, sizeof(struct iwreq));
+- iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer);
+- iwr_lcl.u.data.length = iwp_compat->length;
+- iwr_lcl.u.data.flags = iwp_compat->flags;
+-
+- ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd);
+-
+- iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer);
+- iwp_compat->length = iwr_lcl.u.data.length;
+- iwp_compat->flags = iwr_lcl.u.data.flags;
+-
+- return ret;
+- } else
+-#endif
+- return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+- }
++ if (dev->netdev_ops->ndo_do_ioctl)
++ return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+ return -EOPNOTSUPP;
+ }
+
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-10-04 4:50 Alice Ferrazzi
0 siblings, 0 replies; 20+ messages in thread
From: Alice Ferrazzi @ 2016-10-04 4:50 UTC (permalink / raw
To: gentoo-commits
commit: d42281d873059025c03f41e8722a93ef404368d2
Author: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
AuthorDate: Tue Oct 4 04:49:30 2016 +0000
Commit: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
CommitDate: Tue Oct 4 04:49:30 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=d42281d8
Linux patch 4.7.6.
0000_README | 4 +
1005_linux-4.7.6.patch | 2572 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 2576 insertions(+)
diff --git a/0000_README b/0000_README
index fefac23..87048e9 100644
--- a/0000_README
+++ b/0000_README
@@ -63,6 +63,10 @@ Patch: 1004_linux-4.7.5.patch
From: http://www.kernel.org
Desc: Linux 4.7.5
+Patch: 1005_linux-4.7.6.patch
+From: http://www.kernel.org
+Desc: Linux 4.7.6
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1005_linux-4.7.6.patch b/1005_linux-4.7.6.patch
new file mode 100644
index 0000000..8c0ea69
--- /dev/null
+++ b/1005_linux-4.7.6.patch
@@ -0,0 +1,2572 @@
+diff --git a/Makefile b/Makefile
+index dd755d199ad6..48b0120be59b 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 7
+-SUBLEVEL = 5
++SUBLEVEL = 6
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+@@ -370,7 +370,7 @@ LDFLAGS_MODULE =
+ CFLAGS_KERNEL =
+ AFLAGS_KERNEL =
+ LDFLAGS_vmlinux =
+-CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized
++CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im
+ CFLAGS_KCOV = -fsanitize-coverage=trace-pc
+
+
+@@ -619,12 +619,13 @@ ARCH_CFLAGS :=
+ include arch/$(SRCARCH)/Makefile
+
+ KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,)
++KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,)
+
+ ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+-KBUILD_CFLAGS += -Os $(call cc-disable-warning,maybe-uninitialized,)
++KBUILD_CFLAGS += -Os
+ else
+ ifdef CONFIG_PROFILE_ALL_BRANCHES
+-KBUILD_CFLAGS += -O2 $(call cc-disable-warning,maybe-uninitialized,)
++KBUILD_CFLAGS += -O2
+ else
+ KBUILD_CFLAGS += -O2
+ endif
+diff --git a/arch/arc/Makefile b/arch/arc/Makefile
+index 85814e74677d..601ed173080b 100644
+--- a/arch/arc/Makefile
++++ b/arch/arc/Makefile
+@@ -74,9 +74,7 @@ endif
+ ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
+ # Generic build system uses -O2, we want -O3
+ # Note: No need to add to cflags-y as that happens anyways
+-#
+-# Disable the false maybe-uninitialized warings gcc spits out at -O3
+-ARCH_CFLAGS += -O3 $(call cc-disable-warning,maybe-uninitialized,)
++ARCH_CFLAGS += -O3
+ endif
+
+ # small data is default for elf32 tool-chain. If not usable, disable it
+diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
+index da3c0428507b..aef022a87c53 100644
+--- a/arch/arm/crypto/aes-ce-glue.c
++++ b/arch/arm/crypto/aes-ce-glue.c
+@@ -284,7 +284,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+- if (nbytes) {
++ if (walk.nbytes % AES_BLOCK_SIZE) {
+ u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 __aligned(8) tail[AES_BLOCK_SIZE];
+diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
+index 5c888049d061..6b2aa0fd6cd0 100644
+--- a/arch/arm64/crypto/aes-glue.c
++++ b/arch/arm64/crypto/aes-glue.c
+@@ -216,7 +216,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+- if (nbytes) {
++ if (walk.nbytes % AES_BLOCK_SIZE) {
+ u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 __aligned(8) tail[AES_BLOCK_SIZE];
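
[Editor's note: both CTR hunks above (arm and arm64) replace a check on the stale local `nbytes` with the walk's actual residue, `walk.nbytes % AES_BLOCK_SIZE`. A minimal userspace sketch of the pattern being fixed, with a dummy keystream standing in for the real AES core -- the helper names are illustrative, not the kernel's:]

#include <stddef.h>
#include <stdint.h>

#define BLOCK_SIZE 16

/* Placeholder keystream generator -- NOT a real cipher. */
static void keystream_block(uint8_t ks[BLOCK_SIZE], uint64_t ctr)
{
    for (int i = 0; i < BLOCK_SIZE; i++)
        ks[i] = (uint8_t)(ctr + i);
}

static void ctr_encrypt(uint8_t *dst, const uint8_t *src, size_t len)
{
    uint8_t ks[BLOCK_SIZE];
    uint64_t ctr = 0;
    size_t done = len - (len % BLOCK_SIZE);

    for (size_t off = 0; off < done; off += BLOCK_SIZE) {
        keystream_block(ks, ctr++);
        for (int i = 0; i < BLOCK_SIZE; i++)
            dst[off + i] = src[off + i] ^ ks[i];
    }

    /* The fix: gate the tail path on the residue itself, not on a
     * byte counter that the loop above may have left stale. */
    if (len % BLOCK_SIZE) {
        keystream_block(ks, ctr);
        for (size_t i = 0; i < len % BLOCK_SIZE; i++)
            dst[done + i] = src[done + i] ^ ks[i];
    }
}
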
+diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
+index 490db85dec23..95e32d9c6820 100644
+--- a/arch/arm64/kernel/smp.c
++++ b/arch/arm64/kernel/smp.c
+@@ -201,12 +201,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
+ return ret;
+ }
+
+-static void smp_store_cpu_info(unsigned int cpuid)
+-{
+- store_cpu_topology(cpuid);
+- numa_store_cpu_info(cpuid);
+-}
+-
+ /*
+ * This is the secondary CPU boot entry. We're using this CPUs
+ * idle thread stack, but a set of temporary page tables.
+@@ -254,7 +248,7 @@ asmlinkage void secondary_start_kernel(void)
+ */
+ notify_cpu_starting(cpu);
+
+- smp_store_cpu_info(cpu);
++ store_cpu_topology(cpu);
+
+ /*
+ * OK, now it's safe to let the boot CPU continue. Wait for
+@@ -687,10 +681,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
+ {
+ int err;
+ unsigned int cpu;
++ unsigned int this_cpu;
+
+ init_cpu_topology();
+
+- smp_store_cpu_info(smp_processor_id());
++ this_cpu = smp_processor_id();
++ store_cpu_topology(this_cpu);
++ numa_store_cpu_info(this_cpu);
+
+ /*
+ * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set
+@@ -717,6 +714,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
+ continue;
+
+ set_cpu_present(cpu, true);
++ numa_store_cpu_info(cpu);
+ }
+ }
+
+diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
+index f0e314ceb8ba..7f975b20b20c 100644
+--- a/arch/mips/Kconfig.debug
++++ b/arch/mips/Kconfig.debug
+@@ -113,42 +113,6 @@ config SPINLOCK_TEST
+ help
+ Add several files to the debugfs to test spinlock speed.
+
+-if CPU_MIPSR6
+-
+-choice
+- prompt "Compact branch policy"
+- default MIPS_COMPACT_BRANCHES_OPTIMAL
+-
+-config MIPS_COMPACT_BRANCHES_NEVER
+- bool "Never (force delay slot branches)"
+- help
+- Pass the -mcompact-branches=never flag to the compiler in order to
+- force it to always emit branches with delay slots, and make no use
+- of the compact branch instructions introduced by MIPSr6. This is
+- useful if you suspect there may be an issue with compact branches in
+- either the compiler or the CPU.
+-
+-config MIPS_COMPACT_BRANCHES_OPTIMAL
+- bool "Optimal (use where beneficial)"
+- help
+- Pass the -mcompact-branches=optimal flag to the compiler in order for
+- it to make use of compact branch instructions where it deems them
+- beneficial, and use branches with delay slots elsewhere. This is the
+- default compiler behaviour, and should be used unless you have a
+- reason to choose otherwise.
+-
+-config MIPS_COMPACT_BRANCHES_ALWAYS
+- bool "Always (force compact branches)"
+- help
+- Pass the -mcompact-branches=always flag to the compiler in order to
+- force it to always emit compact branches, making no use of branch
+- instructions with delay slots. This can result in more compact code
+- which may be beneficial in some scenarios.
+-
+-endchoice
+-
+-endif # CPU_MIPSR6
+-
+ config SCACHE_DEBUGFS
+ bool "L2 cache debugfs entries"
+ depends on DEBUG_FS
+diff --git a/arch/mips/Makefile b/arch/mips/Makefile
+index efd7a9dc93c4..598ab2930fce 100644
+--- a/arch/mips/Makefile
++++ b/arch/mips/Makefile
+@@ -203,10 +203,6 @@ endif
+ toolchain-virt := $(call cc-option-yn,$(mips-cflags) -mvirt)
+ cflags-$(toolchain-virt) += -DTOOLCHAIN_SUPPORTS_VIRT
+
+-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_NEVER) += -mcompact-branches=never
+-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_OPTIMAL) += -mcompact-branches=optimal
+-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_ALWAYS) += -mcompact-branches=always
+-
+ #
+ # Firmware support
+ #
+diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
+index 56584a659183..83054f79f72a 100644
+--- a/arch/mips/include/asm/asmmacro.h
++++ b/arch/mips/include/asm/asmmacro.h
+@@ -157,6 +157,7 @@
+ ldc1 $f28, THREAD_FPR28(\thread)
+ ldc1 $f30, THREAD_FPR30(\thread)
+ ctc1 \tmp, fcr31
++ .set pop
+ .endm
+
+ .macro fpu_restore_16odd thread
+diff --git a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
+index 2f82bfa3a773..c9f5769dfc8f 100644
+--- a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
++++ b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
+@@ -11,11 +11,13 @@
+ #define CP0_EBASE $15, 1
+
+ .macro kernel_entry_setup
++#ifdef CONFIG_SMP
+ mfc0 t0, CP0_EBASE
+ andi t0, t0, 0x3ff # CPUNum
+ beqz t0, 1f
+ # CPUs other than zero goto smp_bootstrap
+ j smp_bootstrap
++#endif /* CONFIG_SMP */
+
+ 1:
+ .endm
+diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
+index 7ff2a557f4aa..ae775758105f 100644
+--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
++++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
+@@ -1164,7 +1164,9 @@ fpu_emul:
+ regs->regs[31] = r31;
+ regs->cp0_epc = epc;
+ if (!used_math()) { /* First time FPU user. */
++ preempt_disable();
+ err = init_fpu();
++ preempt_enable();
+ set_used_math();
+ }
+ lose_fpu(1); /* Save FPU state for the emulator. */
+diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
+index 813ed7829c61..45cff9fcf575 100644
+--- a/arch/mips/kernel/process.c
++++ b/arch/mips/kernel/process.c
+@@ -591,14 +591,14 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
+ return -EOPNOTSUPP;
+
+ /* Avoid inadvertently triggering emulation */
+- if ((value & PR_FP_MODE_FR) && cpu_has_fpu &&
+- !(current_cpu_data.fpu_id & MIPS_FPIR_F64))
++ if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu &&
++ !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64))
+ return -EOPNOTSUPP;
+- if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre)
++ if ((value & PR_FP_MODE_FRE) && raw_cpu_has_fpu && !cpu_has_fre)
+ return -EOPNOTSUPP;
+
+ /* FR = 0 not supported in MIPS R6 */
+- if (!(value & PR_FP_MODE_FR) && cpu_has_fpu && cpu_has_mips_r6)
++ if (!(value & PR_FP_MODE_FR) && raw_cpu_has_fpu && cpu_has_mips_r6)
+ return -EOPNOTSUPP;
+
+ /* Proceed with the mode switch */
+diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
+index f9d01e953acb..dcf4a23ec074 100644
+--- a/arch/mips/kernel/smp.c
++++ b/arch/mips/kernel/smp.c
+@@ -320,6 +320,9 @@ asmlinkage void start_secondary(void)
+ cpumask_set_cpu(cpu, &cpu_coherent_mask);
+ notify_cpu_starting(cpu);
+
++ cpumask_set_cpu(cpu, &cpu_callin_map);
++ synchronise_count_slave(cpu);
++
+ set_cpu_online(cpu, true);
+
+ set_cpu_sibling_map(cpu);
+@@ -327,10 +330,6 @@ asmlinkage void start_secondary(void)
+
+ calculate_cpu_foreign_map();
+
+- cpumask_set_cpu(cpu, &cpu_callin_map);
+-
+- synchronise_count_slave(cpu);
+-
+ /*
+ * irq will be enabled in ->smp_finish(), enabling it too early
+ * is dangerous.
+diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
+index 54e1663ce639..0b30c02a5014 100644
+--- a/arch/mips/kernel/vdso.c
++++ b/arch/mips/kernel/vdso.c
+@@ -39,16 +39,16 @@ static struct vm_special_mapping vdso_vvar_mapping = {
+ static void __init init_vdso_image(struct mips_vdso_image *image)
+ {
+ unsigned long num_pages, i;
++ unsigned long data_pfn;
+
+ BUG_ON(!PAGE_ALIGNED(image->data));
+ BUG_ON(!PAGE_ALIGNED(image->size));
+
+ num_pages = image->size / PAGE_SIZE;
+
+- for (i = 0; i < num_pages; i++) {
+- image->mapping.pages[i] =
+- virt_to_page(image->data + (i * PAGE_SIZE));
+- }
++ data_pfn = __phys_to_pfn(__pa_symbol(image->data));
++ for (i = 0; i < num_pages; i++)
++ image->mapping.pages[i] = pfn_to_page(data_pfn + i);
+ }
+
+ static int __init init_vdso(void)
+diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
+index 7a1f7bbf4105..b2659b9d0809 100644
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -932,11 +932,11 @@ static void populate_pte(struct cpa_data *cpa,
+ }
+ }
+
+-static int populate_pmd(struct cpa_data *cpa,
+- unsigned long start, unsigned long end,
+- unsigned num_pages, pud_t *pud, pgprot_t pgprot)
++static long populate_pmd(struct cpa_data *cpa,
++ unsigned long start, unsigned long end,
++ unsigned num_pages, pud_t *pud, pgprot_t pgprot)
+ {
+- unsigned int cur_pages = 0;
++ long cur_pages = 0;
+ pmd_t *pmd;
+ pgprot_t pmd_pgprot;
+
+@@ -1006,12 +1006,12 @@ static int populate_pmd(struct cpa_data *cpa,
+ return num_pages;
+ }
+
+-static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
+- pgprot_t pgprot)
++static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
++ pgprot_t pgprot)
+ {
+ pud_t *pud;
+ unsigned long end;
+- int cur_pages = 0;
++ long cur_pages = 0;
+ pgprot_t pud_pgprot;
+
+ end = start + (cpa->numpages << PAGE_SHIFT);
+@@ -1067,7 +1067,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
+
+ /* Map trailing leftover */
+ if (start < end) {
+- int tmp;
++ long tmp;
+
+ pud = pud_offset(pgd, start);
+ if (pud_none(*pud))
+@@ -1093,7 +1093,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
+ pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
+ pud_t *pud = NULL; /* shut up gcc */
+ pgd_t *pgd_entry;
+- int ret;
++ long ret;
+
+ pgd_entry = cpa->pgd + pgd_index(addr);
+
+@@ -1336,7 +1336,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
+
+ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
+ {
+- int ret, numpages = cpa->numpages;
++ unsigned long numpages = cpa->numpages;
++ int ret;
+
+ while (numpages) {
+ /*
+diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
+index b226b3f497f1..964c7022d31d 100644
+--- a/arch/x86/platform/efi/efi_64.c
++++ b/arch/x86/platform/efi/efi_64.c
+@@ -244,7 +244,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+ * text and allocate a new stack because we can't rely on the
+ * stack pointer being < 4GB.
+ */
+- if (!IS_ENABLED(CONFIG_EFI_MIXED))
++ if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native())
+ return 0;
+
+ /*
+diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
+index 8cc1622b2ee0..dca7bc87dad9 100644
+--- a/crypto/blkcipher.c
++++ b/crypto/blkcipher.c
+@@ -234,6 +234,8 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc,
+ return blkcipher_walk_done(desc, walk, -EINVAL);
+ }
+
++ bsize = min(walk->walk_blocksize, n);
++
+ walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
+ BLKCIPHER_WALK_DIFF);
+ if (!scatterwalk_aligned(&walk->in, walk->alignmask) ||
+@@ -246,7 +248,6 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc,
+ }
+ }
+
+- bsize = min(walk->walk_blocksize, n);
+ n = scatterwalk_clamp(&walk->in, n);
+ n = scatterwalk_clamp(&walk->out, n);
+
+diff --git a/crypto/echainiv.c b/crypto/echainiv.c
+index b96a84560b67..343a74e96e2a 100644
+--- a/crypto/echainiv.c
++++ b/crypto/echainiv.c
+@@ -1,8 +1,8 @@
+ /*
+ * echainiv: Encrypted Chain IV Generator
+ *
+- * This generator generates an IV based on a sequence number by xoring it
+- * with a salt and then encrypting it with the same key as used to encrypt
++ * This generator generates an IV based on a sequence number by multiplying
++ * it with a salt and then encrypting it with the same key as used to encrypt
+ * the plain text. This algorithm requires that the block size be equal
+ * to the IV size. It is mainly useful for CBC.
+ *
+@@ -23,81 +23,17 @@
+ #include <linux/err.h>
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+-#include <linux/mm.h>
+ #include <linux/module.h>
+-#include <linux/percpu.h>
+-#include <linux/spinlock.h>
++#include <linux/slab.h>
+ #include <linux/string.h>
+
+-#define MAX_IV_SIZE 16
+-
+-static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv);
+-
+-/* We don't care if we get preempted and read/write IVs from the next CPU. */
+-static void echainiv_read_iv(u8 *dst, unsigned size)
+-{
+- u32 *a = (u32 *)dst;
+- u32 __percpu *b = echainiv_iv;
+-
+- for (; size >= 4; size -= 4) {
+- *a++ = this_cpu_read(*b);
+- b++;
+- }
+-}
+-
+-static void echainiv_write_iv(const u8 *src, unsigned size)
+-{
+- const u32 *a = (const u32 *)src;
+- u32 __percpu *b = echainiv_iv;
+-
+- for (; size >= 4; size -= 4) {
+- this_cpu_write(*b, *a);
+- a++;
+- b++;
+- }
+-}
+-
+-static void echainiv_encrypt_complete2(struct aead_request *req, int err)
+-{
+- struct aead_request *subreq = aead_request_ctx(req);
+- struct crypto_aead *geniv;
+- unsigned int ivsize;
+-
+- if (err == -EINPROGRESS)
+- return;
+-
+- if (err)
+- goto out;
+-
+- geniv = crypto_aead_reqtfm(req);
+- ivsize = crypto_aead_ivsize(geniv);
+-
+- echainiv_write_iv(subreq->iv, ivsize);
+-
+- if (req->iv != subreq->iv)
+- memcpy(req->iv, subreq->iv, ivsize);
+-
+-out:
+- if (req->iv != subreq->iv)
+- kzfree(subreq->iv);
+-}
+-
+-static void echainiv_encrypt_complete(struct crypto_async_request *base,
+- int err)
+-{
+- struct aead_request *req = base->data;
+-
+- echainiv_encrypt_complete2(req, err);
+- aead_request_complete(req, err);
+-}
+-
+ static int echainiv_encrypt(struct aead_request *req)
+ {
+ struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+ struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv);
+ struct aead_request *subreq = aead_request_ctx(req);
+- crypto_completion_t compl;
+- void *data;
++ __be64 nseqno;
++ u64 seqno;
+ u8 *info;
+ unsigned int ivsize = crypto_aead_ivsize(geniv);
+ int err;
+@@ -107,8 +43,6 @@ static int echainiv_encrypt(struct aead_request *req)
+
+ aead_request_set_tfm(subreq, ctx->child);
+
+- compl = echainiv_encrypt_complete;
+- data = req;
+ info = req->iv;
+
+ if (req->src != req->dst) {
+@@ -123,29 +57,30 @@ static int echainiv_encrypt(struct aead_request *req)
+ return err;
+ }
+
+- if (unlikely(!IS_ALIGNED((unsigned long)info,
+- crypto_aead_alignmask(geniv) + 1))) {
+- info = kmalloc(ivsize, req->base.flags &
+- CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
+- GFP_ATOMIC);
+- if (!info)
+- return -ENOMEM;
+-
+- memcpy(info, req->iv, ivsize);
+- }
+-
+- aead_request_set_callback(subreq, req->base.flags, compl, data);
++ aead_request_set_callback(subreq, req->base.flags,
++ req->base.complete, req->base.data);
+ aead_request_set_crypt(subreq, req->dst, req->dst,
+ req->cryptlen, info);
+ aead_request_set_ad(subreq, req->assoclen);
+
+- crypto_xor(info, ctx->salt, ivsize);
++ memcpy(&nseqno, info + ivsize - 8, 8);
++ seqno = be64_to_cpu(nseqno);
++ memset(info, 0, ivsize);
++
+ scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
+- echainiv_read_iv(info, ivsize);
+
+- err = crypto_aead_encrypt(subreq);
+- echainiv_encrypt_complete2(req, err);
+- return err;
++ do {
++ u64 a;
++
++ memcpy(&a, ctx->salt + ivsize - 8, 8);
++
++ a |= 1;
++ a *= seqno;
++
++ memcpy(info + ivsize - 8, &a, 8);
++ } while ((ivsize -= 8));
++
++ return crypto_aead_encrypt(subreq);
+ }
+
+ static int echainiv_decrypt(struct aead_request *req)
+@@ -192,8 +127,7 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
+ alg = crypto_spawn_aead_alg(spawn);
+
+ err = -EINVAL;
+- if (inst->alg.ivsize & (sizeof(u32) - 1) ||
+- inst->alg.ivsize > MAX_IV_SIZE)
++ if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize)
+ goto free_inst;
+
+ inst->alg.encrypt = echainiv_encrypt;
+@@ -202,7 +136,6 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
+ inst->alg.init = aead_init_geniv;
+ inst->alg.exit = aead_exit_geniv;
+
+- inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
+ inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx);
+ inst->alg.base.cra_ctxsize += inst->alg.ivsize;
+
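
[Editor's note: the rewritten echainiv_encrypt() above derives the IV arithmetically instead of from per-CPU state: each 64-bit lane becomes (salt | 1) * seqno. A standalone sketch of just that derivation, mirroring the patch's loop:]

#include <stdint.h>
#include <string.h>

/* Fill an IV of ivsize bytes (a non-zero multiple of 8) so that every
 * 64-bit lane is (salt_lane | 1) * seqno. Forcing bit 0 keeps the
 * multiplier odd, hence invertible modulo 2^64, so distinct sequence
 * numbers give distinct lanes. */
static void echainiv_fill(uint8_t *iv, const uint8_t *salt,
                          unsigned int ivsize, uint64_t seqno)
{
    do {
        uint64_t a;

        memcpy(&a, salt + ivsize - 8, sizeof(a));
        a |= 1;
        a *= seqno;
        memcpy(iv + ivsize - 8, &a, sizeof(a));
    } while ((ivsize -= 8));
}
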
+diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c
+index 56e1d633875e..6e6c76080d6a 100644
+--- a/drivers/gpu/drm/qxl/qxl_draw.c
++++ b/drivers/gpu/drm/qxl/qxl_draw.c
+@@ -136,6 +136,8 @@ static int qxl_palette_create_1bit(struct qxl_bo *palette_bo,
+ * correctly globaly, since that would require
+ * tracking all of our palettes. */
+ ret = qxl_bo_kmap(palette_bo, (void **)&pal);
++ if (ret)
++ return ret;
+ pal->num_ents = 2;
+ pal->unique = unique++;
+ if (visual == FB_VISUAL_TRUECOLOR || visual == FB_VISUAL_DIRECTCOLOR) {
+diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
+index 137125b5eae7..5ce71ce7b6c4 100644
+--- a/drivers/i2c/busses/i2c-eg20t.c
++++ b/drivers/i2c/busses/i2c-eg20t.c
+@@ -773,13 +773,6 @@ static int pch_i2c_probe(struct pci_dev *pdev,
+ /* Set the number of I2C channel instance */
+ adap_info->ch_num = id->driver_data;
+
+- ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
+- KBUILD_MODNAME, adap_info);
+- if (ret) {
+- pch_pci_err(pdev, "request_irq FAILED\n");
+- goto err_request_irq;
+- }
+-
+ for (i = 0; i < adap_info->ch_num; i++) {
+ pch_adap = &adap_info->pch_data[i].pch_adapter;
+ adap_info->pch_i2c_suspended = false;
+@@ -797,6 +790,17 @@ static int pch_i2c_probe(struct pci_dev *pdev,
+
+ pch_adap->dev.of_node = pdev->dev.of_node;
+ pch_adap->dev.parent = &pdev->dev;
++ }
++
++ ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
++ KBUILD_MODNAME, adap_info);
++ if (ret) {
++ pch_pci_err(pdev, "request_irq FAILED\n");
++ goto err_request_irq;
++ }
++
++ for (i = 0; i < adap_info->ch_num; i++) {
++ pch_adap = &adap_info->pch_data[i].pch_adapter;
+
+ pch_i2c_init(&adap_info->pch_data[i]);
+
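
[Editor's note: the eg20t hunk moves request_irq() after the per-channel adapter setup. A shared interrupt can fire the moment the handler is registered, so everything the handler dereferences must be initialized first. A toy illustration of the ordering, with function-pointer registration standing in for request_irq():]

#include <stdio.h>

struct adapter { int initialized; };

static void (*registered_handler)(struct adapter *);

static void irq_handler(struct adapter *a)
{
    /* Would have read garbage if registered before init. */
    printf("irq: adapter initialized=%d\n", a->initialized);
}

int main(void)
{
    struct adapter adap = { 0 };

    adap.initialized = 1;              /* set up all shared state first */
    registered_handler = irq_handler;  /* request_irq() analogue */
    registered_handler(&adap);         /* an immediate "interrupt" is safe */
    return 0;
}
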
+diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
+index 041050edd809..57256bb42fcd 100644
+--- a/drivers/i2c/busses/i2c-qup.c
++++ b/drivers/i2c/busses/i2c-qup.c
+@@ -1610,7 +1610,8 @@ static int qup_i2c_pm_resume_runtime(struct device *device)
+ #ifdef CONFIG_PM_SLEEP
+ static int qup_i2c_suspend(struct device *device)
+ {
+- qup_i2c_pm_suspend_runtime(device);
++ if (!pm_runtime_suspended(device))
++ return qup_i2c_pm_suspend_runtime(device);
+ return 0;
+ }
+
+diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c
+index 528e755c468f..3278ebf1cc5c 100644
+--- a/drivers/i2c/muxes/i2c-mux-pca954x.c
++++ b/drivers/i2c/muxes/i2c-mux-pca954x.c
+@@ -164,7 +164,7 @@ static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan)
+ /* Only select the channel if its different from the last channel */
+ if (data->last_chan != regval) {
+ ret = pca954x_reg_write(muxc->parent, client, regval);
+- data->last_chan = regval;
++ data->last_chan = ret ? 0 : regval;
+ }
+
+ return ret;
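
[Editor's note: the pca954x one-liner is a cache-coherency fix. If the channel-select write fails, the cached last_chan must not pretend it succeeded, or a later select of the same channel would be silently skipped. Sketch of the corrected shape; reg_write() here is a stub, not the driver's function:]

#include <stdint.h>

struct mux { uint8_t last_chan; };

static int reg_write(struct mux *m, uint8_t val)
{
    (void)m; (void)val;
    return 0;    /* stub: pretend the I2C write succeeded */
}

static int select_chan(struct mux *m, uint8_t regval)
{
    int ret = 0;

    if (m->last_chan != regval) {
        ret = reg_write(m, regval);
        /* The fix: forget the cached channel on failure so the
         * next call retries the hardware write. */
        m->last_chan = ret ? 0 : regval;
    }
    return ret;
}
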
+diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
+index a3a67216bce6..be00c4bc0b70 100644
+--- a/drivers/infiniband/hw/cxgb4/cm.c
++++ b/drivers/infiniband/hw/cxgb4/cm.c
+@@ -3011,9 +3011,9 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
+ PDBG("%s last streaming msg ack ep %p tid %u state %u "
+ "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
+ state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
++ mutex_lock(&ep->com.mutex);
+ kfree_skb(ep->mpa_skb);
+ ep->mpa_skb = NULL;
+- mutex_lock(&ep->com.mutex);
+ if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
+ stop_ep_timer(ep);
+ mutex_unlock(&ep->com.mutex);
+@@ -3582,6 +3582,16 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
+ ep->com.state = ABORTING;
+ else {
+ ep->com.state = CLOSING;
++
++ /*
++ * if we close before we see the fw4_ack() then we fix
++ * up the timer state since we're reusing it.
++ */
++ if (ep->mpa_skb &&
++ test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
++ clear_bit(STOP_MPA_TIMER, &ep->com.flags);
++ stop_ep_timer(ep);
++ }
+ start_ep_timer(ep);
+ }
+ set_bit(CLOSE_SENT, &ep->com.flags);
+diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
+index d3ef0fcaaddf..6acf69ee0639 100644
+--- a/drivers/irqchip/irq-mips-gic.c
++++ b/drivers/irqchip/irq-mips-gic.c
+@@ -638,27 +638,6 @@ static int gic_local_irq_domain_map(struct irq_domain *d, unsigned int virq,
+ if (!gic_local_irq_is_routable(intr))
+ return -EPERM;
+
+- /*
+- * HACK: These are all really percpu interrupts, but the rest
+- * of the MIPS kernel code does not use the percpu IRQ API for
+- * the CP0 timer and performance counter interrupts.
+- */
+- switch (intr) {
+- case GIC_LOCAL_INT_TIMER:
+- case GIC_LOCAL_INT_PERFCTR:
+- case GIC_LOCAL_INT_FDC:
+- irq_set_chip_and_handler(virq,
+- &gic_all_vpes_local_irq_controller,
+- handle_percpu_irq);
+- break;
+- default:
+- irq_set_chip_and_handler(virq,
+- &gic_local_irq_controller,
+- handle_percpu_devid_irq);
+- irq_set_percpu_devid(virq);
+- break;
+- }
+-
+ spin_lock_irqsave(&gic_lock, flags);
+ for (i = 0; i < gic_vpes; i++) {
+ u32 val = GIC_MAP_TO_PIN_MSK | gic_cpu_pin;
+@@ -724,16 +703,42 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
+ return 0;
+ }
+
+-static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
+- irq_hw_number_t hw)
++static int gic_setup_dev_chip(struct irq_domain *d, unsigned int virq,
++ unsigned int hwirq)
+ {
+- if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS)
+- return gic_local_irq_domain_map(d, virq, hw);
++ struct irq_chip *chip;
++ int err;
++
++ if (hwirq >= GIC_SHARED_HWIRQ_BASE) {
++ err = irq_domain_set_hwirq_and_chip(d, virq, hwirq,
++ &gic_level_irq_controller,
++ NULL);
++ } else {
++ switch (GIC_HWIRQ_TO_LOCAL(hwirq)) {
++ case GIC_LOCAL_INT_TIMER:
++ case GIC_LOCAL_INT_PERFCTR:
++ case GIC_LOCAL_INT_FDC:
++ /*
++ * HACK: These are all really percpu interrupts, but
++ * the rest of the MIPS kernel code does not use the
++ * percpu IRQ API for them.
++ */
++ chip = &gic_all_vpes_local_irq_controller;
++ irq_set_handler(virq, handle_percpu_irq);
++ break;
++
++ default:
++ chip = &gic_local_irq_controller;
++ irq_set_handler(virq, handle_percpu_devid_irq);
++ irq_set_percpu_devid(virq);
++ break;
++ }
+
+- irq_set_chip_and_handler(virq, &gic_level_irq_controller,
+- handle_level_irq);
++ err = irq_domain_set_hwirq_and_chip(d, virq, hwirq,
++ chip, NULL);
++ }
+
+- return gic_shared_irq_domain_map(d, virq, hw, 0);
++ return err;
+ }
+
+ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
+@@ -744,15 +749,12 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
+ int cpu, ret, i;
+
+ if (spec->type == GIC_DEVICE) {
+- /* verify that it doesn't conflict with an IPI irq */
+- if (test_bit(spec->hwirq, ipi_resrv))
++ /* verify that shared irqs don't conflict with an IPI irq */
++ if ((spec->hwirq >= GIC_SHARED_HWIRQ_BASE) &&
++ test_bit(GIC_HWIRQ_TO_SHARED(spec->hwirq), ipi_resrv))
+ return -EBUSY;
+
+- hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq);
+-
+- return irq_domain_set_hwirq_and_chip(d, virq, hwirq,
+- &gic_level_irq_controller,
+- NULL);
++ return gic_setup_dev_chip(d, virq, spec->hwirq);
+ } else {
+ base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs);
+ if (base_hwirq == gic_shared_intrs) {
+@@ -821,7 +823,6 @@ int gic_irq_domain_match(struct irq_domain *d, struct device_node *node,
+ }
+
+ static const struct irq_domain_ops gic_irq_domain_ops = {
+- .map = gic_irq_domain_map,
+ .alloc = gic_irq_domain_alloc,
+ .free = gic_irq_domain_free,
+ .match = gic_irq_domain_match,
+@@ -852,29 +853,20 @@ static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq,
+ struct irq_fwspec *fwspec = arg;
+ struct gic_irq_spec spec = {
+ .type = GIC_DEVICE,
+- .hwirq = fwspec->param[1],
+ };
+ int i, ret;
+- bool is_shared = fwspec->param[0] == GIC_SHARED;
+
+- if (is_shared) {
+- ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec);
+- if (ret)
+- return ret;
+- }
+-
+- for (i = 0; i < nr_irqs; i++) {
+- irq_hw_number_t hwirq;
++ if (fwspec->param[0] == GIC_SHARED)
++ spec.hwirq = GIC_SHARED_TO_HWIRQ(fwspec->param[1]);
++ else
++ spec.hwirq = GIC_LOCAL_TO_HWIRQ(fwspec->param[1]);
+
+- if (is_shared)
+- hwirq = GIC_SHARED_TO_HWIRQ(spec.hwirq + i);
+- else
+- hwirq = GIC_LOCAL_TO_HWIRQ(spec.hwirq + i);
++ ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec);
++ if (ret)
++ return ret;
+
+- ret = irq_domain_set_hwirq_and_chip(d, virq + i,
+- hwirq,
+- &gic_level_irq_controller,
+- NULL);
++ for (i = 0; i < nr_irqs; i++) {
++ ret = gic_setup_dev_chip(d, virq + i, spec.hwirq + i);
+ if (ret)
+ goto error;
+ }
+@@ -896,7 +888,10 @@ void gic_dev_domain_free(struct irq_domain *d, unsigned int virq,
+ static void gic_dev_domain_activate(struct irq_domain *domain,
+ struct irq_data *d)
+ {
+- gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0);
++ if (GIC_HWIRQ_TO_LOCAL(d->hwirq) < GIC_NUM_LOCAL_INTRS)
++ gic_local_irq_domain_map(domain, d->irq, d->hwirq);
++ else
++ gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0);
+ }
+
+ static struct irq_domain_ops gic_dev_domain_ops = {
+diff --git a/drivers/mtd/maps/pmcmsp-flash.c b/drivers/mtd/maps/pmcmsp-flash.c
+index 744ca5cacc9b..f9fa3fad728e 100644
+--- a/drivers/mtd/maps/pmcmsp-flash.c
++++ b/drivers/mtd/maps/pmcmsp-flash.c
+@@ -75,15 +75,15 @@ static int __init init_msp_flash(void)
+
+ printk(KERN_NOTICE "Found %d PMC flash devices\n", fcnt);
+
+- msp_flash = kmalloc(fcnt * sizeof(struct map_info *), GFP_KERNEL);
++ msp_flash = kcalloc(fcnt, sizeof(*msp_flash), GFP_KERNEL);
+ if (!msp_flash)
+ return -ENOMEM;
+
+- msp_parts = kmalloc(fcnt * sizeof(struct mtd_partition *), GFP_KERNEL);
++ msp_parts = kcalloc(fcnt, sizeof(*msp_parts), GFP_KERNEL);
+ if (!msp_parts)
+ goto free_msp_flash;
+
+- msp_maps = kcalloc(fcnt, sizeof(struct mtd_info), GFP_KERNEL);
++ msp_maps = kcalloc(fcnt, sizeof(*msp_maps), GFP_KERNEL);
+ if (!msp_maps)
+ goto free_msp_parts;
+
+diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
+index 142fc3d79463..784c6e1a0391 100644
+--- a/drivers/mtd/maps/sa1100-flash.c
++++ b/drivers/mtd/maps/sa1100-flash.c
+@@ -230,8 +230,10 @@ static struct sa_info *sa1100_setup_mtd(struct platform_device *pdev,
+
+ info->mtd = mtd_concat_create(cdev, info->num_subdev,
+ plat->name);
+- if (info->mtd == NULL)
++ if (info->mtd == NULL) {
+ ret = -ENXIO;
++ goto err;
++ }
+ }
+ info->mtd->dev.parent = &pdev->dev;
+
+diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
+index 5173fadc9a4e..57cbe2b83849 100644
+--- a/drivers/mtd/nand/mxc_nand.c
++++ b/drivers/mtd/nand/mxc_nand.c
+@@ -943,7 +943,7 @@ static int mxc_v2_ooblayout_free(struct mtd_info *mtd, int section,
+ struct nand_chip *nand_chip = mtd_to_nand(mtd);
+ int stepsize = nand_chip->ecc.bytes == 9 ? 16 : 26;
+
+- if (section > nand_chip->ecc.steps)
++ if (section >= nand_chip->ecc.steps)
+ return -ERANGE;
+
+ if (!section) {
+diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
+index c52e45594bfd..a6adb2785b14 100644
+--- a/drivers/mtd/spi-nor/spi-nor.c
++++ b/drivers/mtd/spi-nor/spi-nor.c
+@@ -661,7 +661,7 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
+ status_new = (status_old & ~mask & ~SR_TB) | val;
+
+ /* Don't protect status register if we're fully unlocked */
+- if (lock_len == mtd->size)
++ if (lock_len == 0)
+ status_new &= ~SR_SRWD;
+
+ if (!use_top)
+diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
+index 41c0fc9f3b14..16f7cadda5c3 100644
+--- a/drivers/net/can/flexcan.c
++++ b/drivers/net/can/flexcan.c
+@@ -1268,11 +1268,10 @@ static int __maybe_unused flexcan_suspend(struct device *device)
+ struct flexcan_priv *priv = netdev_priv(dev);
+ int err;
+
+- err = flexcan_chip_disable(priv);
+- if (err)
+- return err;
+-
+ if (netif_running(dev)) {
++ err = flexcan_chip_disable(priv);
++ if (err)
++ return err;
+ netif_stop_queue(dev);
+ netif_device_detach(dev);
+ }
+@@ -1285,13 +1284,17 @@ static int __maybe_unused flexcan_resume(struct device *device)
+ {
+ struct net_device *dev = dev_get_drvdata(device);
+ struct flexcan_priv *priv = netdev_priv(dev);
++ int err;
+
+ priv->can.state = CAN_STATE_ERROR_ACTIVE;
+ if (netif_running(dev)) {
+ netif_device_attach(dev);
+ netif_start_queue(dev);
++ err = flexcan_chip_enable(priv);
++ if (err)
++ return err;
+ }
+- return flexcan_chip_enable(priv);
++ return 0;
+ }
+
+ static SIMPLE_DEV_PM_OPS(flexcan_pm_ops, flexcan_suspend, flexcan_resume);
+diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c
+index 2d1d22eec750..368bb0710d8f 100644
+--- a/drivers/net/can/ifi_canfd/ifi_canfd.c
++++ b/drivers/net/can/ifi_canfd/ifi_canfd.c
+@@ -81,6 +81,10 @@
+ #define IFI_CANFD_TIME_SET_TIMEA_4_12_6_6 BIT(15)
+
+ #define IFI_CANFD_TDELAY 0x1c
++#define IFI_CANFD_TDELAY_DEFAULT 0xb
++#define IFI_CANFD_TDELAY_MASK 0x3fff
++#define IFI_CANFD_TDELAY_ABS BIT(14)
++#define IFI_CANFD_TDELAY_EN BIT(15)
+
+ #define IFI_CANFD_ERROR 0x20
+ #define IFI_CANFD_ERROR_TX_OFFSET 0
+@@ -641,7 +645,7 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev)
+ struct ifi_canfd_priv *priv = netdev_priv(ndev);
+ const struct can_bittiming *bt = &priv->can.bittiming;
+ const struct can_bittiming *dbt = &priv->can.data_bittiming;
+- u16 brp, sjw, tseg1, tseg2;
++ u16 brp, sjw, tseg1, tseg2, tdc;
+
+ /* Configure bit timing */
+ brp = bt->brp - 2;
+@@ -664,6 +668,11 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev)
+ (brp << IFI_CANFD_TIME_PRESCALE_OFF) |
+ (sjw << IFI_CANFD_TIME_SJW_OFF_7_9_8_8),
+ priv->base + IFI_CANFD_FTIME);
++
++ /* Configure transmitter delay */
++ tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK;
++ writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc,
++ priv->base + IFI_CANFD_TDELAY);
+ }
+
+ static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id,
+diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
+index f097c5a8ab93..3c7bcdf76f1f 100644
+--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
++++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
+@@ -743,7 +743,8 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
+ }
+ }
+
+- shhwtstamps.hwtstamp = ktime_sub_ns(shhwtstamps.hwtstamp, adjust);
++ shhwtstamps.hwtstamp =
++ ktime_add_ns(shhwtstamps.hwtstamp, adjust);
+
+ skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+@@ -766,13 +767,32 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
+ struct sk_buff *skb)
+ {
+ __le64 *regval = (__le64 *)va;
++ struct igb_adapter *adapter = q_vector->adapter;
++ int adjust = 0;
+
+ /* The timestamp is recorded in little endian format.
+ * DWORD: 0 1 2 3
+ * Field: Reserved Reserved SYSTIML SYSTIMH
+ */
+- igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
++ igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
+ le64_to_cpu(regval[1]));
++
++ /* adjust timestamp for the RX latency based on link speed */
++ if (adapter->hw.mac.type == e1000_i210) {
++ switch (adapter->link_speed) {
++ case SPEED_10:
++ adjust = IGB_I210_RX_LATENCY_10;
++ break;
++ case SPEED_100:
++ adjust = IGB_I210_RX_LATENCY_100;
++ break;
++ case SPEED_1000:
++ adjust = IGB_I210_RX_LATENCY_1000;
++ break;
++ }
++ }
++ skb_hwtstamps(skb)->hwtstamp =
++ ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+ }
+
+ /**
+@@ -824,7 +844,7 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
+ }
+ }
+ skb_hwtstamps(skb)->hwtstamp =
+- ktime_add_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
++ ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+
+ /* Update the last_rx_timestamp timer in order to enable watchdog check
+ * for error case of latched timestamp on a dropped packet.
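
[Editor's note: the igb hunks compensate hardware timestamps for MAC latency, and the direction matters: RX stamps are latched after the packet crossed the wire, so the latency is subtracted, while the earlier TX hunk switches to ktime_add_ns() for the symmetric reason. A sketch with placeholder latencies -- the real IGB_I210_RX_LATENCY_* constants live in the driver header:]

#include <stdint.h>

/* Placeholder latencies in ns -- not the driver's actual constants. */
static uint64_t rx_latency_ns(int link_speed_mbps)
{
    switch (link_speed_mbps) {
    case 10:   return 15000;
    case 100:  return 1500;
    case 1000: return 150;
    default:   return 0;
    }
}

static uint64_t adjust_rx_stamp(uint64_t hwstamp_ns, int link_speed_mbps)
{
    /* ktime_sub_ns() analogue: move the stamp back to wire time. */
    return hwstamp_ns - rx_latency_ns(link_speed_mbps);
}
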
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+index 8bebd862a54c..58153e818e81 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+@@ -4100,6 +4100,8 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 vlnctrl, i;
+
++ vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
++
+ switch (hw->mac.type) {
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+@@ -4112,8 +4114,7 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
+ /* fall through */
+ case ixgbe_mac_82598EB:
+ /* legacy case, we can just disable VLAN filtering */
+- vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+- vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
++ vlnctrl &= ~IXGBE_VLNCTRL_VFE;
+ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+ return;
+ }
+@@ -4125,6 +4126,10 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
+ /* Set flag so we don't redo unnecessary work */
+ adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC;
+
++ /* For VMDq and SR-IOV we must leave VLAN filtering enabled */
++ vlnctrl |= IXGBE_VLNCTRL_VFE;
++ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
++
+ /* Add PF to all active pools */
+ for (i = IXGBE_VLVF_ENTRIES; --i;) {
+ u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32);
+@@ -4191,6 +4196,11 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 vlnctrl, i;
+
++ /* Set VLAN filtering to enabled */
++ vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
++ vlnctrl |= IXGBE_VLNCTRL_VFE;
++ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
++
+ switch (hw->mac.type) {
+ case ixgbe_mac_82599EB:
+ case ixgbe_mac_X540:
+@@ -4202,10 +4212,6 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
+ break;
+ /* fall through */
+ case ixgbe_mac_82598EB:
+- vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+- vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
+- vlnctrl |= IXGBE_VLNCTRL_VFE;
+- IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+ return;
+ }
+
+@@ -9496,6 +9502,7 @@ skip_sriov:
+
+ /* copy netdev features into list of user selectable features */
+ netdev->hw_features |= netdev->features |
++ NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_RXALL |
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+index 779bafcbc9a1..b92b75fea92f 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+@@ -501,6 +501,15 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
+ int hdrlen = ieee80211_hdrlen(hdr->frame_control);
+ int queue;
+
++ /* IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
++ * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
++ * queue. STATION (HS2.0) uses the auxiliary context of the FW,
++ * and hence needs to be sent on the aux queue
++ */
++ if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
++ skb_info->control.vif->type == NL80211_IFTYPE_STATION)
++ IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
++
+ memcpy(&info, skb->cb, sizeof(info));
+
+ if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU))
+@@ -514,16 +523,6 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
+ /* This holds the amsdu headers length */
+ skb_info->driver_data[0] = (void *)(uintptr_t)0;
+
+- /*
+- * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
+- * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
+- * queue. STATION (HS2.0) uses the auxiliary context of the FW,
+- * and hence needs to be sent on the aux queue
+- */
+- if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
+- info.control.vif->type == NL80211_IFTYPE_STATION)
+- IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
+-
+ queue = info.hw_queue;
+
+ /*
+diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c
+index 9c65f134d447..da7a75f82489 100644
+--- a/drivers/power/max17042_battery.c
++++ b/drivers/power/max17042_battery.c
+@@ -457,13 +457,16 @@ static inline void max17042_write_model_data(struct max17042_chip *chip,
+ }
+
+ static inline void max17042_read_model_data(struct max17042_chip *chip,
+- u8 addr, u32 *data, int size)
++ u8 addr, u16 *data, int size)
+ {
+ struct regmap *map = chip->regmap;
+ int i;
++ u32 tmp;
+
+- for (i = 0; i < size; i++)
+- regmap_read(map, addr + i, &data[i]);
++ for (i = 0; i < size; i++) {
++ regmap_read(map, addr + i, &tmp);
++ data[i] = (u16)tmp;
++ }
+ }
+
+ static inline int max17042_model_data_compare(struct max17042_chip *chip,
+@@ -486,7 +489,7 @@ static int max17042_init_model(struct max17042_chip *chip)
+ {
+ int ret;
+ int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
+- u32 *temp_data;
++ u16 *temp_data;
+
+ temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
+ if (!temp_data)
+@@ -501,7 +504,7 @@ static int max17042_init_model(struct max17042_chip *chip)
+ ret = max17042_model_data_compare(
+ chip,
+ chip->pdata->config_data->cell_char_tbl,
+- (u16 *)temp_data,
++ temp_data,
+ table_size);
+
+ max10742_lock_model(chip);
+@@ -514,7 +517,7 @@ static int max17042_verify_model_lock(struct max17042_chip *chip)
+ {
+ int i;
+ int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
+- u32 *temp_data;
++ u16 *temp_data;
+ int ret = 0;
+
+ temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
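
[Editor's note: the max17042 change is a width fix. regmap_read() returns through a 32-bit out-parameter, so filling a u32 array and then casting the buffer to u16 * scrambles the model table. The corrected shape, sketched with a stubbed register read:]

#include <stdint.h>

static int read_reg(uint8_t addr, uint32_t *val)   /* regmap_read stub */
{
    *val = addr;    /* dummy data */
    return 0;
}

static void read_model_data(uint8_t addr, uint16_t *data, int size)
{
    uint32_t tmp;

    for (int i = 0; i < size; i++) {
        read_reg(addr + i, &tmp);
        data[i] = (uint16_t)tmp;    /* explicit narrowing, no aliasing */
    }
}
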
+diff --git a/drivers/power/reset/hisi-reboot.c b/drivers/power/reset/hisi-reboot.c
+index 9ab7f562a83b..f69387e12c1e 100644
+--- a/drivers/power/reset/hisi-reboot.c
++++ b/drivers/power/reset/hisi-reboot.c
+@@ -53,13 +53,16 @@ static int hisi_reboot_probe(struct platform_device *pdev)
+
+ if (of_property_read_u32(np, "reboot-offset", &reboot_offset) < 0) {
+ pr_err("failed to find reboot-offset property\n");
++ iounmap(base);
+ return -EINVAL;
+ }
+
+ err = register_restart_handler(&hisi_restart_nb);
+- if (err)
++ if (err) {
+ dev_err(&pdev->dev, "cannot register restart handler (err=%d)\n",
+ err);
++ iounmap(base);
++ }
+
+ return err;
+ }
+diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c
+index 73dfae41def8..4c56e54af6ac 100644
+--- a/drivers/power/tps65217_charger.c
++++ b/drivers/power/tps65217_charger.c
+@@ -206,6 +206,7 @@ static int tps65217_charger_probe(struct platform_device *pdev)
+ if (!charger)
+ return -ENOMEM;
+
++ platform_set_drvdata(pdev, charger);
+ charger->tps = tps;
+ charger->dev = &pdev->dev;
+
+diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
+index 821d9c089cdb..f25f7dce6e1f 100644
+--- a/drivers/rtc/rtc-ds1307.c
++++ b/drivers/rtc/rtc-ds1307.c
+@@ -602,6 +602,8 @@ static const struct rtc_class_ops ds13xx_rtc_ops = {
+ * Alarm support for mcp794xx devices.
+ */
+
++#define MCP794XX_REG_WEEKDAY 0x3
++#define MCP794XX_REG_WEEKDAY_WDAY_MASK 0x7
+ #define MCP794XX_REG_CONTROL 0x07
+ # define MCP794XX_BIT_ALM0_EN 0x10
+ # define MCP794XX_BIT_ALM1_EN 0x20
+@@ -1231,13 +1233,16 @@ static int ds1307_probe(struct i2c_client *client,
+ {
+ struct ds1307 *ds1307;
+ int err = -ENODEV;
+- int tmp;
++ int tmp, wday;
+ struct chip_desc *chip = &chips[id->driver_data];
+ struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+ bool want_irq = false;
+ bool ds1307_can_wakeup_device = false;
+ unsigned char *buf;
+ struct ds1307_platform_data *pdata = dev_get_platdata(&client->dev);
++ struct rtc_time tm;
++ unsigned long timestamp;
++
+ irq_handler_t irq_handler = ds1307_irq;
+
+ static const int bbsqi_bitpos[] = {
+@@ -1526,6 +1531,27 @@ read_rtc:
+ bin2bcd(tmp));
+ }
+
++ /*
++ * Some IPs have weekday reset value = 0x1 which might not correct
++ * hence compute the wday using the current date/month/year values
++ */
++ ds1307_get_time(&client->dev, &tm);
++ wday = tm.tm_wday;
++ timestamp = rtc_tm_to_time64(&tm);
++ rtc_time64_to_tm(timestamp, &tm);
++
++ /*
++ * Check if reset wday is different from the computed wday
++ * If different then set the wday which we computed using
++ * timestamp
++ */
++ if (wday != tm.tm_wday) {
++ wday = i2c_smbus_read_byte_data(client, MCP794XX_REG_WEEKDAY);
++ wday = wday & ~MCP794XX_REG_WEEKDAY_WDAY_MASK;
++ wday = wday | (tm.tm_wday + 1);
++ i2c_smbus_write_byte_data(client, MCP794XX_REG_WEEKDAY, wday);
++ }
++
+ if (want_irq) {
+ device_set_wakeup_capable(&client->dev, true);
+ set_bit(HAS_ALARM, &ds1307->flags);
+diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
+index bb173456bbff..5031c89b0d0d 100644
+--- a/drivers/soc/tegra/pmc.c
++++ b/drivers/soc/tegra/pmc.c
+@@ -1205,6 +1205,14 @@ static int tegra_pmc_probe(struct platform_device *pdev)
+ struct resource *res;
+ int err;
+
++ /*
++ * Early initialisation should have configured an initial
++ * register mapping and setup the soc data pointer. If these
++ * are not valid then something went badly wrong!
++ */
++ if (WARN_ON(!pmc->base || !pmc->soc))
++ return -ENODEV;
++
+ err = tegra_pmc_parse_dt(pmc, pdev->dev.of_node);
+ if (err < 0)
+ return err;
+diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
+index b493909e7492..d8e6d421c27f 100644
+--- a/fs/autofs4/expire.c
++++ b/fs/autofs4/expire.c
+@@ -417,6 +417,7 @@ static struct dentry *should_expire(struct dentry *dentry,
+ }
+ return NULL;
+ }
++
+ /*
+ * Find an eligible tree to time-out
+ * A tree is eligible if :-
+@@ -432,6 +433,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
+ struct dentry *root = sb->s_root;
+ struct dentry *dentry;
+ struct dentry *expired;
++ struct dentry *found;
+ struct autofs_info *ino;
+
+ if (!root)
+@@ -442,31 +444,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
+
+ dentry = NULL;
+ while ((dentry = get_next_positive_subdir(dentry, root))) {
++ int flags = how;
++
+ spin_lock(&sbi->fs_lock);
+ ino = autofs4_dentry_ino(dentry);
+- if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
+- expired = NULL;
+- else
+- expired = should_expire(dentry, mnt, timeout, how);
+- if (!expired) {
++ if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
+ spin_unlock(&sbi->fs_lock);
+ continue;
+ }
++ spin_unlock(&sbi->fs_lock);
++
++ expired = should_expire(dentry, mnt, timeout, flags);
++ if (!expired)
++ continue;
++
++ spin_lock(&sbi->fs_lock);
+ ino = autofs4_dentry_ino(expired);
+ ino->flags |= AUTOFS_INF_WANT_EXPIRE;
+ spin_unlock(&sbi->fs_lock);
+ synchronize_rcu();
+- spin_lock(&sbi->fs_lock);
+- if (should_expire(expired, mnt, timeout, how)) {
+- if (expired != dentry)
+- dput(dentry);
+- goto found;
+- }
+
++ /* Make sure a reference is not taken on found if
++ * things have changed.
++ */
++ flags &= ~AUTOFS_EXP_LEAVES;
++ found = should_expire(expired, mnt, timeout, how);
++ if (!found || found != expired)
++ /* Something has changed, continue */
++ goto next;
++
++ if (expired != dentry)
++ dput(dentry);
++
++ spin_lock(&sbi->fs_lock);
++ goto found;
++next:
++ spin_lock(&sbi->fs_lock);
+ ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
++ spin_unlock(&sbi->fs_lock);
+ if (expired != dentry)
+ dput(expired);
+- spin_unlock(&sbi->fs_lock);
+ }
+ return NULL;
+
+@@ -483,6 +500,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ int status;
++ int state;
+
+ /* Block on any pending expire */
+ if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
+@@ -490,8 +508,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
+ if (rcu_walk)
+ return -ECHILD;
+
++retry:
+ spin_lock(&sbi->fs_lock);
+- if (ino->flags & AUTOFS_INF_EXPIRING) {
++ state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
++ if (state == AUTOFS_INF_WANT_EXPIRE) {
++ spin_unlock(&sbi->fs_lock);
++ /*
++ * Possibly being selected for expire, wait until
++ * it's selected or not.
++ */
++ schedule_timeout_uninterruptible(HZ/10);
++ goto retry;
++ }
++ if (state & AUTOFS_INF_EXPIRING) {
+ spin_unlock(&sbi->fs_lock);
+
+ pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index 3722a1f65069..4ffcf0c27739 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1634,6 +1634,9 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
+ int namelen;
+ int ret = 0;
+
++ if (!S_ISDIR(file_inode(file)->i_mode))
++ return -ENOTDIR;
++
+ ret = mnt_want_write_file(file);
+ if (ret)
+ goto out;
+@@ -1691,6 +1694,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
+ struct btrfs_ioctl_vol_args *vol_args;
+ int ret;
+
++ if (!S_ISDIR(file_inode(file)->i_mode))
++ return -ENOTDIR;
++
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+@@ -1714,6 +1720,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
+ bool readonly = false;
+ struct btrfs_qgroup_inherit *inherit = NULL;
+
++ if (!S_ISDIR(file_inode(file)->i_mode))
++ return -ENOTDIR;
++
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+@@ -2358,6 +2367,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
+ int ret;
+ int err = 0;
+
++ if (!S_ISDIR(dir->i_mode))
++ return -ENOTDIR;
++
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
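
[Editor's note: the btrfs hunks all add the same early guard: the snapshot/subvolume ioctls only make sense on a directory, so other inode types are rejected before any write access is taken. Userspace analogue of the check:]

#include <sys/stat.h>
#include <errno.h>

static int snap_ioctl_check(const struct stat *st)
{
    if (!S_ISDIR(st->st_mode))
        return -ENOTDIR;    /* fail before any side effects */
    return 0;
}
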
+diff --git a/fs/ceph/file.c b/fs/ceph/file.c
+index 0daaf7ceedc5..b1b9b48a479d 100644
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -1448,16 +1448,14 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
+ {
+ struct inode *inode = file->f_mapping->host;
+ loff_t i_size;
+- int ret;
++ loff_t ret;
+
+ inode_lock(inode);
+
+ if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
+ ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
+- if (ret < 0) {
+- offset = ret;
++ if (ret < 0)
+ goto out;
+- }
+ }
+
+ i_size = i_size_read(inode);
+@@ -1473,7 +1471,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
+ * write() or lseek() might have altered it
+ */
+ if (offset == 0) {
+- offset = file->f_pos;
++ ret = file->f_pos;
+ goto out;
+ }
+ offset += file->f_pos;
+@@ -1493,11 +1491,11 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
+ break;
+ }
+
+- offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
++ ret = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
+
+ out:
+ inode_unlock(inode);
+- return offset;
++ return ret;
+ }
+
+ static inline void ceph_zero_partial_page(
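
[Editor's note: the ceph_llseek() fix is purely about types: accumulating a 64-bit file position in an int return variable truncates offsets past 2 GiB. Keeping the working variable the width of the return type avoids it:]

#include <stdio.h>

static long long llseek_sketch(long long new_pos)   /* loff_t analogue */
{
    long long ret = new_pos;   /* was an int in the buggy version */
    return ret;
}

int main(void)
{
    printf("%lld\n", llseek_sketch(5LL << 30));  /* 5 GiB survives */
    return 0;
}
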
+diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
+index be6b1657b1af..0946f2d4a81f 100644
+--- a/fs/ceph/ioctl.c
++++ b/fs/ceph/ioctl.c
+@@ -183,7 +183,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
+ struct ceph_osd_client *osdc =
+ &ceph_sb_to_client(inode->i_sb)->client->osdc;
+ struct ceph_object_locator oloc;
+- struct ceph_object_id oid;
++ CEPH_DEFINE_OID_ONSTACK(oid);
+ u64 len = 1, olen;
+ u64 tmp;
+ struct ceph_pg pgid;
+diff --git a/fs/configfs/file.c b/fs/configfs/file.c
+index bbc1252a59f5..2ddfa05d71f5 100644
+--- a/fs/configfs/file.c
++++ b/fs/configfs/file.c
+@@ -333,6 +333,7 @@ configfs_write_bin_file(struct file *file, const char __user *buf,
+ if (bin_attr->cb_max_size &&
+ *ppos + count > bin_attr->cb_max_size) {
+ len = -EFBIG;
++ goto out;
+ }
+
+ tbuf = vmalloc(*ppos + count);
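
[Editor's note: the configfs hunk adds the branch the error assignment always needed -- len was set to -EFBIG but execution fell through into the vmalloc() below. Minimal shape of the fix:]

#include <errno.h>
#include <stddef.h>

static long write_checked(size_t pos, size_t count, size_t max)
{
    long len = 0;

    if (max && pos + count > max) {
        len = -EFBIG;
        goto out;    /* the missing jump: skip the allocation */
    }
    /* ... allocate pos + count bytes and copy the payload here ... */
out:
    return len;
}
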
+diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
+index 5c57654927a6..90e46cd752fe 100644
+--- a/fs/hostfs/hostfs_kern.c
++++ b/fs/hostfs/hostfs_kern.c
+@@ -959,10 +959,11 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
+
+ if (S_ISLNK(root_inode->i_mode)) {
+ char *name = follow_link(host_root_path);
+- if (IS_ERR(name))
++ if (IS_ERR(name)) {
+ err = PTR_ERR(name);
+- else
+- err = read_name(root_inode, name);
++ goto out_put;
++ }
++ err = read_name(root_inode, name);
+ kfree(name);
+ if (err)
+ goto out_put;
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index d2f97ecca6a5..e0e5f7c3c99f 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
+
+ pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+- wait_event(group->fanotify_data.access_waitq, event->response ||
+- atomic_read(&group->fanotify_data.bypass_perm));
+-
+- if (!event->response) { /* bypass_perm set */
+- /*
+- * Event was canceled because group is being destroyed. Remove
+- * it from group's event list because we are responsible for
+- * freeing the permission event.
+- */
+- fsnotify_remove_event(group, &event->fae.fse);
+- return 0;
+- }
++ wait_event(group->fanotify_data.access_waitq, event->response);
+
+ /* userspace responded, convert to something usable */
+ switch (event->response) {
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 8e8e6bcd1d43..a64313868d3a 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file)
+
+ #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+ struct fanotify_perm_event_info *event, *next;
++ struct fsnotify_event *fsn_event;
+
+ /*
+- * There may be still new events arriving in the notification queue
+- * but since userspace cannot use fanotify fd anymore, no event can
+- * enter or leave access_list by now.
++ * Stop new events from arriving in the notification queue. since
++ * userspace cannot use fanotify fd anymore, no event can enter or
++ * leave access_list by now either.
+ */
+- spin_lock(&group->fanotify_data.access_lock);
+-
+- atomic_inc(&group->fanotify_data.bypass_perm);
++ fsnotify_group_stop_queueing(group);
+
++ /*
++ * Process all permission events on access_list and notification queue
++ * and simulate reply from userspace.
++ */
++ spin_lock(&group->fanotify_data.access_lock);
+ list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
+ fae.fse.list) {
+ pr_debug("%s: found group=%p event=%p\n", __func__, group,
+@@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file)
+ spin_unlock(&group->fanotify_data.access_lock);
+
+ /*
+- * Since bypass_perm is set, newly queued events will not wait for
+- * access response. Wake up the already sleeping ones now.
+- * synchronize_srcu() in fsnotify_destroy_group() will wait for all
+- * processes sleeping in fanotify_handle_event() waiting for access
+- * response and thus also for all permission events to be freed.
++ * Destroy all non-permission events. For permission events just
++ * dequeue them and set the response. They will be freed once the
++ * response is consumed and fanotify_get_response() returns.
+ */
++ mutex_lock(&group->notification_mutex);
++ while (!fsnotify_notify_queue_is_empty(group)) {
++ fsn_event = fsnotify_remove_first_event(group);
++ if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS))
++ fsnotify_destroy_event(group, fsn_event);
++ else
++ FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
++ }
++ mutex_unlock(&group->notification_mutex);
++
++ /* Response for all permission events it set, wakeup waiters */
+ wake_up(&group->fanotify_data.access_waitq);
+ #endif
+
+@@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
+ spin_lock_init(&group->fanotify_data.access_lock);
+ init_waitqueue_head(&group->fanotify_data.access_waitq);
+ INIT_LIST_HEAD(&group->fanotify_data.access_list);
+- atomic_set(&group->fanotify_data.bypass_perm, 0);
+ #endif
+ switch (flags & FAN_ALL_CLASS_BITS) {
+ case FAN_CLASS_NOTIF:
+diff --git a/fs/notify/group.c b/fs/notify/group.c
+index 3e2dd85be5dd..b47f7cfdcaa4 100644
+--- a/fs/notify/group.c
++++ b/fs/notify/group.c
+@@ -40,6 +40,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
+ }
+
+ /*
++ * Stop queueing new events for this group. Once this function returns
++ * fsnotify_add_event() will not add any new events to the group's queue.
++ */
++void fsnotify_group_stop_queueing(struct fsnotify_group *group)
++{
++ mutex_lock(&group->notification_mutex);
++ group->shutdown = true;
++ mutex_unlock(&group->notification_mutex);
++}
++
++/*
+ * Trying to get rid of a group. Remove all marks, flush all events and release
+ * the group reference.
+ * Note that another thread calling fsnotify_clear_marks_by_group() may still
+@@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
+ */
+ void fsnotify_destroy_group(struct fsnotify_group *group)
+ {
++ /*
++ * Stop queueing new events. The code below is careful enough to not
++ * require this but fanotify needs to stop queuing events even before
++ * fsnotify_destroy_group() is called and this makes the other callers
++ * of fsnotify_destroy_group() to see the same behavior.
++ */
++ fsnotify_group_stop_queueing(group);
++
+ /* clear all inode marks for this group, attach them to destroy_list */
+ fsnotify_detach_group_marks(group);
+
+diff --git a/fs/notify/notification.c b/fs/notify/notification.c
+index a95d8e037aeb..e455e83ceeeb 100644
+--- a/fs/notify/notification.c
++++ b/fs/notify/notification.c
+@@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
+ * Add an event to the group notification queue. The group can later pull this
+ * event off the queue to deal with. The function returns 0 if the event was
+ * added to the queue, 1 if the event was merged with some other queued event,
+- * 2 if the queue of events has overflown.
++ * 2 if the event was not queued - either the queue of events has overflown
++ * or the group is shutting down.
+ */
+ int fsnotify_add_event(struct fsnotify_group *group,
+ struct fsnotify_event *event,
+@@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group,
+
+ mutex_lock(&group->notification_mutex);
+
++ if (group->shutdown) {
++ mutex_unlock(&group->notification_mutex);
++ return 2;
++ }
++
+ if (group->q_len >= group->max_events) {
+ ret = 2;
+ /* Queue overflow event only if it isn't already queued */
+@@ -126,21 +132,6 @@ queue:
+ }
+
+ /*
+- * Remove @event from group's notification queue. It is the responsibility of
+- * the caller to destroy the event.
+- */
+-void fsnotify_remove_event(struct fsnotify_group *group,
+- struct fsnotify_event *event)
+-{
+- mutex_lock(&group->notification_mutex);
+- if (!list_empty(&event->list)) {
+- list_del_init(&event->list);
+- group->q_len--;
+- }
+- mutex_unlock(&group->notification_mutex);
+-}
+-
+-/*
+ * Remove and return the first event from the notification list. It is the
+ * responsibility of the caller to destroy the obtained event
+ */
+diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
+index 94b18369b1cc..b95e7df5b76a 100644
+--- a/fs/ocfs2/cluster/tcp_internal.h
++++ b/fs/ocfs2/cluster/tcp_internal.h
+@@ -44,9 +44,6 @@
+ * version here in tcp_internal.h should not need to be bumped for
+ * filesystem locking changes.
+ *
+- * New in version 12
+- * - Negotiate hb timeout when storage is down.
+- *
+ * New in version 11
+ * - Negotiation of filesystem locking in the dlm join.
+ *
+@@ -78,7 +75,7 @@
+ * - full 64 bit i_size in the metadata lock lvbs
+ * - introduction of "rw" lock and pushing meta/data locking down
+ */
+-#define O2NET_PROTOCOL_VERSION 12ULL
++#define O2NET_PROTOCOL_VERSION 11ULL
+ struct o2net_handshake {
+ __be64 protocol_version;
+ __be64 connector_id;
+diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
+index cdeafb4e7ed6..0bb128659d4b 100644
+--- a/fs/ocfs2/dlm/dlmconvert.c
++++ b/fs/ocfs2/dlm/dlmconvert.c
+@@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
+ struct dlm_lock *lock, int flags, int type)
+ {
+ enum dlm_status status;
+- u8 old_owner = res->owner;
+
+ mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
+ lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
+@@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
+
+ spin_lock(&res->spinlock);
+ res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
+- lock->convert_pending = 0;
+ /* if it failed, move it back to granted queue.
+ * if master returns DLM_NORMAL and then down before sending ast,
+ * it may have already been moved to granted queue, reset to
+@@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
+ if (status != DLM_NOTQUEUED)
+ dlm_error(status);
+ dlm_revert_pending_convert(res, lock);
+- } else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
+- (old_owner != res->owner)) {
+- mlog(0, "res %.*s is in recovering or has been recovered.\n",
+- res->lockname.len, res->lockname.name);
++ } else if (!lock->convert_pending) {
++ mlog(0, "%s: res %.*s, owner died and lock has been moved back "
++ "to granted list, retry convert.\n",
++ dlm->name, res->lockname.len, res->lockname.name);
+ status = DLM_RECOVERING;
+ }
++
++ lock->convert_pending = 0;
+ bail:
+ spin_unlock(&res->spinlock);
+
+diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
+index 4e7b0dc22450..0b055bfb8e86 100644
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1506,7 +1506,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
+ u64 start, u64 len)
+ {
+ int ret = 0;
+- u64 tmpend, end = start + len;
++ u64 tmpend = 0;
++ u64 end = start + len;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ unsigned int csize = osb->s_clustersize;
+ handle_t *handle;
+@@ -1538,18 +1539,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
+ }
+
+ /*
+- * We want to get the byte offset of the end of the 1st cluster.
++ * If start is on a cluster boundary and end is somewhere in another
++ * cluster, we have not COWed the cluster starting at start, unless
++ * end is also within the same cluster. So, in this case, we skip this
++ * first call to ocfs2_zero_range_for_truncate() and move on
++ * to the next one.
+ */
+- tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
+- if (tmpend > end)
+- tmpend = end;
++ if ((start & (csize - 1)) != 0) {
++ /*
++ * We want to get the byte offset of the end of the 1st
++ * cluster.
++ */
++ tmpend = (u64)osb->s_clustersize +
++ (start & ~(osb->s_clustersize - 1));
++ if (tmpend > end)
++ tmpend = end;
+
+- trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
+- (unsigned long long)tmpend);
++ trace_ocfs2_zero_partial_clusters_range1(
++ (unsigned long long)start,
++ (unsigned long long)tmpend);
+
+- ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
+- if (ret)
+- mlog_errno(ret);
++ ret = ocfs2_zero_range_for_truncate(inode, handle, start,
++ tmpend);
++ if (ret)
++ mlog_errno(ret);
++ }
+
+ if (tmpend < end) {
+ /*
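The rewritten comment and guard rest on power-of-two cluster arithmetic: start & (csize - 1) is the offset of start within its cluster, and csize + (start & ~(csize - 1)) is the first byte past that cluster. A small standalone sketch, assuming csize is a power of two (ocfs2 cluster sizes are):

#include <stdint.h>
#include <stdio.h>

static void zero_first_cluster(uint64_t start, uint64_t csize)
{
    if ((start & (csize - 1)) != 0) {
        /* start sits inside a cluster: zero up to that cluster's end */
        uint64_t tmpend = csize + (start & ~(csize - 1));

        printf("start %llu: zero up to %llu\n",
               (unsigned long long)start, (unsigned long long)tmpend);
    } else {
        printf("start %llu: cluster-aligned, skip the first pass\n",
               (unsigned long long)start);
    }
}

int main(void)
{
    zero_first_cluster(6144, 4096);    /* mid-cluster: zeroes up to 8192 */
    zero_first_cluster(8192, 4096);    /* aligned: first pass is skipped */
    return 0;
}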
+diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
+index b751eea32e20..5db6f45b3fed 100644
+--- a/fs/reiserfs/ibalance.c
++++ b/fs/reiserfs/ibalance.c
+@@ -1153,8 +1153,9 @@ int balance_internal(struct tree_balance *tb,
+ insert_ptr);
+ }
+
+- memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
+ insert_ptr[0] = new_insert_ptr;
++ if (new_insert_ptr)
++ memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
+
+ return order;
+ }
+diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
+index e71cfbd5acb3..41c20b66d4eb 100644
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1531,7 +1531,7 @@ xfs_wait_buftarg(
+ * ensure here that all reference counts have been dropped before we
+ * start walking the LRU list.
+ */
+- drain_workqueue(btp->bt_mount->m_buf_workqueue);
++ flush_workqueue(btp->bt_mount->m_buf_workqueue);
+
+ /* loop until there is nothing left on the lru list. */
+ while (list_lru_count(&btp->bt_lru)) {
+diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
+index 690985daad1c..6b79a6ba39ca 100644
+--- a/include/linux/ceph/libceph.h
++++ b/include/linux/ceph/libceph.h
+@@ -214,8 +214,9 @@ static void erase_##name(struct rb_root *root, type *t) \
+ }
+
+ #define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \
++extern type __lookup_##name##_key; \
+ static type *lookup_##name(struct rb_root *root, \
+- typeof(((type *)0)->keyfld) key) \
++ typeof(__lookup_##name##_key.keyfld) key) \
+ { \
+ struct rb_node *n = root->rb_node; \
+ \
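The macro change above swaps typeof(((type *)0)->keyfld) for typeof applied to a declared-but-never-defined extern variable; typeof never evaluates its operand, so the dummy needs no definition and no null-pointer expression is ever formed. A compilable sketch of the trick (GNU C extension; names illustrative):

#include <stdio.h>

struct item {
    long key;
};

/* Declared only so typeof() can name the member's type; never defined. */
extern struct item __lookup_item_key;

static void lookup_item(typeof(__lookup_item_key.key) key)
{
    printf("looking up key %ld\n", key);
}

int main(void)
{
    lookup_item(42L);
    return 0;
}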
+diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
+index 9ccf4dbe55f8..21d7f048959f 100644
+--- a/include/linux/ceph/osdmap.h
++++ b/include/linux/ceph/osdmap.h
+@@ -115,6 +115,11 @@ static inline void ceph_oid_init(struct ceph_object_id *oid)
+ oid->name_len = 0;
+ }
+
++#define CEPH_OID_INIT_ONSTACK(oid) \
++ ({ ceph_oid_init(&oid); oid; })
++#define CEPH_DEFINE_OID_ONSTACK(oid) \
++ struct ceph_object_id oid = CEPH_OID_INIT_ONSTACK(oid)
++
+ static inline bool ceph_oid_empty(const struct ceph_object_id *oid)
+ {
+ return oid->name == oid->inline_name && !oid->name_len;
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 29f917517299..16af670a98ec 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -148,6 +148,7 @@ struct fsnotify_group {
+ #define FS_PRIO_1 1 /* fanotify content based access control */
+ #define FS_PRIO_2 2 /* fanotify pre-content access */
+ unsigned int priority;
++ bool shutdown; /* group is being shut down, don't queue more events */
+
+ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
+ struct mutex mark_mutex; /* protect marks_list */
+@@ -179,7 +180,6 @@ struct fsnotify_group {
+ spinlock_t access_lock;
+ struct list_head access_list;
+ wait_queue_head_t access_waitq;
+- atomic_t bypass_perm;
+ #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
+ int f_flags;
+ unsigned int max_marks;
+@@ -303,6 +303,8 @@ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *op
+ extern void fsnotify_get_group(struct fsnotify_group *group);
+ /* drop reference on a group from fsnotify_alloc_group */
+ extern void fsnotify_put_group(struct fsnotify_group *group);
++/* group destruction begins, stop queuing new events */
++extern void fsnotify_group_stop_queueing(struct fsnotify_group *group);
+ /* destroy group */
+ extern void fsnotify_destroy_group(struct fsnotify_group *group);
+ /* fasync handler function */
+@@ -315,8 +317,6 @@ extern int fsnotify_add_event(struct fsnotify_group *group,
+ struct fsnotify_event *event,
+ int (*merge)(struct list_head *,
+ struct fsnotify_event *));
+-/* Remove passed event from groups notification queue */
+-extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
+ /* true if the group notification queue is empty */
+ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
+ /* return, but do not dequeue the first event on the notification queue */
+diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
+index 97354102794d..1f94053b3964 100644
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -571,56 +571,56 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size)
+ */
+ static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
+ {
+- int ret = 0;
+ char __user *end = uaddr + size - 1;
+
+ if (unlikely(size == 0))
+- return ret;
++ return 0;
+
++ if (unlikely(uaddr > end))
++ return -EFAULT;
+ /*
+ * Writing zeroes into userspace here is OK, because we know that if
+ * the zero gets there, we'll be overwriting it.
+ */
+- while (uaddr <= end) {
+- ret = __put_user(0, uaddr);
+- if (ret != 0)
+- return ret;
++ do {
++ if (unlikely(__put_user(0, uaddr) != 0))
++ return -EFAULT;
+ uaddr += PAGE_SIZE;
+- }
++ } while (uaddr <= end);
+
+ /* Check whether the range spilled into the next page. */
+ if (((unsigned long)uaddr & PAGE_MASK) ==
+ ((unsigned long)end & PAGE_MASK))
+- ret = __put_user(0, end);
++ return __put_user(0, end);
+
+- return ret;
++ return 0;
+ }
+
+ static inline int fault_in_multipages_readable(const char __user *uaddr,
+ int size)
+ {
+ volatile char c;
+- int ret = 0;
+ const char __user *end = uaddr + size - 1;
+
+ if (unlikely(size == 0))
+- return ret;
++ return 0;
+
+- while (uaddr <= end) {
+- ret = __get_user(c, uaddr);
+- if (ret != 0)
+- return ret;
++ if (unlikely(uaddr > end))
++ return -EFAULT;
++
++ do {
++ if (unlikely(__get_user(c, uaddr) != 0))
++ return -EFAULT;
+ uaddr += PAGE_SIZE;
+- }
++ } while (uaddr <= end);
+
+ /* Check whether the range spilled into the next page. */
+ if (((unsigned long)uaddr & PAGE_MASK) ==
+ ((unsigned long)end & PAGE_MASK)) {
+- ret = __get_user(c, end);
+- (void)c;
++ return __get_user(c, end);
+ }
+
+- return ret;
++ return 0;
+ }
+
+ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
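Both fault-in helpers now compute end = uaddr + size - 1 up front and reject ranges whose end wrapped around below uaddr, then step through the pages with a do/while. A standalone sketch with plain integers showing what the new guard catches (PAGE_SIZE and the names are illustrative):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

static int fault_in_range(uintptr_t uaddr, size_t size)
{
    uintptr_t end = uaddr + size - 1;

    if (size == 0)
        return 0;
    if (uaddr > end)          /* uaddr + size wrapped: reject, like -EFAULT */
        return -1;
    do {
        /* the kernel touches one byte per page at this point */
        uaddr += PAGE_SIZE;
    } while (uaddr <= end);
    return 0;
}

int main(void)
{
    printf("%d\n", fault_in_range(0x1000, 8192));             /* 0 */
    printf("%d\n", fault_in_range(UINTPTR_MAX - 10, 4096));   /* -1 */
    return 0;
}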
+diff --git a/include/linux/reset.h b/include/linux/reset.h
+index 45a4abeb6acb..5daff15722d3 100644
+--- a/include/linux/reset.h
++++ b/include/linux/reset.h
+@@ -71,14 +71,14 @@ static inline struct reset_control *__of_reset_control_get(
+ struct device_node *node,
+ const char *id, int index, int shared)
+ {
+- return ERR_PTR(-EINVAL);
++ return ERR_PTR(-ENOTSUPP);
+ }
+
+ static inline struct reset_control *__devm_reset_control_get(
+ struct device *dev,
+ const char *id, int index, int shared)
+ {
+- return ERR_PTR(-EINVAL);
++ return ERR_PTR(-ENOTSUPP);
+ }
+
+ #endif /* CONFIG_RESET_CONTROLLER */
+diff --git a/kernel/cgroup.c b/kernel/cgroup.c
+index e0be49fc382f..129a7ca5f159 100644
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -6240,6 +6240,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
+ if (cgroup_sk_alloc_disabled)
+ return;
+
++ /* Socket clone path */
++ if (skcd->val) {
++ cgroup_get(sock_cgroup_ptr(skcd));
++ return;
++ }
++
+ rcu_read_lock();
+
+ while (true) {
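In sk_clone_lock() the child socket inherits the parent's sk_cgrp_data by plain copy, so the clone path above only needs to pin the copied pointer with an extra reference instead of re-deriving the cgroup. A toy refcounting analogue of that design (illustrative names, not the kernel API):

#include <stdio.h>

struct cgroup {
    int refcnt;
};

static void cgroup_get(struct cgroup *c)
{
    c->refcnt++;
}

struct sock {
    struct cgroup *cgrp;
};

static void sk_clone(struct sock *child, const struct sock *parent)
{
    *child = *parent;             /* value copy, as in sk_clone_lock() */
    if (child->cgrp)
        cgroup_get(child->cgrp); /* clone path: pin what was copied */
}

int main(void)
{
    struct cgroup cg = { .refcnt = 1 };
    struct sock parent = { .cgrp = &cg }, child;

    sk_clone(&child, &parent);
    printf("refcnt=%d\n", cg.refcnt);    /* 2: one reference per socket */
    return 0;
}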
+diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
+index fca9254280ee..2fc1177383a0 100644
+--- a/kernel/power/hibernate.c
++++ b/kernel/power/hibernate.c
+@@ -299,12 +299,12 @@ static int create_image(int platform_mode)
+ save_processor_state();
+ trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
+ error = swsusp_arch_suspend();
++ /* Restore control flow magically appears here */
++ restore_processor_state();
+ trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
+ if (error)
+ printk(KERN_ERR "PM: Error %d creating hibernation image\n",
+ error);
+- /* Restore control flow magically appears here */
+- restore_processor_state();
+ if (!in_suspend)
+ events_check_enabled = false;
+
+diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
+index 3a970604308f..f155c62f1f2c 100644
+--- a/kernel/power/snapshot.c
++++ b/kernel/power/snapshot.c
+@@ -765,9 +765,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
+ */
+ static bool rtree_next_node(struct memory_bitmap *bm)
+ {
+- bm->cur.node = list_entry(bm->cur.node->list.next,
+- struct rtree_node, list);
+- if (&bm->cur.node->list != &bm->cur.zone->leaves) {
++ if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
++ bm->cur.node = list_entry(bm->cur.node->list.next,
++ struct rtree_node, list);
+ bm->cur.node_pfn += BM_BITS_PER_BLOCK;
+ bm->cur.node_bit = 0;
+ touch_softlockup_watchdog();
+@@ -775,9 +775,9 @@ static bool rtree_next_node(struct memory_bitmap *bm)
+ }
+
+ /* No more nodes, goto next zone */
+- bm->cur.zone = list_entry(bm->cur.zone->list.next,
++ if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
++ bm->cur.zone = list_entry(bm->cur.zone->list.next,
+ struct mem_zone_bm_rtree, list);
+- if (&bm->cur.zone->list != &bm->zones) {
+ bm->cur.node = list_entry(bm->cur.zone->leaves.next,
+ struct rtree_node, list);
+ bm->cur.node_pfn = 0;
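The bug fixed in rtree_next_node() is ordering: the old code advanced the cursor and only then compared against the list head, so for one iteration the cursor aliased the head itself. Testing list_is_last() before advancing keeps the cursor on real nodes. A minimal sketch with a simplified circular list (next pointers only):

#include <stdio.h>

struct node {
    struct node *next;
};

static int list_is_last(const struct node *n, const struct node *head)
{
    return n->next == head;
}

int main(void)
{
    struct node head, a, b;
    const struct node *cur = &a;

    head.next = &a;
    a.next = &b;
    b.next = &head;       /* circular: head -> a -> b -> head */

    while (!list_is_last(cur, &head)) {   /* test first... */
        cur = cur->next;                  /* ...then advance */
        puts("advanced to a real node");  /* printed once, for b */
    }
    return 0;
}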
+diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
+index 979e7bfbde7a..d0a1617b52b4 100644
+--- a/kernel/trace/Makefile
++++ b/kernel/trace/Makefile
+@@ -1,4 +1,8 @@
+
++# We are fully aware of the dangers of __builtin_return_address()
++FRAME_CFLAGS := $(call cc-disable-warning,frame-address)
++KBUILD_CFLAGS += $(FRAME_CFLAGS)
++
+ # Do not instrument the tracer itself:
+
+ ifdef CONFIG_FUNCTION_TRACER
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 8a4bd6b68a0b..77eeab2776ef 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -4890,19 +4890,20 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
+ struct trace_iterator *iter = filp->private_data;
+ ssize_t sret;
+
+- /* return any leftover data */
+- sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+- if (sret != -EBUSY)
+- return sret;
+-
+- trace_seq_init(&iter->seq);
+-
+ /*
+ * Avoid more than one consumer on a single file descriptor
+ * This is just a matter of traces coherency, the ring buffer itself
+ * is protected.
+ */
+ mutex_lock(&iter->mutex);
++
++ /* return any leftover data */
++ sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
++ if (sret != -EBUSY)
++ goto out;
++
++ trace_seq_init(&iter->seq);
++
+ if (iter->trace->read) {
+ sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
+ if (sret)
+@@ -5929,9 +5930,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
+ return -EBUSY;
+ #endif
+
+- if (splice_grow_spd(pipe, &spd))
+- return -ENOMEM;
+-
+ if (*ppos & (PAGE_SIZE - 1))
+ return -EINVAL;
+
+@@ -5941,6 +5939,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
+ len &= PAGE_MASK;
+ }
+
++ if (splice_grow_spd(pipe, &spd))
++ return -ENOMEM;
++
+ again:
+ trace_access_lock(iter->cpu_file);
+ entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
+@@ -5998,19 +5999,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
+ /* did we read anything? */
+ if (!spd.nr_pages) {
+ if (ret)
+- return ret;
++ goto out;
+
++ ret = -EAGAIN;
+ if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
+- return -EAGAIN;
++ goto out;
+
+ ret = wait_on_pipe(iter, true);
+ if (ret)
+- return ret;
++ goto out;
+
+ goto again;
+ }
+
+ ret = splice_to_pipe(pipe, &spd);
++out:
+ splice_shrink_spd(&spd);
+
+ return ret;
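Besides taking iter->mutex before consuming leftover seq data, the splice hunk adopts a single-exit cleanup shape: splice_grow_spd() now runs only after the cheap parameter checks, and every later failure funnels through the out: label so the spd is always shrunk. A userspace sketch of that shape, with malloc/free standing in for grow/shrink:

#include <stdio.h>
#include <stdlib.h>

static int splice_like(size_t len)
{
    char *buf;
    int ret = 0;

    if (len == 0)          /* validate before acquiring anything */
        return -1;

    buf = malloc(len);     /* stands in for splice_grow_spd() */
    if (!buf)
        return -2;

    if (len > 4096) {      /* a failure discovered later... */
        ret = -3;
        goto out;          /* ...still releases the buffer */
    }
    /* the real work would happen here */
out:
    free(buf);             /* single release point, like splice_shrink_spd() */
    return ret;
}

int main(void)
{
    printf("%d %d\n", splice_like(128), splice_like(8192));   /* 0 -3 */
    return 0;
}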
+diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
+index 6845f9294696..50e92c585407 100644
+--- a/mm/kasan/kasan.c
++++ b/mm/kasan/kasan.c
+@@ -562,7 +562,7 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
+ unsigned long redzone_start;
+ unsigned long redzone_end;
+
+- if (flags & __GFP_RECLAIM)
++ if (gfpflags_allow_blocking(flags))
+ quarantine_reduce();
+
+ if (unlikely(object == NULL))
+@@ -595,7 +595,7 @@ void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
+ unsigned long redzone_start;
+ unsigned long redzone_end;
+
+- if (flags & __GFP_RECLAIM)
++ if (gfpflags_allow_blocking(flags))
+ quarantine_reduce();
+
+ if (unlikely(ptr == NULL))
+diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
+index 65793f150d1f..a04887c624ed 100644
+--- a/mm/kasan/quarantine.c
++++ b/mm/kasan/quarantine.c
+@@ -196,7 +196,7 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
+
+ void quarantine_reduce(void)
+ {
+- size_t new_quarantine_size;
++ size_t new_quarantine_size, percpu_quarantines;
+ unsigned long flags;
+ struct qlist_head to_free = QLIST_INIT;
+ size_t size_to_free = 0;
+@@ -214,7 +214,12 @@ void quarantine_reduce(void)
+ */
+ new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
+ QUARANTINE_FRACTION;
+- new_quarantine_size -= QUARANTINE_PERCPU_SIZE * num_online_cpus();
++ percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
++ if (WARN_ONCE(new_quarantine_size < percpu_quarantines,
++ "Too little memory, disabling global KASAN quarantine.\n"))
++ new_quarantine_size = 0;
++ else
++ new_quarantine_size -= percpu_quarantines;
+ WRITE_ONCE(quarantine_size, new_quarantine_size);
+
+ last = global_quarantine.head;
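The quarantine change guards an unsigned subtraction: size_t wraps instead of going negative, so when the per-cpu reservation exceeds the total the old code produced an enormous quarantine size. Comparing first clamps it to zero. A tiny sketch:

#include <stddef.h>
#include <stdio.h>

static size_t remaining(size_t total, size_t reserved)
{
    if (total < reserved)     /* subtraction would wrap: treat as empty */
        return 0;
    return total - reserved;
}

int main(void)
{
    printf("%zu\n", remaining(100, 30));   /* 70 */
    printf("%zu\n", remaining(10, 30));    /* 0, not a near-SIZE_MAX value */
    return 0;
}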
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index ca847d96a980..fcb5b8cb43fe 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -1797,17 +1797,22 @@ static DEFINE_MUTEX(percpu_charge_mutex);
+ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ {
+ struct memcg_stock_pcp *stock;
++ unsigned long flags;
+ bool ret = false;
+
+ if (nr_pages > CHARGE_BATCH)
+ return ret;
+
+- stock = &get_cpu_var(memcg_stock);
++ local_irq_save(flags);
++
++ stock = this_cpu_ptr(&memcg_stock);
+ if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
+ stock->nr_pages -= nr_pages;
+ ret = true;
+ }
+- put_cpu_var(memcg_stock);
++
++ local_irq_restore(flags);
++
+ return ret;
+ }
+
+@@ -1828,15 +1833,18 @@ static void drain_stock(struct memcg_stock_pcp *stock)
+ stock->cached = NULL;
+ }
+
+-/*
+- * This must be called under preempt disabled or must be called by
+- * a thread which is pinned to local cpu.
+- */
+ static void drain_local_stock(struct work_struct *dummy)
+ {
+- struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
++ struct memcg_stock_pcp *stock;
++ unsigned long flags;
++
++ local_irq_save(flags);
++
++ stock = this_cpu_ptr(&memcg_stock);
+ drain_stock(stock);
+ clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
++
++ local_irq_restore(flags);
+ }
+
+ /*
+@@ -1845,14 +1853,19 @@ static void drain_local_stock(struct work_struct *dummy)
+ */
+ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ {
+- struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
++ struct memcg_stock_pcp *stock;
++ unsigned long flags;
++
++ local_irq_save(flags);
+
++ stock = this_cpu_ptr(&memcg_stock);
+ if (stock->cached != memcg) { /* reset if necessary */
+ drain_stock(stock);
+ stock->cached = memcg;
+ }
+ stock->nr_pages += nr_pages;
+- put_cpu_var(memcg_stock);
++
++ local_irq_restore(flags);
+ }
+
+ /*
+diff --git a/mm/page_io.c b/mm/page_io.c
+index 242dba07545b..dc1af1e63221 100644
+--- a/mm/page_io.c
++++ b/mm/page_io.c
+@@ -262,6 +262,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
+ int ret, rw = WRITE;
+ struct swap_info_struct *sis = page_swap_info(page);
+
++ BUG_ON(!PageSwapCache(page));
+ if (sis->flags & SWP_FILE) {
+ struct kiocb kiocb;
+ struct file *swap_file = sis->swap_file;
+@@ -333,6 +334,7 @@ int swap_readpage(struct page *page)
+ int ret = 0;
+ struct swap_info_struct *sis = page_swap_info(page);
+
++ BUG_ON(!PageSwapCache(page));
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(PageUptodate(page), page);
+ if (frontswap_load(page) == 0) {
+@@ -381,6 +383,7 @@ int swap_set_page_dirty(struct page *page)
+
+ if (sis->flags & SWP_FILE) {
+ struct address_space *mapping = sis->swap_file->f_mapping;
++ BUG_ON(!PageSwapCache(page));
+ return mapping->a_ops->set_page_dirty(page);
+ } else {
+ return __set_page_dirty_no_writeback(page);
+diff --git a/mm/swapfile.c b/mm/swapfile.c
+index 031713ab40ce..46502f551ac4 100644
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -2724,7 +2724,6 @@ int swapcache_prepare(swp_entry_t entry)
+ struct swap_info_struct *page_swap_info(struct page *page)
+ {
+ swp_entry_t swap = { .val = page_private(page) };
+- BUG_ON(!PageSwapCache(page));
+ return swap_info[swp_type(swap)];
+ }
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index c4a2f4512fca..38a2691a6367 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2150,23 +2150,6 @@ out:
+ }
+ }
+
+-#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+-static void init_tlb_ubc(void)
+-{
+- /*
+- * This deliberately does not clear the cpumask as it's expensive
+- * and unnecessary. If there happens to be data in there then the
+- * first SWAP_CLUSTER_MAX pages will send an unnecessary IPI and
+- * then will be cleared.
+- */
+- current->tlb_ubc.flush_required = false;
+-}
+-#else
+-static inline void init_tlb_ubc(void)
+-{
+-}
+-#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
+-
+ /*
+ * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
+ */
+@@ -2202,8 +2185,6 @@ static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg,
+ scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() &&
+ sc->priority == DEF_PRIORITY);
+
+- init_tlb_ubc();
+-
+ blk_start_plug(&plug);
+ while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+ nr[LRU_INACTIVE_FILE]) {
+diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
+index 89469592076c..aee117f831c6 100644
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -4187,7 +4187,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
+
+ pages = ceph_alloc_page_vector(calc_pages_for(0, data_len),
+ GFP_NOIO);
+- if (!pages) {
++ if (IS_ERR(pages)) {
+ ceph_msg_put(m);
+ return NULL;
+ }
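ceph_alloc_page_vector() reports failure through the kernel's ERR_PTR convention, so it never returns NULL and the old !pages test could not fire; only IS_ERR() decodes the failure. A userspace re-implementation of the convention showing the miss (MAX_ERRNO as in the kernel; the allocator here is fake):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long err)
{
    return (void *)err;        /* errno value encoded in the pointer */
}

static int IS_ERR(const void *p)
{
    return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

static void *alloc_page_vector(int fail)
{
    return fail ? ERR_PTR(-ENOMEM) : (void *)0x1000;   /* fake page array */
}

int main(void)
{
    void *pages = alloc_page_vector(1);

    printf("!pages: %d  IS_ERR(pages): %d\n", pages == NULL, IS_ERR(pages));
    /* prints "0  1": the NULL check misses the error entirely */
    return 0;
}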
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 25dab8b60223..fd7b41edf1ce 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1362,7 +1362,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
+ if (!try_module_get(prot->owner))
+ goto out_free_sec;
+ sk_tx_queue_clear(sk);
+- cgroup_sk_alloc(&sk->sk_cgrp_data);
+ }
+
+ return sk;
+@@ -1422,6 +1421,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
+ sock_net_set(sk, net);
+ atomic_set(&sk->sk_wmem_alloc, 1);
+
++ cgroup_sk_alloc(&sk->sk_cgrp_data);
+ sock_update_classid(&sk->sk_cgrp_data);
+ sock_update_netprioidx(&sk->sk_cgrp_data);
+ }
+@@ -1566,6 +1566,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
+ newsk->sk_priority = 0;
+ newsk->sk_incoming_cpu = raw_smp_processor_id();
+ atomic64_set(&newsk->sk_cookie, 0);
++
++ cgroup_sk_alloc(&newsk->sk_cgrp_data);
++
+ /*
+ * Before updating sk_refcnt, we must commit prior changes to memory
+ * (Documentation/RCU/rculist_nulls.txt for details)
+diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
+index 3a8f881b22f1..0a7305a97918 100644
+--- a/net/mac80211/agg-rx.c
++++ b/net/mac80211/agg-rx.c
+@@ -261,10 +261,16 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
+ .timeout = timeout,
+ .ssn = start_seq_num,
+ };
+-
+ int i, ret = -EOPNOTSUPP;
+ u16 status = WLAN_STATUS_REQUEST_DECLINED;
+
++ if (tid >= IEEE80211_FIRST_TSPEC_TSID) {
++ ht_dbg(sta->sdata,
++ "STA %pM requests BA session on unsupported tid %d\n",
++ sta->sta.addr, tid);
++ goto end_no_lock;
++ }
++
+ if (!sta->sta.ht_cap.ht_supported) {
+ ht_dbg(sta->sdata,
+ "STA %pM erroneously requests BA session on tid %d w/o QoS\n",
+diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
+index 42fa81031dfa..744ad1c0bc02 100644
+--- a/net/mac80211/agg-tx.c
++++ b/net/mac80211/agg-tx.c
+@@ -580,6 +580,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
+ ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW))
+ return -EINVAL;
+
++ if (WARN_ON(tid >= IEEE80211_FIRST_TSPEC_TSID))
++ return -EINVAL;
++
+ ht_dbg(sdata, "Open BA session requested for %pM tid %u\n",
+ pubsta->addr, tid);
+
+diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
+index 7d72283901a3..7d38dd6971a8 100644
+--- a/net/wireless/nl80211.c
++++ b/net/wireless/nl80211.c
+@@ -6811,7 +6811,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
+
+ params.n_counter_offsets_presp = len / sizeof(u16);
+ if (rdev->wiphy.max_num_csa_counters &&
+- (params.n_counter_offsets_beacon >
++ (params.n_counter_offsets_presp >
+ rdev->wiphy.max_num_csa_counters))
+ return -EINVAL;
+
+diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
+index 77ce538268b5..8ab68679cfb5 100644
+--- a/scripts/Makefile.ubsan
++++ b/scripts/Makefile.ubsan
+@@ -14,8 +14,4 @@ ifdef CONFIG_UBSAN
+ ifdef CONFIG_UBSAN_ALIGNMENT
+ CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment)
+ endif
+-
+- # -fsanitize=* options makes GCC less smart than usual and
+- # increase number of 'maybe-uninitialized false-positives
+- CFLAGS_UBSAN += $(call cc-option, -Wno-maybe-uninitialized)
+ endif
+diff --git a/scripts/package/builddeb b/scripts/package/builddeb
+index 86e56fef7473..202d6e7db859 100755
+--- a/scripts/package/builddeb
++++ b/scripts/package/builddeb
+@@ -322,12 +322,12 @@ fi
+
+ # Build kernel header package
+ (cd $srctree; find . -name Makefile\* -o -name Kconfig\* -o -name \*.pl) > "$objtree/debian/hdrsrcfiles"
+-if grep -q '^CONFIG_STACK_VALIDATION=y' $KCONFIG_CONFIG ; then
+- (cd $srctree; find tools/objtool -type f -executable) >> "$objtree/debian/hdrsrcfiles"
+-fi
+ (cd $srctree; find arch/*/include include scripts -type f) >> "$objtree/debian/hdrsrcfiles"
+ (cd $srctree; find arch/$SRCARCH -name module.lds -o -name Kbuild.platforms -o -name Platform) >> "$objtree/debian/hdrsrcfiles"
+ (cd $srctree; find $(find arch/$SRCARCH -name include -o -name scripts -type d) -type f) >> "$objtree/debian/hdrsrcfiles"
++if grep -q '^CONFIG_STACK_VALIDATION=y' $KCONFIG_CONFIG ; then
++ (cd $objtree; find tools/objtool -type f -executable) >> "$objtree/debian/hdrobjfiles"
++fi
+ (cd $objtree; find arch/$SRCARCH/include Module.symvers include scripts -type f) >> "$objtree/debian/hdrobjfiles"
+ destdir=$kernel_headers_dir/usr/src/linux-headers-$version
+ mkdir -p "$destdir"
+diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
+index 5adbfc32242f..17a06105ccb6 100644
+--- a/security/keys/encrypted-keys/encrypted.c
++++ b/security/keys/encrypted-keys/encrypted.c
+@@ -29,6 +29,7 @@
+ #include <linux/rcupdate.h>
+ #include <linux/scatterlist.h>
+ #include <linux/ctype.h>
++#include <crypto/aes.h>
+ #include <crypto/hash.h>
+ #include <crypto/sha.h>
+ #include <crypto/skcipher.h>
+@@ -478,6 +479,7 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload,
+ struct crypto_skcipher *tfm;
+ struct skcipher_request *req;
+ unsigned int encrypted_datalen;
++ u8 iv[AES_BLOCK_SIZE];
+ unsigned int padlen;
+ char pad[16];
+ int ret;
+@@ -500,8 +502,8 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload,
+ sg_init_table(sg_out, 1);
+ sg_set_buf(sg_out, epayload->encrypted_data, encrypted_datalen);
+
+- skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen,
+- epayload->iv);
++ memcpy(iv, epayload->iv, sizeof(iv));
++ skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv);
+ ret = crypto_skcipher_encrypt(req);
+ tfm = crypto_skcipher_reqtfm(req);
+ skcipher_request_free(req);
+@@ -581,6 +583,7 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload,
+ struct crypto_skcipher *tfm;
+ struct skcipher_request *req;
+ unsigned int encrypted_datalen;
++ u8 iv[AES_BLOCK_SIZE];
+ char pad[16];
+ int ret;
+
+@@ -599,8 +602,8 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload,
+ epayload->decrypted_datalen);
+ sg_set_buf(&sg_out[1], pad, sizeof pad);
+
+- skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen,
+- epayload->iv);
++ memcpy(iv, epayload->iv, sizeof(iv));
++ skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv);
+ ret = crypto_skcipher_decrypt(req);
+ tfm = crypto_skcipher_reqtfm(req);
+ skcipher_request_free(req);
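The keys fix exists because skcipher implementations may write the final chaining value back into the IV buffer they are handed; passing the long-lived epayload->iv directly lets one operation clobber the IV for the next, so it is copied to a scratch buffer first. A toy analogue with an obviously fake cipher:

#include <stdio.h>
#include <string.h>

#define IV_LEN 4

static void toy_encrypt(unsigned char *iv)
{
    int i;

    for (i = 0; i < IV_LEN; i++)
        iv[i] ^= 0x5a;     /* stand-in for "cipher updates IV in place" */
}

int main(void)
{
    unsigned char payload_iv[IV_LEN] = { 1, 2, 3, 4 };
    unsigned char scratch[IV_LEN];

    memcpy(scratch, payload_iv, sizeof(scratch));
    toy_encrypt(scratch);                  /* payload_iv stays untouched */
    printf("payload iv[0] = %d\n", payload_iv[0]);    /* still 1 */
    return 0;
}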
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-10-08 11:37 Alice Ferrazzi
0 siblings, 0 replies; 20+ messages in thread
From: Alice Ferrazzi @ 2016-10-08 11:37 UTC (permalink / raw
To: gentoo-commits
commit: 62792f42f1f905a8040f299e7ad492de6b237028
Author: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
AuthorDate: Sat Oct 8 11:35:32 2016 +0000
Commit: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
CommitDate: Sat Oct 8 11:35:32 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=62792f42
Linux patch 4.7.7.
0000_README | 4 +
1006_linux-4.7.7.patch | 3811 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 3815 insertions(+)
diff --git a/0000_README b/0000_README
index 87048e9..01abfe8 100644
--- a/0000_README
+++ b/0000_README
@@ -67,6 +67,10 @@ Patch: 1005_linux-4.7.6.patch
From: http://www.kernel.org
Desc: Linux 4.7.6
+Patch: 1006_linux-4.7.7.patch
+From: http://www.kernel.org
+Desc: Linux 4.7.7
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1006_linux-4.7.7.patch b/1006_linux-4.7.7.patch
new file mode 100644
index 0000000..3bee058
--- /dev/null
+++ b/1006_linux-4.7.7.patch
@@ -0,0 +1,3811 @@
+diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+index 46c6f3ed1a1c..0fa3b0fac129 100644
+--- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
++++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+@@ -113,9 +113,9 @@ pm8916:
+ l14, l15, l16, l17, l18
+
+ pm8941:
+- s1, s2, s3, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14,
+- l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, lvs1, lvs2, lvs3,
+- mvs1, mvs2
++ s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13,
++ l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, lvs1, lvs2, lvs3,
++ 5vs1, 5vs2
+
+ pm8994:
+ s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, l1, l2, l3, l4, l5,
+diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
+index 4976389e432d..dd15a699ee1c 100644
+--- a/Documentation/pinctrl.txt
++++ b/Documentation/pinctrl.txt
+@@ -831,7 +831,7 @@ separate memory range only intended for GPIO driving, and the register
+ range dealing with pin config and pin multiplexing get placed into a
+ different memory range and a separate section of the data sheet.
+
+-A flag "strict" in struct pinctrl_desc is available to check and deny
++A flag "strict" in struct pinmux_ops is available to check and deny
+ simultaneous access to the same pin from GPIO and pin multiplexing
+ consumers on hardware of this type. The pinctrl driver should set this flag
+ accordingly.
+diff --git a/Makefile b/Makefile
+index 48b0120be59b..320a9308470b 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 7
+-SUBLEVEL = 6
++SUBLEVEL = 7
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
+index af11c2f8f3b7..fc6d541549a2 100644
+--- a/arch/arm/boot/compressed/head.S
++++ b/arch/arm/boot/compressed/head.S
+@@ -779,7 +779,7 @@ __armv7_mmu_cache_on:
+ orrne r0, r0, #1 @ MMU enabled
+ movne r1, #0xfffffffd @ domain 0 = client
+ bic r6, r6, #1 << 31 @ 32-bit translation system
+- bic r6, r6, #3 << 0 @ use only ttbr0
++ bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0
+ mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer
+ mcrne p15, 0, r1, c3, c0, 0 @ load domain access control
+ mcrne p15, 0, r6, c2, c0, 2 @ load ttb control
+diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts
+index 96ea936eeeb0..240a2864d044 100644
+--- a/arch/arm/boot/dts/imx6sx-sabreauto.dts
++++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts
+@@ -64,7 +64,7 @@
+ cd-gpios = <&gpio7 11 GPIO_ACTIVE_LOW>;
+ no-1-8-v;
+ keep-power-in-suspend;
+- enable-sdio-wakup;
++ wakeup-source;
+ status = "okay";
+ };
+
+diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
+index 263d46dbc7e6..2d78eee7cb55 100644
+--- a/arch/arm/boot/dts/sun5i-a13.dtsi
++++ b/arch/arm/boot/dts/sun5i-a13.dtsi
+@@ -84,7 +84,7 @@
+ trips {
+ cpu_alert0: cpu_alert0 {
+ /* milliCelsius */
+- temperature = <850000>;
++ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
+index fb0a0a4dfea4..0e437189b095 100644
+--- a/arch/arm/common/sa1111.c
++++ b/arch/arm/common/sa1111.c
+@@ -869,9 +869,9 @@ struct sa1111_save_data {
+
+ #ifdef CONFIG_PM
+
+-static int sa1111_suspend(struct platform_device *dev, pm_message_t state)
++static int sa1111_suspend_noirq(struct device *dev)
+ {
+- struct sa1111 *sachip = platform_get_drvdata(dev);
++ struct sa1111 *sachip = dev_get_drvdata(dev);
+ struct sa1111_save_data *save;
+ unsigned long flags;
+ unsigned int val;
+@@ -934,9 +934,9 @@ static int sa1111_suspend(struct platform_device *dev, pm_message_t state)
+ * restored by their respective drivers, and must be called
+ * via LDM after this function.
+ */
+-static int sa1111_resume(struct platform_device *dev)
++static int sa1111_resume_noirq(struct device *dev)
+ {
+- struct sa1111 *sachip = platform_get_drvdata(dev);
++ struct sa1111 *sachip = dev_get_drvdata(dev);
+ struct sa1111_save_data *save;
+ unsigned long flags, id;
+ void __iomem *base;
+@@ -952,7 +952,7 @@ static int sa1111_resume(struct platform_device *dev)
+ id = sa1111_readl(sachip->base + SA1111_SKID);
+ if ((id & SKID_ID_MASK) != SKID_SA1111_ID) {
+ __sa1111_remove(sachip);
+- platform_set_drvdata(dev, NULL);
++ dev_set_drvdata(dev, NULL);
+ kfree(save);
+ return 0;
+ }
+@@ -1003,8 +1003,8 @@ static int sa1111_resume(struct platform_device *dev)
+ }
+
+ #else
+-#define sa1111_suspend NULL
+-#define sa1111_resume NULL
++#define sa1111_suspend_noirq NULL
++#define sa1111_resume_noirq NULL
+ #endif
+
+ static int sa1111_probe(struct platform_device *pdev)
+@@ -1038,6 +1038,11 @@ static int sa1111_remove(struct platform_device *pdev)
+ return 0;
+ }
+
++static struct dev_pm_ops sa1111_pm_ops = {
++ .suspend_noirq = sa1111_suspend_noirq,
++ .resume_noirq = sa1111_resume_noirq,
++};
++
+ /*
+ * Not sure if this should be on the system bus or not yet.
+ * We really want some way to register a system device at
+@@ -1050,10 +1055,9 @@ static int sa1111_remove(struct platform_device *pdev)
+ static struct platform_driver sa1111_device_driver = {
+ .probe = sa1111_probe,
+ .remove = sa1111_remove,
+- .suspend = sa1111_suspend,
+- .resume = sa1111_resume,
+ .driver = {
+ .name = "sa1111",
++ .pm = &sa1111_pm_ops,
+ },
+ };
+
+diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
+index a83570f10124..8ec98fc4eb2a 100644
+--- a/arch/arm/include/asm/dma-mapping.h
++++ b/arch/arm/include/asm/dma-mapping.h
+@@ -112,7 +112,7 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
+ /* The ARM override for dma_max_pfn() */
+ static inline unsigned long dma_max_pfn(struct device *dev)
+ {
+- return PHYS_PFN_OFFSET + dma_to_pfn(dev, *dev->dma_mask);
++ return dma_to_pfn(dev, *dev->dma_mask);
+ }
+ #define dma_max_pfn(dev) dma_max_pfn(dev)
+
+diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
+index 2e26016a91a5..5204978ec4fa 100644
+--- a/arch/arm/kernel/devtree.c
++++ b/arch/arm/kernel/devtree.c
+@@ -87,6 +87,8 @@ void __init arm_dt_init_cpu_maps(void)
+ return;
+
+ for_each_child_of_node(cpus, cpu) {
++ const __be32 *cell;
++ int prop_bytes;
+ u32 hwid;
+
+ if (of_node_cmp(cpu->type, "cpu"))
+@@ -98,7 +100,8 @@ void __init arm_dt_init_cpu_maps(void)
+ * properties is considered invalid to build the
+ * cpu_logical_map.
+ */
+- if (of_property_read_u32(cpu, "reg", &hwid)) {
++ cell = of_get_property(cpu, "reg", &prop_bytes);
++ if (!cell || prop_bytes < sizeof(*cell)) {
+ pr_debug(" * %s missing reg property\n",
+ cpu->full_name);
+ of_node_put(cpu);
+@@ -106,10 +109,15 @@ void __init arm_dt_init_cpu_maps(void)
+ }
+
+ /*
+- * 8 MSBs must be set to 0 in the DT since the reg property
++ * Bits n:24 must be set to 0 in the DT since the reg property
+ * defines the MPIDR[23:0].
+ */
+- if (hwid & ~MPIDR_HWID_BITMASK) {
++ do {
++ hwid = be32_to_cpu(*cell++);
++ prop_bytes -= sizeof(*cell);
++ } while (!hwid && prop_bytes > 0);
++
++ if (prop_bytes || (hwid & ~MPIDR_HWID_BITMASK)) {
+ of_node_put(cpu);
+ return;
+ }
+diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c
+index cbf53bb9c814..0db46895c82a 100644
+--- a/arch/arm/mach-sa1100/clock.c
++++ b/arch/arm/mach-sa1100/clock.c
+@@ -125,6 +125,8 @@ static unsigned long clk_36864_get_rate(struct clk *clk)
+ }
+
+ static struct clkops clk_36864_ops = {
++ .enable = clk_cpu_enable,
++ .disable = clk_cpu_disable,
+ .get_rate = clk_36864_get_rate,
+ };
+
+@@ -140,9 +142,8 @@ static struct clk_lookup sa11xx_clkregs[] = {
+ CLKDEV_INIT(NULL, "OSTIMER0", &clk_36864),
+ };
+
+-static int __init sa11xx_clk_init(void)
++int __init sa11xx_clk_init(void)
+ {
+ clkdev_add_table(sa11xx_clkregs, ARRAY_SIZE(sa11xx_clkregs));
+ return 0;
+ }
+-core_initcall(sa11xx_clk_init);
+diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
+index 345e63f4eb71..3e09beddb6e8 100644
+--- a/arch/arm/mach-sa1100/generic.c
++++ b/arch/arm/mach-sa1100/generic.c
+@@ -34,6 +34,7 @@
+
+ #include <mach/hardware.h>
+ #include <mach/irqs.h>
++#include <mach/reset.h>
+
+ #include "generic.h"
+ #include <clocksource/pxa.h>
+@@ -95,6 +96,8 @@ static void sa1100_power_off(void)
+
+ void sa11x0_restart(enum reboot_mode mode, const char *cmd)
+ {
++ clear_reset_status(RESET_STATUS_ALL);
++
+ if (mode == REBOOT_SOFT) {
+ /* Jump into ROM at address 0 */
+ soft_restart(0);
+@@ -388,6 +391,7 @@ void __init sa1100_init_irq(void)
+ sa11x0_init_irq_nodt(IRQ_GPIO0_SC, irq_resource.start);
+
+ sa1100_init_gpio();
++ sa11xx_clk_init();
+ }
+
+ /*
+diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h
+index 0d92e119b36b..68199b603ff7 100644
+--- a/arch/arm/mach-sa1100/generic.h
++++ b/arch/arm/mach-sa1100/generic.h
+@@ -44,3 +44,5 @@ int sa11x0_pm_init(void);
+ #else
+ static inline int sa11x0_pm_init(void) { return 0; }
+ #endif
++
++int sa11xx_clk_init(void);
+diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c
+index 62437b57813e..73e3adbc1330 100644
+--- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c
++++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c
+@@ -41,39 +41,26 @@
+
+ #define REGULATOR_IRQ_MASK BIT(2) /* IRQ2, active low */
+
+-static void __iomem *irqc;
+-
+-static const u8 da9063_mask_regs[] = {
+- DA9063_REG_IRQ_MASK_A,
+- DA9063_REG_IRQ_MASK_B,
+- DA9063_REG_IRQ_MASK_C,
+- DA9063_REG_IRQ_MASK_D,
+-};
+-
+-/* DA9210 System Control and Event Registers */
++/* start of DA9210 System Control and Event Registers */
+ #define DA9210_REG_MASK_A 0x54
+-#define DA9210_REG_MASK_B 0x55
+-
+-static const u8 da9210_mask_regs[] = {
+- DA9210_REG_MASK_A,
+- DA9210_REG_MASK_B,
+-};
+-
+-static void da9xxx_mask_irqs(struct i2c_client *client, const u8 regs[],
+- unsigned int nregs)
+-{
+- unsigned int i;
+
+- dev_info(&client->dev, "Masking %s interrupt sources\n", client->name);
++static void __iomem *irqc;
+
+- for (i = 0; i < nregs; i++) {
+- int error = i2c_smbus_write_byte_data(client, regs[i], ~0);
+- if (error) {
+- dev_err(&client->dev, "i2c error %d\n", error);
+- return;
+- }
+- }
+-}
++/* first byte sets the memory pointer, following are consecutive reg values */
++static u8 da9063_irq_clr[] = { DA9063_REG_IRQ_MASK_A, 0xff, 0xff, 0xff, 0xff };
++static u8 da9210_irq_clr[] = { DA9210_REG_MASK_A, 0xff, 0xff };
++
++static struct i2c_msg da9xxx_msgs[2] = {
++ {
++ .addr = 0x58,
++ .len = ARRAY_SIZE(da9063_irq_clr),
++ .buf = da9063_irq_clr,
++ }, {
++ .addr = 0x68,
++ .len = ARRAY_SIZE(da9210_irq_clr),
++ .buf = da9210_irq_clr,
++ },
++};
+
+ static int regulator_quirk_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
+@@ -93,12 +80,15 @@ static int regulator_quirk_notify(struct notifier_block *nb,
+ client = to_i2c_client(dev);
+ dev_dbg(dev, "Detected %s\n", client->name);
+
+- if ((client->addr == 0x58 && !strcmp(client->name, "da9063")))
+- da9xxx_mask_irqs(client, da9063_mask_regs,
+- ARRAY_SIZE(da9063_mask_regs));
+- else if (client->addr == 0x68 && !strcmp(client->name, "da9210"))
+- da9xxx_mask_irqs(client, da9210_mask_regs,
+- ARRAY_SIZE(da9210_mask_regs));
++ if ((client->addr == 0x58 && !strcmp(client->name, "da9063")) ||
++ (client->addr == 0x68 && !strcmp(client->name, "da9210"))) {
++ int ret;
++
++ dev_info(&client->dev, "clearing da9063/da9210 interrupts\n");
++ ret = i2c_transfer(client->adapter, da9xxx_msgs, ARRAY_SIZE(da9xxx_msgs));
++ if (ret != ARRAY_SIZE(da9xxx_msgs))
++ dev_err(&client->dev, "i2c error %d\n", ret);
++ }
+
+ mon = ioread32(irqc + IRQC_MONITOR);
+ if (mon & REGULATOR_IRQ_MASK)
+diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
+index 0800d23e2fdd..b463607bc816 100644
+--- a/arch/arm64/kernel/debug-monitors.c
++++ b/arch/arm64/kernel/debug-monitors.c
+@@ -417,8 +417,10 @@ int kernel_active_single_step(void)
+ /* ptrace API */
+ void user_enable_single_step(struct task_struct *task)
+ {
+- set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
+- set_regs_spsr_ss(task_pt_regs(task));
++ struct thread_info *ti = task_thread_info(task);
++
++ if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP))
++ set_regs_spsr_ss(task_pt_regs(task));
+ }
+
+ void user_disable_single_step(struct task_struct *task)
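Switching to test_and_set_ti_thread_flag() makes arming single-step idempotent: only the 0 -> 1 transition of TIF_SINGLESTEP touches the SPSR step bit, so a repeated PTRACE_SINGLESTEP request cannot re-arm a step the hardware already consumed. A sketch of the idiom (non-atomic here for brevity; the kernel helper is atomic):

#include <stdbool.h>
#include <stdio.h>

static bool test_and_set(bool *flag)
{
    bool old = *flag;

    *flag = true;
    return old;
}

int main(void)
{
    bool stepping = false;
    int i;

    for (i = 0; i < 2; i++) {
        if (!test_and_set(&stepping))
            puts("arming hardware single-step");   /* printed once */
        else
            puts("already armed, leaving state alone");
    }
    return 0;
}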
+diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
+index 83c2a0021b56..13d3fc4270b7 100644
+--- a/arch/avr32/mach-at32ap/pio.c
++++ b/arch/avr32/mach-at32ap/pio.c
+@@ -435,7 +435,7 @@ void __init at32_init_pio(struct platform_device *pdev)
+ struct resource *regs;
+ struct pio_device *pio;
+
+- if (pdev->id > MAX_NR_PIO_DEVICES) {
++ if (pdev->id >= MAX_NR_PIO_DEVICES) {
+ dev_err(&pdev->dev, "only %d PIO devices supported\n",
+ MAX_NR_PIO_DEVICES);
+ return;
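A classic off-by-one: with MAX_NR_PIO_DEVICES slots the valid ids are 0 through MAX_NR_PIO_DEVICES - 1, so the guard must reject id >= MAX, not id > MAX. Illustrated:

#include <stdio.h>

#define MAX_DEVICES 5

static int devices[MAX_DEVICES];

static int probe(int id)
{
    if (id >= MAX_DEVICES)    /* "id > MAX_DEVICES" lets id == 5 through */
        return -1;
    devices[id] = 1;          /* safe: id is 0..4 */
    return 0;
}

int main(void)
{
    printf("%d %d\n", probe(4), probe(5));   /* 0 -1 */
    return 0;
}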
+diff --git a/arch/mips/include/asm/uprobes.h b/arch/mips/include/asm/uprobes.h
+index 34c325c674c4..70a4a2f173ff 100644
+--- a/arch/mips/include/asm/uprobes.h
++++ b/arch/mips/include/asm/uprobes.h
+@@ -36,7 +36,6 @@ struct arch_uprobe {
+ unsigned long resume_epc;
+ u32 insn[2];
+ u32 ixol[2];
+- union mips_instruction orig_inst[MAX_UINSN_BYTES / 4];
+ };
+
+ struct arch_uprobe_task {
+diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c
+index 8452d933a645..4e7b89f2e244 100644
+--- a/arch/mips/kernel/uprobes.c
++++ b/arch/mips/kernel/uprobes.c
+@@ -157,7 +157,6 @@ bool is_trap_insn(uprobe_opcode_t *insn)
+ int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs)
+ {
+ struct uprobe_task *utask = current->utask;
+- union mips_instruction insn;
+
+ /*
+ * Now find the EPC where to resume after the breakpoint has been
+@@ -168,10 +167,10 @@ int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs)
+ unsigned long epc;
+
+ epc = regs->cp0_epc;
+- __compute_return_epc_for_insn(regs, insn);
++ __compute_return_epc_for_insn(regs,
++ (union mips_instruction) aup->insn[0]);
+ aup->resume_epc = regs->cp0_epc;
+ }
+-
+ utask->autask.saved_trap_nr = current->thread.trap_nr;
+ current->thread.trap_nr = UPROBE_TRAP_NR;
+ regs->cp0_epc = current->utask->xol_vaddr;
+@@ -257,7 +256,7 @@ unsigned long arch_uretprobe_hijack_return_addr(
+ ra = regs->regs[31];
+
+ /* Replace the return address with the trampoline address */
+- regs->regs[31] = ra;
++ regs->regs[31] = trampoline_vaddr;
+
+ return ra;
+ }
+@@ -280,24 +279,6 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+ }
+
+-/**
+- * set_orig_insn - Restore the original instruction.
+- * @mm: the probed process address space.
+- * @auprobe: arch specific probepoint information.
+- * @vaddr: the virtual address to insert the opcode.
+- *
+- * For mm @mm, restore the original opcode (opcode) at @vaddr.
+- * Return 0 (success) or a negative errno.
+- *
+- * This overrides the weak version in kernel/events/uprobes.c.
+- */
+-int set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+- unsigned long vaddr)
+-{
+- return uprobe_write_opcode(mm, vaddr,
+- *(uprobe_opcode_t *)&auprobe->orig_inst[0].word);
+-}
+-
+ void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+ void *src, unsigned long len)
+ {
+diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
+index 33d5ff5069e5..18b37a573eb4 100644
+--- a/arch/mips/mti-malta/malta-setup.c
++++ b/arch/mips/mti-malta/malta-setup.c
+@@ -39,6 +39,9 @@
+ #include <linux/console.h>
+ #endif
+
++#define ROCIT_CONFIG_GEN0 0x1f403000
++#define ROCIT_CONFIG_GEN0_PCI_IOCU BIT(7)
++
+ extern void malta_be_init(void);
+ extern int malta_be_handler(struct pt_regs *regs, int is_fixup);
+
+@@ -107,6 +110,8 @@ static void __init fd_activate(void)
+ static int __init plat_enable_iocoherency(void)
+ {
+ int supported = 0;
++ u32 cfg;
++
+ if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) {
+ if (BONITO_PCICACHECTRL & BONITO_PCICACHECTRL_CPUCOH_PRES) {
+ BONITO_PCICACHECTRL |= BONITO_PCICACHECTRL_CPUCOH_EN;
+@@ -129,7 +134,8 @@ static int __init plat_enable_iocoherency(void)
+ } else if (mips_cm_numiocu() != 0) {
+ /* Nothing special needs to be done to enable coherency */
+ pr_info("CMP IOCU detected\n");
+- if ((*(unsigned int *)0xbf403000 & 0x81) != 0x81) {
++ cfg = __raw_readl((u32 *)CKSEG1ADDR(ROCIT_CONFIG_GEN0));
++ if (!(cfg & ROCIT_CONFIG_GEN0_PCI_IOCU)) {
+ pr_crit("IOCU OPERATION DISABLED BY SWITCH - DEFAULTING TO SW IO COHERENCY\n");
+ return 0;
+ }
+diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
+index 6ee4b72cda42..e97925ffa6c7 100644
+--- a/arch/powerpc/kernel/prom_init.c
++++ b/arch/powerpc/kernel/prom_init.c
+@@ -695,7 +695,7 @@ unsigned char ibm_architecture_vec[] = {
+ OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */
+
+ /* option vector 5: PAPR/OF options */
+- VECTOR_LENGTH(18), /* length */
++ VECTOR_LENGTH(21), /* length */
+ 0, /* don't ignore, don't halt */
+ OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
+ OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
+@@ -726,8 +726,11 @@ unsigned char ibm_architecture_vec[] = {
+ 0,
+ 0,
+ OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) |
+- OV5_FEAT(OV5_PFO_HW_842),
+- OV5_FEAT(OV5_SUB_PROCESSORS),
++ OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */
++ 0, /* Byte 18 */
++ 0, /* Byte 19 */
++ 0, /* Byte 20 */
++ OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */
+
+ /* option vector 6: IBM PAPR hints */
+ VECTOR_LENGTH(3), /* length */
+diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
+index c505d77e4d06..e9d54a06736f 100644
+--- a/arch/tile/include/asm/elf.h
++++ b/arch/tile/include/asm/elf.h
+@@ -129,6 +129,7 @@ extern int dump_task_regs(struct task_struct *, elf_gregset_t *);
+ struct linux_binprm;
+ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int executable_stack);
++/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
+ #define ARCH_DLINFO \
+ do { \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \
+diff --git a/arch/tile/include/uapi/asm/auxvec.h b/arch/tile/include/uapi/asm/auxvec.h
+index c93e92709f14..f497123ed980 100644
+--- a/arch/tile/include/uapi/asm/auxvec.h
++++ b/arch/tile/include/uapi/asm/auxvec.h
+@@ -18,4 +18,6 @@
+ /* The vDSO location. */
+ #define AT_SYSINFO_EHDR 33
+
++#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */
++
+ #endif /* _ASM_TILE_AUXVEC_H */
+diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
+index 6fa85944af83..dee8a70382ba 100644
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+ /* Initialize cr4 shadow for this CPU. */
+ static inline void cr4_init_shadow(void)
+ {
+- this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
++ this_cpu_write(cpu_tlbstate.cr4, __read_cr4_safe());
+ }
+
+ /* Set in this cpu's CR4. */
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 0fe6953f421c..41f3f7dd6323 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -804,21 +804,20 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
+ identify_cpu_without_cpuid(c);
+
+ /* cyrix could have cpuid enabled via c_identify()*/
+- if (!have_cpuid_p())
+- return;
++ if (have_cpuid_p()) {
++ cpu_detect(c);
++ get_cpu_vendor(c);
++ get_cpu_cap(c);
+
+- cpu_detect(c);
+- get_cpu_vendor(c);
+- get_cpu_cap(c);
+-
+- if (this_cpu->c_early_init)
+- this_cpu->c_early_init(c);
++ if (this_cpu->c_early_init)
++ this_cpu->c_early_init(c);
+
+- c->cpu_index = 0;
+- filter_cpuid_features(c, false);
++ c->cpu_index = 0;
++ filter_cpuid_features(c, false);
+
+- if (this_cpu->c_bsp_init)
+- this_cpu->c_bsp_init(c);
++ if (this_cpu->c_bsp_init)
++ this_cpu->c_bsp_init(c);
++ }
+
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+ fpu__init_system(c);
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 8326d6891395..9ed64d2b9ab1 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -409,6 +409,7 @@ struct nested_vmx {
+ struct list_head vmcs02_pool;
+ int vmcs02_num;
+ u64 vmcs01_tsc_offset;
++ bool change_vmcs01_virtual_x2apic_mode;
+ /* L2 must run next, and mustn't decide to exit to L1. */
+ bool nested_run_pending;
+ /*
+@@ -8284,6 +8285,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+ {
+ u32 sec_exec_control;
+
++ /* Postpone execution until vmcs01 is the current VMCS. */
++ if (is_guest_mode(vcpu)) {
++ to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
++ return;
++ }
++
+ /*
+ * There is not point to enable virtualize x2apic without enable
+ * apicv
+@@ -10601,6 +10608,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
+ /* Update TSC_OFFSET if TSC was changed while L2 ran */
+ vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+
++ if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
++ vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
++ vmx_set_virtual_x2apic_mode(vcpu,
++ vcpu->arch.apic_base & X2APIC_ENABLE);
++ }
++
+ /* This is needed for same reason as it was needed in prepare_vmcs02 */
+ vmx->host_rsp = 0;
+
+diff --git a/block/blk-mq.c b/block/blk-mq.c
+index f9b9049b1284..27dafb3a7a0e 100644
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -784,7 +784,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+ switch (ret) {
+ case BLK_MQ_RQ_QUEUE_OK:
+ queued++;
+- continue;
++ break;
+ case BLK_MQ_RQ_QUEUE_BUSY:
+ list_add(&rq->queuelist, &rq_list);
+ __blk_mq_requeue_request(rq);
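The one-word blk-mq change matters because of C control flow: inside a switch nested in a loop, continue restarts the loop and skips everything after the switch, whereas break merely exits the switch so the per-iteration work below it still runs. Demonstrated:

#include <stdio.h>

int main(void)
{
    int i;

    for (i = 0; i < 3; i++) {
        switch (i % 2) {
        case 0:
            break;         /* leaves the switch only */
        default:
            continue;      /* next loop iteration: skips the printf */
        }
        printf("post-switch work for i=%d\n", i);   /* i=0 and i=2 */
    }
    return 0;
}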
+diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c
+index aa56af87d941..b11af3f2c1db 100644
+--- a/drivers/base/regmap/regcache-rbtree.c
++++ b/drivers/base/regmap/regcache-rbtree.c
+@@ -404,6 +404,7 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg,
+ unsigned int new_base_reg, new_top_reg;
+ unsigned int min, max;
+ unsigned int max_dist;
++ unsigned int dist, best_dist = UINT_MAX;
+
+ max_dist = map->reg_stride * sizeof(*rbnode_tmp) /
+ map->cache_word_size;
+@@ -423,24 +424,41 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg,
+ &base_reg, &top_reg);
+
+ if (base_reg <= max && top_reg >= min) {
+- new_base_reg = min(reg, base_reg);
+- new_top_reg = max(reg, top_reg);
+- } else {
+- if (max < base_reg)
+- node = node->rb_left;
++ if (reg < base_reg)
++ dist = base_reg - reg;
++ else if (reg > top_reg)
++ dist = reg - top_reg;
+ else
+- node = node->rb_right;
+-
+- continue;
++ dist = 0;
++ if (dist < best_dist) {
++ rbnode = rbnode_tmp;
++ best_dist = dist;
++ new_base_reg = min(reg, base_reg);
++ new_top_reg = max(reg, top_reg);
++ }
+ }
+
+- ret = regcache_rbtree_insert_to_block(map, rbnode_tmp,
++ /*
++ * Keep looking, we want to choose the closest block,
++ * otherwise we might end up creating overlapping
++ * blocks, which breaks the rbtree.
++ */
++ if (reg < base_reg)
++ node = node->rb_left;
++ else if (reg > top_reg)
++ node = node->rb_right;
++ else
++ break;
++ }
++
++ if (rbnode) {
++ ret = regcache_rbtree_insert_to_block(map, rbnode,
+ new_base_reg,
+ new_top_reg, reg,
+ value);
+ if (ret)
+ return ret;
+- rbtree_ctx->cached_rbnode = rbnode_tmp;
++ rbtree_ctx->cached_rbnode = rbnode;
+ return 0;
+ }
+
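The rbtree cache now scores every nearby block by distance and extends only the closest one, instead of greedily extending the first overlap candidate, which could make two blocks grow into each other and corrupt the tree. The metric in isolation, as a sketch:

#include <stdio.h>

static unsigned int dist(unsigned int reg, unsigned int base, unsigned int top)
{
    if (reg < base)
        return base - reg;    /* register below the block */
    if (reg > top)
        return reg - top;     /* register above the block */
    return 0;                 /* register already inside [base, top] */
}

int main(void)
{
    /* blocks [10,20] and [40,50]: register 27 is nearer the first */
    unsigned int d1 = dist(27, 10, 20);
    unsigned int d2 = dist(27, 40, 50);

    printf("extend the %s block\n", d1 <= d2 ? "first" : "second");
    return 0;
}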
+diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
+index 8a1432e8bb80..01d4be2c354b 100644
+--- a/drivers/char/hw_random/omap-rng.c
++++ b/drivers/char/hw_random/omap-rng.c
+@@ -384,7 +384,12 @@ static int omap_rng_probe(struct platform_device *pdev)
+ }
+
+ pm_runtime_enable(&pdev->dev);
+- pm_runtime_get_sync(&pdev->dev);
++ ret = pm_runtime_get_sync(&pdev->dev);
++ if (ret) {
++ dev_err(&pdev->dev, "Failed to runtime_get device: %d\n", ret);
++ pm_runtime_put_noidle(&pdev->dev);
++ goto err_ioremap;
++ }
+
+ ret = (dev->of_node) ? of_get_omap_rng_device_details(priv, pdev) :
+ get_omap_rng_device_details(priv);
+@@ -435,8 +440,15 @@ static int __maybe_unused omap_rng_suspend(struct device *dev)
+ static int __maybe_unused omap_rng_resume(struct device *dev)
+ {
+ struct omap_rng_dev *priv = dev_get_drvdata(dev);
++ int ret;
++
++ ret = pm_runtime_get_sync(dev);
++ if (ret) {
++ dev_err(dev, "Failed to runtime_get device: %d\n", ret);
++ pm_runtime_put_noidle(dev);
++ return ret;
++ }
+
+- pm_runtime_get_sync(dev);
+ priv->pdata->init(priv);
+
+ return 0;
+diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
+index b28e4da3d2cf..ca2d2386a4eb 100644
+--- a/drivers/char/tpm/tpm2-cmd.c
++++ b/drivers/char/tpm/tpm2-cmd.c
+@@ -703,7 +703,7 @@ ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value,
+
+ rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), desc);
+ if (!rc)
+- *value = cmd.params.get_tpm_pt_out.value;
++ *value = be32_to_cpu(cmd.params.get_tpm_pt_out.value);
+
+ return rc;
+ }
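The tpm2 fix is a missing endianness conversion: the response field arrives big-endian and must pass through be32_to_cpu() before being stored in a host integer. The userspace analogue of that helper is ntohl():

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t wire = htonl(0x12345678);   /* as the field arrives off the wire */

    printf("raw: 0x%08x  converted: 0x%08x\n", wire, ntohl(wire));
    /* on little-endian hosts the raw value prints byte-swapped;
       the converted one is always 0x12345678 */
    return 0;
}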
+diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
+index e9fd1d83f9f1..0f7ec0deed6c 100644
+--- a/drivers/char/tpm/tpm_crb.c
++++ b/drivers/char/tpm/tpm_crb.c
+@@ -77,7 +77,6 @@ enum crb_flags {
+
+ struct crb_priv {
+ unsigned int flags;
+- struct resource res;
+ void __iomem *iobase;
+ struct crb_control_area __iomem *cca;
+ u8 __iomem *cmd;
+@@ -224,19 +223,19 @@ static int crb_init(struct acpi_device *device, struct crb_priv *priv)
+
+ static int crb_check_resource(struct acpi_resource *ares, void *data)
+ {
+- struct crb_priv *priv = data;
++ struct resource *io_res = data;
+ struct resource res;
+
+ if (acpi_dev_resource_memory(ares, &res)) {
+- priv->res = res;
+- priv->res.name = NULL;
++ *io_res = res;
++ io_res->name = NULL;
+ }
+
+ return 1;
+ }
+
+ static void __iomem *crb_map_res(struct device *dev, struct crb_priv *priv,
+- u64 start, u32 size)
++ struct resource *io_res, u64 start, u32 size)
+ {
+ struct resource new_res = {
+ .start = start,
+@@ -248,51 +247,72 @@ static void __iomem *crb_map_res(struct device *dev, struct crb_priv *priv,
+ if (start != new_res.start)
+ return (void __iomem *) ERR_PTR(-EINVAL);
+
+- if (!resource_contains(&priv->res, &new_res))
++ if (!resource_contains(io_res, &new_res))
+ return devm_ioremap_resource(dev, &new_res);
+
+- return priv->iobase + (new_res.start - priv->res.start);
++ return priv->iobase + (new_res.start - io_res->start);
+ }
+
+ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
+ struct acpi_table_tpm2 *buf)
+ {
+ struct list_head resources;
++ struct resource io_res;
+ struct device *dev = &device->dev;
+- u64 pa;
++ u64 cmd_pa;
++ u32 cmd_size;
++ u64 rsp_pa;
++ u32 rsp_size;
+ int ret;
+
+ INIT_LIST_HEAD(&resources);
+ ret = acpi_dev_get_resources(device, &resources, crb_check_resource,
+- priv);
++ &io_res);
+ if (ret < 0)
+ return ret;
+ acpi_dev_free_resource_list(&resources);
+
+- if (resource_type(&priv->res) != IORESOURCE_MEM) {
++ if (resource_type(&io_res) != IORESOURCE_MEM) {
+ dev_err(dev,
+ FW_BUG "TPM2 ACPI table does not define a memory resource\n");
+ return -EINVAL;
+ }
+
+- priv->iobase = devm_ioremap_resource(dev, &priv->res);
++ priv->iobase = devm_ioremap_resource(dev, &io_res);
+ if (IS_ERR(priv->iobase))
+ return PTR_ERR(priv->iobase);
+
+- priv->cca = crb_map_res(dev, priv, buf->control_address, 0x1000);
++ priv->cca = crb_map_res(dev, priv, &io_res, buf->control_address,
++ sizeof(struct crb_control_area));
+ if (IS_ERR(priv->cca))
+ return PTR_ERR(priv->cca);
+
+- pa = ((u64) ioread32(&priv->cca->cmd_pa_high) << 32) |
+- (u64) ioread32(&priv->cca->cmd_pa_low);
+- priv->cmd = crb_map_res(dev, priv, pa, ioread32(&priv->cca->cmd_size));
++ cmd_pa = ((u64) ioread32(&priv->cca->cmd_pa_high) << 32) |
++ (u64) ioread32(&priv->cca->cmd_pa_low);
++ cmd_size = ioread32(&priv->cca->cmd_size);
++ priv->cmd = crb_map_res(dev, priv, &io_res, cmd_pa, cmd_size);
+ if (IS_ERR(priv->cmd))
+ return PTR_ERR(priv->cmd);
+
+- memcpy_fromio(&pa, &priv->cca->rsp_pa, 8);
+- pa = le64_to_cpu(pa);
+- priv->rsp = crb_map_res(dev, priv, pa, ioread32(&priv->cca->rsp_size));
+- return PTR_ERR_OR_ZERO(priv->rsp);
++ memcpy_fromio(&rsp_pa, &priv->cca->rsp_pa, 8);
++ rsp_pa = le64_to_cpu(rsp_pa);
++ rsp_size = ioread32(&priv->cca->rsp_size);
++
++ if (cmd_pa != rsp_pa) {
++ priv->rsp = crb_map_res(dev, priv, &io_res, rsp_pa, rsp_size);
++ return PTR_ERR_OR_ZERO(priv->rsp);
++ }
++
++ /* According to the PTP specification, overlapping command and response
++ * buffer sizes must be identical.
++ */
++ if (cmd_size != rsp_size) {
++ dev_err(dev, FW_BUG "overlapping command and response buffer sizes are not identical");
++ return -EINVAL;
++ }
++
++ priv->rsp = priv->cmd;
++ return 0;
+ }
+
+ static int crb_acpi_add(struct acpi_device *device)
+diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
+index 75bd6621dc5d..de9a3d7505a4 100644
+--- a/drivers/dma/at_xdmac.c
++++ b/drivers/dma/at_xdmac.c
+@@ -1195,8 +1195,8 @@ static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan,
+ desc->lld.mbr_cfg = chan_cc;
+
+ dev_dbg(chan2dev(chan),
+- "%s: lld: mbr_da=%pad, mbr_ds=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+- __func__, &desc->lld.mbr_da, &desc->lld.mbr_ds, desc->lld.mbr_ubc,
++ "%s: lld: mbr_da=%pad, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
++ __func__, &desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc,
+ desc->lld.mbr_cfg);
+
+ return desc;
+@@ -2067,7 +2067,7 @@ err_dma_unregister:
+ err_clk_disable:
+ clk_disable_unprepare(atxdmac->clk);
+ err_free_irq:
+- free_irq(atxdmac->irq, atxdmac->dma.dev);
++ free_irq(atxdmac->irq, atxdmac);
+ return ret;
+ }
+
+@@ -2081,7 +2081,7 @@ static int at_xdmac_remove(struct platform_device *pdev)
+ dma_async_device_unregister(&atxdmac->dma);
+ clk_disable_unprepare(atxdmac->clk);
+
+- free_irq(atxdmac->irq, atxdmac->dma.dev);
++ free_irq(atxdmac->irq, atxdmac);
+
+ for (i = 0; i < atxdmac->dma.chancnt; i++) {
+ struct at_xdmac_chan *atchan = &atxdmac->chan[i];
+diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
+index 6149b27c33ad..c8dd5b00c1b4 100644
+--- a/drivers/dma/bcm2835-dma.c
++++ b/drivers/dma/bcm2835-dma.c
+@@ -393,11 +393,12 @@ static void bcm2835_dma_fill_cb_chain_with_sg(
+ unsigned int sg_len)
+ {
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+- size_t max_len = bcm2835_dma_max_frame_length(c);
+- unsigned int i, len;
++ size_t len, max_len;
++ unsigned int i;
+ dma_addr_t addr;
+ struct scatterlist *sgent;
+
++ max_len = bcm2835_dma_max_frame_length(c);
+ for_each_sg(sgl, sgent, sg_len, i) {
+ for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent);
+ len > 0;
+diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c
+index 0c99e8fb9af3..8d8ee0ebf14c 100644
+--- a/drivers/gpio/gpio-sa1100.c
++++ b/drivers/gpio/gpio-sa1100.c
+@@ -155,7 +155,7 @@ static int sa1100_gpio_irqdomain_map(struct irq_domain *d,
+ {
+ irq_set_chip_and_handler(irq, &sa1100_gpio_irq_chip,
+ handle_edge_irq);
+- irq_set_noprobe(irq);
++ irq_set_probe(irq);
+
+ return 0;
+ }
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
+index edec30fd3ecd..0a7b6ed5ed28 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
+@@ -37,7 +37,10 @@ nv04_fifo_dma_object_dtor(struct nvkm_fifo_chan *base, int cookie)
+ {
+ struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
+ struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem;
++
++ mutex_lock(&chan->fifo->base.engine.subdev.mutex);
+ nvkm_ramht_remove(imem->ramht, cookie);
++ mutex_unlock(&chan->fifo->base.engine.subdev.mutex);
+ }
+
+ static int
+diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
+index e6abc09b67e3..1f78ec2548ec 100644
+--- a/drivers/gpu/drm/radeon/si_dpm.c
++++ b/drivers/gpu/drm/radeon/si_dpm.c
+@@ -3015,6 +3015,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
+ if (rdev->pdev->device == 0x6811 &&
+ rdev->pdev->revision == 0x81)
+ max_mclk = 120000;
++ /* limit sclk/mclk on Jet parts for stability */
++ if (rdev->pdev->device == 0x6665 &&
++ rdev->pdev->revision == 0xc3) {
++ max_sclk = 75000;
++ max_mclk = 80000;
++ }
+
+ if (rps->vce_active) {
+ rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
+diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
+index d5df555aeba0..cc2dabab270b 100644
+--- a/drivers/gpu/drm/udl/udl_fb.c
++++ b/drivers/gpu/drm/udl/udl_fb.c
+@@ -122,7 +122,7 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
+ return 0;
+ cmd = urb->transfer_buffer;
+
+- for (i = y; i < height ; i++) {
++ for (i = y; i < y + height ; i++) {
+ const int line_offset = fb->base.pitches[0] * i;
+ const int byte_offset = line_offset + (x * bpp);
+ const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp);
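
The one-line fix above changes the loop bound from "i < height" to "i < y + height": the loop index starts at the damage rectangle's first row y, so the old bound under-counted rows and skipped the update entirely whenever y >= height. A toy sketch of the corrected iteration (illustrative only):

    #include <stdio.h>

    static int rows_touched(int y, int height)
    {
        int n = 0;
        for (int i = y; i < y + height; i++)  /* fixed bound */
            n++;
        return n;
    }

    int main(void)
    {
        /* A 10-row update starting at row 100 now touches 10 rows;
         * with the old "i < height" bound it touched none. */
        printf("%d\n", rows_touched(100, 10));
        return 0;
    }
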
+diff --git a/drivers/hwmon/adt7411.c b/drivers/hwmon/adt7411.c
+index 827c03703128..a7f886961830 100644
+--- a/drivers/hwmon/adt7411.c
++++ b/drivers/hwmon/adt7411.c
+@@ -30,6 +30,7 @@
+
+ #define ADT7411_REG_CFG1 0x18
+ #define ADT7411_CFG1_START_MONITOR (1 << 0)
++#define ADT7411_CFG1_RESERVED_BIT3 (1 << 3)
+
+ #define ADT7411_REG_CFG2 0x19
+ #define ADT7411_CFG2_DISABLE_AVG (1 << 5)
+@@ -296,8 +297,10 @@ static int adt7411_probe(struct i2c_client *client,
+ mutex_init(&data->device_lock);
+ mutex_init(&data->update_lock);
+
++ /* According to the datasheet, we must only write 1 to bit 3 */
+ ret = adt7411_modify_bit(client, ADT7411_REG_CFG1,
+- ADT7411_CFG1_START_MONITOR, 1);
++ ADT7411_CFG1_RESERVED_BIT3
++ | ADT7411_CFG1_START_MONITOR, 1);
+ if (ret < 0)
+ return ret;
+
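
The hunk above folds the reserved must-write-1 bit into the same read-modify-write that starts monitoring. A userspace sketch of that combined-mask pattern, with the register access stubbed out and all names illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define CFG1_START_MONITOR (1 << 0)
    #define CFG1_RESERVED_BIT3 (1 << 3)

    static uint8_t cfg1 = 0x08;  /* stand-in for the CFG1 register */

    static void modify_bit(uint8_t mask, int set)
    {
        uint8_t val = cfg1;                                 /* read */
        val = set ? (val | mask) : (uint8_t)(val & ~mask);  /* modify */
        cfg1 = val;                                         /* write */
    }

    int main(void)
    {
        /* Setting the monitor bit with the reserved bit in the same mask
         * guarantees bit 3 stays high, as the datasheet requires. */
        modify_bit(CFG1_RESERVED_BIT3 | CFG1_START_MONITOR, 1);
        printf("CFG1 = 0x%02x\n", cfg1);  /* 0x09 */
        return 0;
    }
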
+diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c
+index 215ac87f606d..e999125ce37d 100644
+--- a/drivers/i2c/muxes/i2c-demux-pinctrl.c
++++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c
+@@ -37,8 +37,6 @@ struct i2c_demux_pinctrl_priv {
+ struct i2c_demux_pinctrl_chan chan[];
+ };
+
+-static struct property status_okay = { .name = "status", .length = 3, .value = "ok" };
+-
+ static int i2c_demux_master_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
+ {
+ struct i2c_demux_pinctrl_priv *priv = adap->algo_data;
+@@ -192,6 +190,7 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev)
+ {
+ struct device_node *np = pdev->dev.of_node;
+ struct i2c_demux_pinctrl_priv *priv;
++ struct property *props;
+ int num_chan, i, j, err;
+
+ num_chan = of_count_phandle_with_args(np, "i2c-parent", NULL);
+@@ -202,7 +201,10 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev)
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv)
+ + num_chan * sizeof(struct i2c_demux_pinctrl_chan), GFP_KERNEL);
+- if (!priv)
++
++ props = devm_kcalloc(&pdev->dev, num_chan, sizeof(*props), GFP_KERNEL);
++
++ if (!priv || !props)
+ return -ENOMEM;
+
+ err = of_property_read_string(np, "i2c-bus-name", &priv->bus_name);
+@@ -220,8 +222,12 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev)
+ }
+ priv->chan[i].parent_np = adap_np;
+
++ props[i].name = devm_kstrdup(&pdev->dev, "status", GFP_KERNEL);
++ props[i].value = devm_kstrdup(&pdev->dev, "ok", GFP_KERNEL);
++ props[i].length = 3;
++
+ of_changeset_init(&priv->chan[i].chgset);
+- of_changeset_update_property(&priv->chan[i].chgset, adap_np, &status_okay);
++ of_changeset_update_property(&priv->chan[i].chgset, adap_np, &props[i]);
+ }
+
+ priv->num_chan = num_chan;
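
The change above replaces one shared static "status" property with a per-channel allocation, since each of_changeset keeps a reference to the property object it is given and the channels would otherwise all point at the same instance. A simplified userspace sketch of the per-channel allocation, with plain malloc standing in for the devm_* helpers:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct property { char *name; char *value; int length; };

    int main(void)
    {
        int num_chan = 3;
        struct property *props = calloc(num_chan, sizeof(*props));

        if (!props)
            return 1;
        for (int i = 0; i < num_chan; i++) {
            props[i].name = strdup("status");
            props[i].value = strdup("ok");
            props[i].length = 3;  /* strlen("ok") + NUL, as in the patch */
        }
        /* each changeset can now own props[i] independently */
        printf("%s = %s\n", props[0].name, props[0].value);
        for (int i = 0; i < num_chan; i++) {
            free(props[i].name);
            free(props[i].value);
        }
        free(props);
        return 0;
    }
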
+diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
+index ad1b1adcf6f0..8b5abc4ce202 100644
+--- a/drivers/infiniband/core/cma.c
++++ b/drivers/infiniband/core/cma.c
+@@ -2452,18 +2452,24 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
+
+ if (addr->dev_addr.bound_dev_if) {
+ ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+- if (!ndev)
+- return -ENODEV;
++ if (!ndev) {
++ ret = -ENODEV;
++ goto err2;
++ }
+
+ if (ndev->flags & IFF_LOOPBACK) {
+ dev_put(ndev);
+- if (!id_priv->id.device->get_netdev)
+- return -EOPNOTSUPP;
++ if (!id_priv->id.device->get_netdev) {
++ ret = -EOPNOTSUPP;
++ goto err2;
++ }
+
+ ndev = id_priv->id.device->get_netdev(id_priv->id.device,
+ id_priv->id.port_num);
+- if (!ndev)
+- return -ENODEV;
++ if (!ndev) {
++ ret = -ENODEV;
++ goto err2;
++ }
+ }
+
+ route->path_rec->net = &init_net;
+diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
+index a83ec28a147b..365cb53cc8b2 100644
+--- a/drivers/infiniband/core/multicast.c
++++ b/drivers/infiniband/core/multicast.c
+@@ -118,7 +118,6 @@ struct mcast_group {
+ atomic_t refcount;
+ enum mcast_group_state state;
+ struct ib_sa_query *query;
+- int query_id;
+ u16 pkey_index;
+ u8 leave_state;
+ int retries;
+@@ -352,11 +351,7 @@ static int send_join(struct mcast_group *group, struct mcast_member *member)
+ member->multicast.comp_mask,
+ 3000, GFP_KERNEL, join_handler, group,
+ &group->query);
+- if (ret >= 0) {
+- group->query_id = ret;
+- ret = 0;
+- }
+- return ret;
++ return (ret > 0) ? 0 : ret;
+ }
+
+ static int send_leave(struct mcast_group *group, u8 leave_state)
+@@ -376,11 +371,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state)
+ IB_SA_MCMEMBER_REC_JOIN_STATE,
+ 3000, GFP_KERNEL, leave_handler,
+ group, &group->query);
+- if (ret >= 0) {
+- group->query_id = ret;
+- ret = 0;
+- }
+- return ret;
++ return (ret > 0) ? 0 : ret;
+ }
+
+ static void join_group(struct mcast_group *group, struct mcast_member *member,
+diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
+index b738acdb9b02..8ec09e470f84 100644
+--- a/drivers/infiniband/hw/i40iw/i40iw.h
++++ b/drivers/infiniband/hw/i40iw/i40iw.h
+@@ -232,7 +232,7 @@ struct i40iw_device {
+ struct i40e_client *client;
+ struct i40iw_hw hw;
+ struct i40iw_cm_core cm_core;
+- unsigned long *mem_resources;
++ u8 *mem_resources;
+ unsigned long *allocated_qps;
+ unsigned long *allocated_cqs;
+ unsigned long *allocated_mrs;
+@@ -435,8 +435,8 @@ static inline int i40iw_alloc_resource(struct i40iw_device *iwdev,
+ *next = resource_num + 1;
+ if (*next == max_resources)
+ *next = 0;
+- spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+ *req_resource_num = resource_num;
++ spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+
+ return 0;
+ }
+diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
+index d2fa72516960..406a345227c3 100644
+--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
++++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
+@@ -535,8 +535,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
+ buf += hdr_len;
+ }
+
+- if (pd_len)
+- memcpy(buf, pdata->addr, pd_len);
++ if (pdata && pdata->addr)
++ memcpy(buf, pdata->addr, pdata->size);
+
+ atomic_set(&sqbuf->refcount, 1);
+
+@@ -3347,26 +3347,6 @@ int i40iw_cm_disconn(struct i40iw_qp *iwqp)
+ }
+
+ /**
+- * i40iw_loopback_nop - Send a nop
+- * @qp: associated hw qp
+- */
+-static void i40iw_loopback_nop(struct i40iw_sc_qp *qp)
+-{
+- u64 *wqe;
+- u64 header;
+-
+- wqe = qp->qp_uk.sq_base->elem;
+- set_64bit_val(wqe, 0, 0);
+- set_64bit_val(wqe, 8, 0);
+- set_64bit_val(wqe, 16, 0);
+-
+- header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+- LS_64(0, I40IWQPSQ_SIGCOMPL) |
+- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+- set_64bit_val(wqe, 24, header);
+-}
+-
+-/**
+ * i40iw_qp_disconnect - free qp and close cm
+ * @iwqp: associate qp for the connection
+ */
+@@ -3638,7 +3618,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+ } else {
+ if (iwqp->page)
+ iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+- i40iw_loopback_nop(&iwqp->sc_qp);
++ dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0);
+ }
+
+ if (iwqp->page)
+diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
+index 3ee0cad96bc6..0c92a40b3e86 100644
+--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
++++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
+@@ -265,6 +265,7 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp,
+ info.dont_send_fin = false;
+ if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
+ info.reset_tcp_conn = true;
++ iwqp->hw_iwarp_state = state;
+ i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
+ }
+
+diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
+index 6e9081380a27..445e230d5ff8 100644
+--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
++++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
+@@ -100,7 +100,7 @@ static struct notifier_block i40iw_net_notifier = {
+ .notifier_call = i40iw_net_event
+ };
+
+-static int i40iw_notifiers_registered;
++static atomic_t i40iw_notifiers_registered;
+
+ /**
+ * i40iw_find_i40e_handler - find a handler given a client info
+@@ -1342,12 +1342,11 @@ exit:
+ */
+ static void i40iw_register_notifiers(void)
+ {
+- if (!i40iw_notifiers_registered) {
++ if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
+ register_inetaddr_notifier(&i40iw_inetaddr_notifier);
+ register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+ register_netevent_notifier(&i40iw_net_notifier);
+ }
+- i40iw_notifiers_registered++;
+ }
+
+ /**
+@@ -1429,8 +1428,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del
+ i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
+ /* fallthrough */
+ case INET_NOTIFIER:
+- if (i40iw_notifiers_registered > 0) {
+- i40iw_notifiers_registered--;
++ if (!atomic_dec_return(&i40iw_notifiers_registered)) {
+ unregister_netevent_notifier(&i40iw_net_notifier);
+ unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
+ unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+@@ -1558,6 +1556,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
+ enum i40iw_status_code status;
+ struct i40iw_handler *hdl;
+
++ hdl = i40iw_find_netdev(ldev->netdev);
++ if (hdl)
++ return 0;
++
+ hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
+ if (!hdl)
+ return -ENOMEM;
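
The notifier hunks above convert a plain counter into an atomic one so that only the first caller registers and only the last one unregisters, without racing. A userspace sketch of that register-once pattern, using C11 atomics in place of the kernel's atomic_t:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int registered;

    static void register_notifiers(void)
    {
        /* atomic_inc_return(...) == 1 in the patch */
        if (atomic_fetch_add(&registered, 1) + 1 == 1)
            puts("register");   /* only the first caller registers */
    }

    static void unregister_notifiers(void)
    {
        /* !atomic_dec_return(...) in the patch */
        if (atomic_fetch_sub(&registered, 1) - 1 == 0)
            puts("unregister"); /* only the last caller unregisters */
    }

    int main(void)
    {
        register_notifiers();
        register_notifiers();   /* no-op: already registered */
        unregister_notifiers(); /* no-op: one user remains */
        unregister_notifiers(); /* final user: unregisters */
        return 0;
    }
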
+diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
+index 0e8db0a35141..6fd043b1d714 100644
+--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
++++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
+@@ -673,8 +673,11 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
+ {
+ if (!mem)
+ return I40IW_ERR_PARAM;
++ /*
++ * mem->va points to the parent of mem, so neither mem nor
++ * mem->va can be touched once mem->va is freed
++ */
+ kfree(mem->va);
+- mem->va = NULL;
+ return 0;
+ }
+
+diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+index 283b64c942ee..f24bfbdddfaa 100644
+--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
++++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+@@ -794,7 +794,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
+ return &iwqp->ibqp;
+ error:
+ i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+- kfree(mem);
+ return ERR_PTR(err_code);
+ }
+
+@@ -1925,8 +1924,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
+ }
+ if (iwpbl->pbl_allocated)
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+- kfree(iwpbl->iwmr);
+- iwpbl->iwmr = NULL;
++ kfree(iwmr);
+ return 0;
+ }
+
+diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
+index 9c2e53d28f98..0f21c3a25552 100644
+--- a/drivers/infiniband/hw/mlx4/mad.c
++++ b/drivers/infiniband/hw/mlx4/mad.c
+@@ -1128,6 +1128,27 @@ void handle_port_mgmt_change_event(struct work_struct *work)
+
+ /* Generate GUID changed event */
+ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
++ if (mlx4_is_master(dev->dev)) {
++ union ib_gid gid;
++ int err = 0;
++
++ if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
++ err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
++ else
++ gid.global.subnet_prefix =
++ eqe->event.port_mgmt_change.params.port_info.gid_prefix;
++ if (err) {
++ pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
++ port, err);
++ } else {
++ pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
++ port,
++ (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
++ be64_to_cpu(gid.global.subnet_prefix));
++ atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
++ be64_to_cpu(gid.global.subnet_prefix));
++ }
++ }
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
+ /*if master, notify all slaves*/
+ if (mlx4_is_master(dev->dev))
+@@ -2202,6 +2223,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
+ if (err)
+ goto demux_err;
+ dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
++ atomic64_set(&dev->sriov.demux[i].subnet_prefix,
++ be64_to_cpu(gid.global.subnet_prefix));
+ err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
+ &dev->sriov.sqps[i]);
+ if (err)
+diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
+index 8f7ad07915b0..097bfcc4ee99 100644
+--- a/drivers/infiniband/hw/mlx4/mcg.c
++++ b/drivers/infiniband/hw/mlx4/mcg.c
+@@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group)
+ if (!group->members[i])
+ leave_state |= (1 << i);
+
+- return leave_state & (group->rec.scope_join_state & 7);
++ return leave_state & (group->rec.scope_join_state & 0xf);
+ }
+
+ static int join_group(struct mcast_group *group, int slave, u8 join_mask)
+@@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
+ } else
+ mcg_warn_group(group, "DRIVER BUG\n");
+ } else if (group->state == MCAST_LEAVE_SENT) {
+- if (group->rec.scope_join_state & 7)
+- group->rec.scope_join_state &= 0xf8;
++ if (group->rec.scope_join_state & 0xf)
++ group->rec.scope_join_state &= 0xf0;
+ group->state = MCAST_IDLE;
+ mutex_unlock(&group->lock);
+ if (release_group(group, 1))
+@@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
+ static int handle_join_req(struct mcast_group *group, u8 join_mask,
+ struct mcast_req *req)
+ {
+- u8 group_join_state = group->rec.scope_join_state & 7;
++ u8 group_join_state = group->rec.scope_join_state & 0xf;
+ int ref = 0;
+ u16 status;
+ struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
+@@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
+ u8 cur_join_state;
+
+ resp_join_state = ((struct ib_sa_mcmember_data *)
+- group->response_sa_mad.data)->scope_join_state & 7;
+- cur_join_state = group->rec.scope_join_state & 7;
++ group->response_sa_mad.data)->scope_join_state & 0xf;
++ cur_join_state = group->rec.scope_join_state & 0xf;
+
+ if (method == IB_MGMT_METHOD_GET_RESP) {
+ /* successful join */
+@@ -710,7 +710,7 @@ process_requests:
+ req = list_first_entry(&group->pending_list, struct mcast_req,
+ group_list);
+ sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
+- req_join_state = sa_data->scope_join_state & 0x7;
++ req_join_state = sa_data->scope_join_state & 0xf;
+
+ /* For a leave request, we will immediately answer the VF, and
+ * update our internal counters. The actual leave will be sent
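
All the 0x7-to-0xf changes above widen the join-state mask: scope_join_state packs a 4-bit scope in the high nibble and a 4-bit join state in the low nibble, and masking with 0x7 silently dropped join-state bit 3. A two-line illustration:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint8_t scope_join_state = 0x28;  /* scope 2, join state 0x8 */

        printf("old mask 0x7 -> 0x%x\n", scope_join_state & 0x7); /* 0x0: bit lost */
        printf("new mask 0xf -> 0x%x\n", scope_join_state & 0xf); /* 0x8: bit kept */
        return 0;
    }
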
+diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
+index 29acda249612..d4ca38103cec 100644
+--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
++++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
+@@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx {
+ struct workqueue_struct *wq;
+ struct workqueue_struct *ud_wq;
+ spinlock_t ud_lock;
+- __be64 subnet_prefix;
++ atomic64_t subnet_prefix;
+ __be64 guid_cache[128];
+ struct mlx4_ib_dev *dev;
+ /* the following lock protects both mcg_table and mcg_mgid0_list */
+diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
+index 8db8405c1e99..d995222e661f 100644
+--- a/drivers/infiniband/hw/mlx4/qp.c
++++ b/drivers/infiniband/hw/mlx4/qp.c
+@@ -2492,24 +2492,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
+ sqp->ud_header.grh.flow_label =
+ ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+ sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
+- if (is_eth)
++ if (is_eth) {
+ memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
+- else {
+- if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+- /* When multi-function is enabled, the ib_core gid
+- * indexes don't necessarily match the hw ones, so
+- * we must use our own cache */
+- sqp->ud_header.grh.source_gid.global.subnet_prefix =
+- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+- subnet_prefix;
+- sqp->ud_header.grh.source_gid.global.interface_id =
+- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+- guid_cache[ah->av.ib.gid_index];
+- } else
+- ib_get_cached_gid(ib_dev,
+- be32_to_cpu(ah->av.ib.port_pd) >> 24,
+- ah->av.ib.gid_index,
+- &sqp->ud_header.grh.source_gid, NULL);
++ } else {
++ if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
++ /* When multi-function is enabled, the ib_core gid
++ * indexes don't necessarily match the hw ones, so
++ * we must use our own cache
++ */
++ sqp->ud_header.grh.source_gid.global.subnet_prefix =
++ cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
++ demux[sqp->qp.port - 1].
++ subnet_prefix)));
++ sqp->ud_header.grh.source_gid.global.interface_id =
++ to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
++ guid_cache[ah->av.ib.gid_index];
++ } else {
++ ib_get_cached_gid(ib_dev,
++ be32_to_cpu(ah->av.ib.port_pd) >> 24,
++ ah->av.ib.gid_index,
++ &sqp->ud_header.grh.source_gid, NULL);
++ }
+ }
+ memcpy(sqp->ud_header.grh.destination_gid.raw,
+ ah->av.ib.dgid, 16);
+diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
+index b48ad85315dc..c90f57579a92 100644
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -287,7 +287,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+
+ static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
+ {
+- return !MLX5_CAP_GEN(dev->mdev, ib_virt);
++ if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
++ return !MLX5_CAP_GEN(dev->mdev, ib_virt);
++ return 0;
+ }
+
+ enum {
+@@ -1275,6 +1277,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
+ dmac_47_16),
+ ib_spec->eth.val.dst_mac);
+
++ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
++ smac_47_16),
++ ib_spec->eth.mask.src_mac);
++ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
++ smac_47_16),
++ ib_spec->eth.val.src_mac);
++
+ if (ib_spec->eth.mask.vlan_tag) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ vlan_tag, 1);
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
+index 4f7d9b48df64..9dbfcc0ab577 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib.h
++++ b/drivers/infiniband/ulp/ipoib/ipoib.h
+@@ -478,6 +478,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ipoib_ah *address, u32 qpn);
+ void ipoib_reap_ah(struct work_struct *work);
+
++struct ipoib_path *__path_find(struct net_device *dev, void *gid);
+ void ipoib_mark_paths_invalid(struct net_device *dev);
+ void ipoib_flush_paths(struct net_device *dev);
+ int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+index 951d9abcca8b..4ad297d3de89 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+@@ -1318,6 +1318,8 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
+ }
+ }
+
++#define QPN_AND_OPTIONS_OFFSET 4
++
+ static void ipoib_cm_tx_start(struct work_struct *work)
+ {
+ struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+@@ -1326,6 +1328,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
+ struct ipoib_neigh *neigh;
+ struct ipoib_cm_tx *p;
+ unsigned long flags;
++ struct ipoib_path *path;
+ int ret;
+
+ struct ib_sa_path_rec pathrec;
+@@ -1338,7 +1341,19 @@ static void ipoib_cm_tx_start(struct work_struct *work)
+ p = list_entry(priv->cm.start_list.next, typeof(*p), list);
+ list_del_init(&p->list);
+ neigh = p->neigh;
++
+ qpn = IPOIB_QPN(neigh->daddr);
++ /*
++ * As long as the search is done while holding these 2 locks,

++ * path existence indicates its validity.
++ */
++ path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
++ if (!path) {
++ pr_info("%s ignore not valid path %pI6\n",
++ __func__,
++ neigh->daddr + QPN_AND_OPTIONS_OFFSET);
++ goto free_neigh;
++ }
+ memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+@@ -1350,6 +1365,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
+ spin_lock_irqsave(&priv->lock, flags);
+
+ if (ret) {
++free_neigh:
+ neigh = p->neigh;
+ if (neigh) {
+ neigh->cm = NULL;
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+index dc6d241b9406..be11d5d5b8c1 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+@@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
+ }
+
+ if (level == IPOIB_FLUSH_LIGHT) {
++ int oper_up;
+ ipoib_mark_paths_invalid(dev);
++ /* Set IPoIB operation as down to prevent races between
++ * the flush flow, which leaves the MCG, and on-the-fly joins
++ * that can happen during that time; the mcast restart task
++ * should deal with any join requests we missed.
++ */
++ oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
+ ipoib_mcast_dev_flush(dev);
++ if (oper_up)
++ set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
+ ipoib_flush_ah(dev);
+ }
+
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+index 5f58c41ef787..ddebda53e147 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+@@ -485,7 +485,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
+ return -EINVAL;
+ }
+
+-static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
++struct ipoib_path *__path_find(struct net_device *dev, void *gid)
+ {
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct rb_node *n = priv->path_tree.rb_node;
+diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
+index 2c5ba0e704bf..53891c34fe19 100644
+--- a/drivers/irqchip/irq-gic-v3.c
++++ b/drivers/irqchip/irq-gic-v3.c
+@@ -558,7 +558,7 @@ static struct notifier_block gic_cpu_notifier = {
+ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask,
+ unsigned long cluster_id)
+ {
+- int cpu = *base_cpu;
++ int next_cpu, cpu = *base_cpu;
+ unsigned long mpidr = cpu_logical_map(cpu);
+ u16 tlist = 0;
+
+@@ -572,9 +572,10 @@ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask,
+
+ tlist |= 1 << (mpidr & 0xf);
+
+- cpu = cpumask_next(cpu, mask);
+- if (cpu >= nr_cpu_ids)
++ next_cpu = cpumask_next(cpu, mask);
++ if (next_cpu >= nr_cpu_ids)
+ goto out;
++ cpu = next_cpu;
+
+ mpidr = cpu_logical_map(cpu);
+
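
The gic_compute_target_list() fix probes the next CPU into a temporary so the loop cursor still names a valid CPU when the search runs off the end of the mask; previously "cpu" was left holding nr_cpu_ids at the out label. A minimal model of that iterator pattern (illustrative only):

    #include <stdio.h>

    #define NR 4

    static int next_set(int cur, const int *mask)
    {
        for (int i = cur + 1; i < NR; i++)
            if (mask[i])
                return i;
        return NR;  /* sentinel: past the end */
    }

    int main(void)
    {
        int mask[NR] = { 1, 0, 1, 0 };
        int cpu = 0, next_cpu;

        for (;;) {
            printf("visit %d\n", cpu);
            next_cpu = next_set(cpu, mask);
            if (next_cpu >= NR)
                break;      /* cpu still holds the last valid index */
            cpu = next_cpu;
        }
        printf("last valid: %d\n", cpu);
        return 0;
    }
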
+diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c
+index 58d04726cdd7..9ca96e9db6bf 100644
+--- a/drivers/mailbox/mailbox-test.c
++++ b/drivers/mailbox/mailbox-test.c
+@@ -133,6 +133,7 @@ static ssize_t mbox_test_message_write(struct file *filp,
+ out:
+ kfree(tdev->signal);
+ kfree(tdev->message);
++ tdev->signal = NULL;
+
+ return ret < 0 ? ret : count;
+ }
+diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
+index d8f8cc85f96c..dd230752b08a 100644
+--- a/drivers/md/dm-log-writes.c
++++ b/drivers/md/dm-log-writes.c
+@@ -259,7 +259,7 @@ static int log_one_block(struct log_writes_c *lc,
+ sector++;
+
+ atomic_inc(&lc->io_blocks);
+- bio = bio_alloc(GFP_KERNEL, block->vec_cnt);
++ bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
+ if (!bio) {
+ DMERR("Couldn't alloc log bio");
+ goto error;
+@@ -280,7 +280,7 @@ static int log_one_block(struct log_writes_c *lc,
+ if (ret != block->vecs[i].bv_len) {
+ atomic_inc(&lc->io_blocks);
+ submit_bio(WRITE, bio);
+- bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i);
++ bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES));
+ if (!bio) {
+ DMERR("Couldn't alloc log bio");
+ goto error;
+diff --git a/drivers/media/platform/vsp1/vsp1_pipe.c b/drivers/media/platform/vsp1/vsp1_pipe.c
+index 4f3b4a1d028a..3c8f40bd8b9d 100644
+--- a/drivers/media/platform/vsp1/vsp1_pipe.c
++++ b/drivers/media/platform/vsp1/vsp1_pipe.c
+@@ -172,13 +172,17 @@ void vsp1_pipeline_reset(struct vsp1_pipeline *pipe)
+ bru->inputs[i].rpf = NULL;
+ }
+
+- for (i = 0; i < pipe->num_inputs; ++i) {
+- pipe->inputs[i]->pipe = NULL;
+- pipe->inputs[i] = NULL;
++ for (i = 0; i < ARRAY_SIZE(pipe->inputs); ++i) {
++ if (pipe->inputs[i]) {
++ pipe->inputs[i]->pipe = NULL;
++ pipe->inputs[i] = NULL;
++ }
+ }
+
+- pipe->output->pipe = NULL;
+- pipe->output = NULL;
++ if (pipe->output) {
++ pipe->output->pipe = NULL;
++ pipe->output = NULL;
++ }
+
+ INIT_LIST_HEAD(&pipe->entities);
+ pipe->state = VSP1_PIPELINE_STOPPED;
+diff --git a/drivers/media/usb/em28xx/em28xx-i2c.c b/drivers/media/usb/em28xx/em28xx-i2c.c
+index a19b5c8b56ff..1a9e1e556706 100644
+--- a/drivers/media/usb/em28xx/em28xx-i2c.c
++++ b/drivers/media/usb/em28xx/em28xx-i2c.c
+@@ -507,9 +507,8 @@ static int em28xx_i2c_xfer(struct i2c_adapter *i2c_adap,
+ if (dev->disconnected)
+ return -ENODEV;
+
+- rc = rt_mutex_trylock(&dev->i2c_bus_lock);
+- if (rc < 0)
+- return rc;
++ if (!rt_mutex_trylock(&dev->i2c_bus_lock))
++ return -EAGAIN;
+
+ /* Switch I2C bus if needed */
+ if (bus != dev->cur_i2c_bus &&
+diff --git a/drivers/media/usb/gspca/cpia1.c b/drivers/media/usb/gspca/cpia1.c
+index f23df4a9d8c5..52b88e9e656b 100644
+--- a/drivers/media/usb/gspca/cpia1.c
++++ b/drivers/media/usb/gspca/cpia1.c
+@@ -1624,7 +1624,7 @@ static int sd_start(struct gspca_dev *gspca_dev)
+
+ static void sd_stopN(struct gspca_dev *gspca_dev)
+ {
+- struct sd *sd = (struct sd *) gspca_dev;
++ struct sd *sd __maybe_unused = (struct sd *) gspca_dev;
+
+ command_pause(gspca_dev);
+
+diff --git a/drivers/media/usb/gspca/konica.c b/drivers/media/usb/gspca/konica.c
+index 39c96bb4c985..0712b1bc90b4 100644
+--- a/drivers/media/usb/gspca/konica.c
++++ b/drivers/media/usb/gspca/konica.c
+@@ -243,7 +243,7 @@ static int sd_start(struct gspca_dev *gspca_dev)
+
+ static void sd_stopN(struct gspca_dev *gspca_dev)
+ {
+- struct sd *sd = (struct sd *) gspca_dev;
++ struct sd *sd __maybe_unused = (struct sd *) gspca_dev;
+
+ konica_stream_off(gspca_dev);
+ #if IS_ENABLED(CONFIG_INPUT)
+diff --git a/drivers/media/usb/gspca/t613.c b/drivers/media/usb/gspca/t613.c
+index e2cc4e5a0ccb..bb52fc1fe598 100644
+--- a/drivers/media/usb/gspca/t613.c
++++ b/drivers/media/usb/gspca/t613.c
+@@ -837,7 +837,7 @@ static void sd_pkt_scan(struct gspca_dev *gspca_dev,
+ u8 *data, /* isoc packet */
+ int len) /* iso packet length */
+ {
+- struct sd *sd = (struct sd *) gspca_dev;
++ struct sd *sd __maybe_unused = (struct sd *) gspca_dev;
+ int pkt_type;
+
+ if (data[0] == 0x5a) {
+diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
+index bc8d0b9870eb..df651935ed6f 100644
+--- a/drivers/misc/cxl/guest.c
++++ b/drivers/misc/cxl/guest.c
+@@ -1052,16 +1052,18 @@ static void free_adapter(struct cxl *adapter)
+ struct irq_avail *cur;
+ int i;
+
+- if (adapter->guest->irq_avail) {
+- for (i = 0; i < adapter->guest->irq_nranges; i++) {
+- cur = &adapter->guest->irq_avail[i];
+- kfree(cur->bitmap);
++ if (adapter->guest) {
++ if (adapter->guest->irq_avail) {
++ for (i = 0; i < adapter->guest->irq_nranges; i++) {
++ cur = &adapter->guest->irq_avail[i];
++ kfree(cur->bitmap);
++ }
++ kfree(adapter->guest->irq_avail);
+ }
+- kfree(adapter->guest->irq_avail);
++ kfree(adapter->guest->status);
++ kfree(adapter->guest);
+ }
+- kfree(adapter->guest->status);
+ cxl_remove_adapter_nr(adapter);
+- kfree(adapter->guest);
+ kfree(adapter);
+ }
+
+diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
+index bcc0de47fe7e..bd1199825f9f 100644
+--- a/drivers/mmc/host/sdhci-tegra.c
++++ b/drivers/mmc/host/sdhci-tegra.c
+@@ -148,28 +148,37 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask)
+ return;
+
+ misc_ctrl = sdhci_readl(host, SDHCI_TEGRA_VENDOR_MISC_CTRL);
+- /* Erratum: Enable SDHCI spec v3.00 support */
+- if (soc_data->nvquirks & NVQUIRK_ENABLE_SDHCI_SPEC_300)
+- misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300;
+- /* Advertise UHS modes as supported by host */
+- if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR50)
+- misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR50;
+- else
+- misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_SDR50;
+- if (soc_data->nvquirks & NVQUIRK_ENABLE_DDR50)
+- misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_DDR50;
+- else
+- misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_DDR50;
+- if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR104)
+- misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR104;
+- else
+- misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_SDR104;
+- sdhci_writel(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL);
+-
+ clk_ctrl = sdhci_readl(host, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
++
++ misc_ctrl &= ~(SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300 |
++ SDHCI_MISC_CTRL_ENABLE_SDR50 |
++ SDHCI_MISC_CTRL_ENABLE_DDR50 |
++ SDHCI_MISC_CTRL_ENABLE_SDR104);
++
+ clk_ctrl &= ~SDHCI_CLOCK_CTRL_SPI_MODE_CLKEN_OVERRIDE;
+- if (soc_data->nvquirks & SDHCI_MISC_CTRL_ENABLE_SDR50)
+- clk_ctrl |= SDHCI_CLOCK_CTRL_SDR50_TUNING_OVERRIDE;
++
++ /*
++ * If the board does not define a regulator for the SDHCI
++ * IO voltage, then don't advertise support for UHS modes
++ * even if the device supports it because the IO voltage
++ * cannot be configured.
++ */
++ if (!IS_ERR(host->mmc->supply.vqmmc)) {
++ /* Erratum: Enable SDHCI spec v3.00 support */
++ if (soc_data->nvquirks & NVQUIRK_ENABLE_SDHCI_SPEC_300)
++ misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300;
++ /* Advertise UHS modes as supported by host */
++ if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR50)
++ misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR50;
++ if (soc_data->nvquirks & NVQUIRK_ENABLE_DDR50)
++ misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_DDR50;
++ if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR104)
++ misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR104;
++ if (soc_data->nvquirks & SDHCI_MISC_CTRL_ENABLE_SDR50)
++ clk_ctrl |= SDHCI_CLOCK_CTRL_SDR50_TUNING_OVERRIDE;
++ }
++
++ sdhci_writel(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL);
+ sdhci_writel(host, clk_ctrl, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
+
+ if (soc_data->nvquirks & NVQUIRK_HAS_PADCALIB)
+diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
+index cc07ba0f044d..27fa8b87cd5f 100644
+--- a/drivers/mtd/nand/davinci_nand.c
++++ b/drivers/mtd/nand/davinci_nand.c
+@@ -240,6 +240,9 @@ static void nand_davinci_hwctl_4bit(struct mtd_info *mtd, int mode)
+ unsigned long flags;
+ u32 val;
+
++ /* Reset ECC hardware */
++ davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET);
++
+ spin_lock_irqsave(&davinci_nand_lock, flags);
+
+ /* Start 4-bit ECC calculation for read/write */
+diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
+index ad535a854e5c..eab132778e67 100644
+--- a/drivers/net/can/dev.c
++++ b/drivers/net/can/dev.c
+@@ -21,6 +21,7 @@
+ #include <linux/slab.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_arp.h>
++#include <linux/workqueue.h>
+ #include <linux/can.h>
+ #include <linux/can/dev.h>
+ #include <linux/can/skb.h>
+@@ -471,9 +472,8 @@ EXPORT_SYMBOL_GPL(can_free_echo_skb);
+ /*
+ * CAN device restart for bus-off recovery
+ */
+-static void can_restart(unsigned long data)
++static void can_restart(struct net_device *dev)
+ {
+- struct net_device *dev = (struct net_device *)data;
+ struct can_priv *priv = netdev_priv(dev);
+ struct net_device_stats *stats = &dev->stats;
+ struct sk_buff *skb;
+@@ -513,6 +513,14 @@ restart:
+ netdev_err(dev, "Error %d during restart", err);
+ }
+
++static void can_restart_work(struct work_struct *work)
++{
++ struct delayed_work *dwork = to_delayed_work(work);
++ struct can_priv *priv = container_of(dwork, struct can_priv, restart_work);
++
++ can_restart(priv->dev);
++}
++
+ int can_restart_now(struct net_device *dev)
+ {
+ struct can_priv *priv = netdev_priv(dev);
+@@ -526,8 +534,8 @@ int can_restart_now(struct net_device *dev)
+ if (priv->state != CAN_STATE_BUS_OFF)
+ return -EBUSY;
+
+- /* Runs as soon as possible in the timer context */
+- mod_timer(&priv->restart_timer, jiffies);
++ cancel_delayed_work_sync(&priv->restart_work);
++ can_restart(dev);
+
+ return 0;
+ }
+@@ -548,8 +556,8 @@ void can_bus_off(struct net_device *dev)
+ netif_carrier_off(dev);
+
+ if (priv->restart_ms)
+- mod_timer(&priv->restart_timer,
+- jiffies + (priv->restart_ms * HZ) / 1000);
++ schedule_delayed_work(&priv->restart_work,
++ msecs_to_jiffies(priv->restart_ms));
+ }
+ EXPORT_SYMBOL_GPL(can_bus_off);
+
+@@ -658,6 +666,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
+ return NULL;
+
+ priv = netdev_priv(dev);
++ priv->dev = dev;
+
+ if (echo_skb_max) {
+ priv->echo_skb_max = echo_skb_max;
+@@ -667,7 +676,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
+
+ priv->state = CAN_STATE_STOPPED;
+
+- init_timer(&priv->restart_timer);
++ INIT_DELAYED_WORK(&priv->restart_work, can_restart_work);
+
+ return dev;
+ }
+@@ -748,8 +757,6 @@ int open_candev(struct net_device *dev)
+ if (!netif_carrier_ok(dev))
+ netif_carrier_on(dev);
+
+- setup_timer(&priv->restart_timer, can_restart, (unsigned long)dev);
+-
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(open_candev);
+@@ -764,7 +771,7 @@ void close_candev(struct net_device *dev)
+ {
+ struct can_priv *priv = netdev_priv(dev);
+
+- del_timer_sync(&priv->restart_timer);
++ cancel_delayed_work_sync(&priv->restart_work);
+ can_flush_echo_skb(dev);
+ }
+ EXPORT_SYMBOL_GPL(close_candev);
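
The can-dev conversion above moves the restart out of timer (softirq) context, where sleeping is forbidden, into a delayed work item running in process context. A loose userspace analogue of that deferral, with a worker thread standing in for the workqueue:

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static void can_restart(void)
    {
        usleep(1000);   /* stands in for sleeping calls (GFP_KERNEL allocs) */
        puts("restarted");
    }

    static void *restart_work(void *arg)
    {
        (void)arg;
        can_restart();  /* process context: sleeping is allowed */
        return NULL;
    }

    int main(void)
    {
        pthread_t worker;

        /* ~schedule_delayed_work() */
        pthread_create(&worker, NULL, restart_work, NULL);
        /* ~cancel_delayed_work_sync() on close */
        pthread_join(worker, NULL);
        return 0;
    }
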
+diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+index 9c0d87503977..9b5195435c87 100644
+--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
++++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+@@ -983,9 +983,10 @@ void fm10k_write_reta(struct fm10k_intfc *interface, const u32 *indir)
+ /* generate a new table if we weren't given one */
+ for (j = 0; j < 4; j++) {
+ if (indir)
+- n = indir[i + j];
++ n = indir[4 * i + j];
+ else
+- n = ethtool_rxfh_indir_default(i + j, rss_i);
++ n = ethtool_rxfh_indir_default(4 * i + j,
++ rss_i);
+
+ table[j] = n;
+ }
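
The indexing fix above matters because the redirection table is written four entries per 32-bit register: with the outer loop advancing one register at a time, the flat entry index is 4*i + j, while the old i + j repeated entries across registers. A quick demonstration:

    #include <stdio.h>

    int main(void)
    {
        for (int i = 0; i < 3; i++) {       /* three registers */
            for (int j = 0; j < 4; j++)     /* four entries each */
                printf("%2d ", 4 * i + j);  /* old i + j gave 0 1 2 3, 1 2 3 4, ... */
            printf("\n");
        }
        return 0;
    }
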
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
+index 422b41d61c9a..d5cb570d67b7 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
+@@ -297,13 +297,15 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc,
+ void *buffer, u16 buf_len)
+ {
+ struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc;
+- u16 len = le16_to_cpu(aq_desc->datalen);
++ u16 len;
+ u8 *buf = (u8 *)buffer;
+ u16 i = 0;
+
+ if ((!(mask & hw->debug_mask)) || (desc == NULL))
+ return;
+
++ len = le16_to_cpu(aq_desc->datalen);
++
+ i40e_debug(hw, mask,
+ "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+ le16_to_cpu(aq_desc->opcode),
+diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
+index 813cdd2621a1..d3f8b9f7756b 100644
+--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
++++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
+@@ -1524,7 +1524,7 @@ static void ath10k_htt_rx_h_filter(struct ath10k *ar,
+ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
+ {
+ struct ath10k *ar = htt->ar;
+- static struct ieee80211_rx_status rx_status;
++ struct ieee80211_rx_status *rx_status = &htt->rx_status;
+ struct sk_buff_head amsdu;
+ int ret;
+
+@@ -1548,11 +1548,11 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
+ return ret;
+ }
+
+- ath10k_htt_rx_h_ppdu(ar, &amsdu, &rx_status, 0xffff);
++ ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff);
+ ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0);
+- ath10k_htt_rx_h_filter(ar, &amsdu, &rx_status);
+- ath10k_htt_rx_h_mpdu(ar, &amsdu, &rx_status);
+- ath10k_htt_rx_h_deliver(ar, &amsdu, &rx_status);
++ ath10k_htt_rx_h_filter(ar, &amsdu, rx_status);
++ ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status);
++ ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status);
+
+ return 0;
+ }
+diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
+index dec1a317a070..e2083f4d2d48 100644
+--- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
++++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
+@@ -4176,7 +4176,7 @@ static void ath9k_hw_ar9300_set_board_values(struct ath_hw *ah,
+ if (!AR_SREV_9330(ah) && !AR_SREV_9340(ah) && !AR_SREV_9531(ah))
+ ar9003_hw_internal_regulator_apply(ah);
+ ar9003_hw_apply_tuning_caps(ah);
+- ar9003_hw_apply_minccapwr_thresh(ah, chan);
++ ar9003_hw_apply_minccapwr_thresh(ah, is2ghz);
+ ar9003_hw_txend_to_xpa_off_apply(ah, is2ghz);
+ ar9003_hw_thermometer_apply(ah);
+ ar9003_hw_thermo_cal_apply(ah);
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+index c7550dab6a23..570c80a5419d 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+@@ -722,8 +722,10 @@ int brcmf_sdiod_recv_chain(struct brcmf_sdio_dev *sdiodev,
+ return -ENOMEM;
+ err = brcmf_sdiod_buffrw(sdiodev, SDIO_FUNC_2, false, addr,
+ glom_skb);
+- if (err)
++ if (err) {
++ brcmu_pkt_buf_free_skb(glom_skb);
+ goto done;
++ }
+
+ skb_queue_walk(pktq, skb) {
+ memcpy(skb->data, glom_skb->data, skb->len);
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c
+index 796f5f9d5d5a..b7df576bb84d 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/dma.c
+@@ -1079,8 +1079,10 @@ bool dma_rxfill(struct dma_pub *pub)
+
+ pa = dma_map_single(di->dmadev, p->data, di->rxbufsize,
+ DMA_FROM_DEVICE);
+- if (dma_mapping_error(di->dmadev, pa))
++ if (dma_mapping_error(di->dmadev, pa)) {
++ brcmu_pkt_buf_free_skb(p);
+ return false;
++ }
+
+ /* save the free packet pointer */
+ di->rxp[rxout] = p;
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.c
+index dd9162722495..0ab865de1491 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.c
+@@ -87,7 +87,7 @@ void
+ brcms_c_stf_ss_algo_channel_get(struct brcms_c_info *wlc, u16 *ss_algo_channel,
+ u16 chanspec)
+ {
+- struct tx_power power;
++ struct tx_power power = { };
+ u8 siso_mcs_id, cdd_mcs_id, stbc_mcs_id;
+
+ /* Clear previous settings */
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
+index e1b6b2c665eb..46b52bf705fb 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
+@@ -288,7 +288,8 @@ static void iwl_mvm_dump_fifos(struct iwl_mvm *mvm,
+ fifo_hdr->fifo_num = cpu_to_le32(i);
+
+ /* Mark the number of TXF we're pulling now */
+- iwl_trans_write_prph(mvm->trans, TXF_CPU2_NUM, i);
++ iwl_trans_write_prph(mvm->trans, TXF_CPU2_NUM, i +
++ ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size));
+
+ fifo_hdr->available_bytes =
+ cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+@@ -959,5 +960,6 @@ int iwl_mvm_start_fw_dbg_conf(struct iwl_mvm *mvm, u8 conf_id)
+ }
+
+ mvm->fw_dbg_conf = conf_id;
+- return ret;
++
++ return 0;
+ }
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+index 7057f35cb2e7..fb71cf6910df 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+@@ -139,17 +139,21 @@ void iwl_free_fw_paging(struct iwl_mvm *mvm)
+ return;
+
+ for (i = 0; i < NUM_OF_FW_PAGING_BLOCKS; i++) {
+- if (!mvm->fw_paging_db[i].fw_paging_block) {
++ struct iwl_fw_paging *paging = &mvm->fw_paging_db[i];
++
++ if (!paging->fw_paging_block) {
+ IWL_DEBUG_FW(mvm,
+ "Paging: block %d already freed, continue to next page\n",
+ i);
+
+ continue;
+ }
++ dma_unmap_page(mvm->trans->dev, paging->fw_paging_phys,
++ paging->fw_paging_size, DMA_BIDIRECTIONAL);
+
+- __free_pages(mvm->fw_paging_db[i].fw_paging_block,
+- get_order(mvm->fw_paging_db[i].fw_paging_size));
+- mvm->fw_paging_db[i].fw_paging_block = NULL;
++ __free_pages(paging->fw_paging_block,
++ get_order(paging->fw_paging_size));
++ paging->fw_paging_block = NULL;
+ }
+ kfree(mvm->trans->paging_download_buf);
+ mvm->trans->paging_download_buf = NULL;
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+index 25a98401a64f..0551a4bb163c 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+@@ -667,8 +667,7 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2,
+ .mcc = cpu_to_le16(alpha2[0] << 8 | alpha2[1]),
+ .source_id = (u8)src_id,
+ };
+- struct iwl_mcc_update_resp *mcc_resp, *resp_cp = NULL;
+- struct iwl_mcc_update_resp_v1 *mcc_resp_v1 = NULL;
++ struct iwl_mcc_update_resp *resp_cp;
+ struct iwl_rx_packet *pkt;
+ struct iwl_host_cmd cmd = {
+ .id = MCC_UPDATE_CMD,
+@@ -701,34 +700,36 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2,
+
+ /* Extract MCC response */
+ if (resp_v2) {
+- mcc_resp = (void *)pkt->data;
++ struct iwl_mcc_update_resp *mcc_resp = (void *)pkt->data;
++
+ n_channels = __le32_to_cpu(mcc_resp->n_channels);
++ resp_len = sizeof(struct iwl_mcc_update_resp) +
++ n_channels * sizeof(__le32);
++ resp_cp = kmemdup(mcc_resp, resp_len, GFP_KERNEL);
+ } else {
+- mcc_resp_v1 = (void *)pkt->data;
++ struct iwl_mcc_update_resp_v1 *mcc_resp_v1 = (void *)pkt->data;
++
+ n_channels = __le32_to_cpu(mcc_resp_v1->n_channels);
++ resp_len = sizeof(struct iwl_mcc_update_resp) +
++ n_channels * sizeof(__le32);
++ resp_cp = kzalloc(resp_len, GFP_KERNEL);
++
++ if (resp_cp) {
++ resp_cp->status = mcc_resp_v1->status;
++ resp_cp->mcc = mcc_resp_v1->mcc;
++ resp_cp->cap = mcc_resp_v1->cap;
++ resp_cp->source_id = mcc_resp_v1->source_id;
++ resp_cp->n_channels = mcc_resp_v1->n_channels;
++ memcpy(resp_cp->channels, mcc_resp_v1->channels,
++ n_channels * sizeof(__le32));
++ }
+ }
+
+- resp_len = sizeof(struct iwl_mcc_update_resp) + n_channels *
+- sizeof(__le32);
+-
+- resp_cp = kzalloc(resp_len, GFP_KERNEL);
+ if (!resp_cp) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+- if (resp_v2) {
+- memcpy(resp_cp, mcc_resp, resp_len);
+- } else {
+- resp_cp->status = mcc_resp_v1->status;
+- resp_cp->mcc = mcc_resp_v1->mcc;
+- resp_cp->cap = mcc_resp_v1->cap;
+- resp_cp->source_id = mcc_resp_v1->source_id;
+- resp_cp->n_channels = mcc_resp_v1->n_channels;
+- memcpy(resp_cp->channels, mcc_resp_v1->channels,
+- n_channels * sizeof(__le32));
+- }
+-
+ status = le32_to_cpu(resp_cp->status);
+
+ mcc = le16_to_cpu(resp_cp->mcc);
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sf.c b/drivers/net/wireless/intel/iwlwifi/mvm/sf.c
+index 443a42855c9e..101fb04a8573 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/sf.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sf.c
+@@ -215,7 +215,7 @@ static int iwl_mvm_sf_config(struct iwl_mvm *mvm, u8 sta_id,
+ enum iwl_sf_state new_state)
+ {
+ struct iwl_sf_cfg_cmd sf_cmd = {
+- .state = cpu_to_le32(SF_FULL_ON),
++ .state = cpu_to_le32(new_state),
+ };
+ struct ieee80211_sta *sta;
+ int ret = 0;
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+index b23ab4a4504f..1822ad374be3 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+@@ -1374,11 +1374,12 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
+ */
+ WARN_ON(rcu_access_pointer(mvm->baid_map[baid]));
+ rcu_assign_pointer(mvm->baid_map[baid], baid_data);
+- } else if (mvm->rx_ba_sessions > 0) {
++ } else {
+ u8 baid = mvm_sta->tid_to_baid[tid];
+
+- /* check that restart flow didn't zero the counter */
+- mvm->rx_ba_sessions--;
++ if (mvm->rx_ba_sessions > 0)
++ /* check that restart flow didn't zero the counter */
++ mvm->rx_ba_sessions--;
+ if (!iwl_mvm_has_new_rx_api(mvm))
+ return 0;
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+index b92b75fea92f..6b8d9a5ab855 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+@@ -138,28 +138,19 @@ static void iwl_mvm_tx_csum(struct iwl_mvm *mvm, struct sk_buff *skb,
+
+ protocol = ipv6h->nexthdr;
+ while (protocol != NEXTHDR_NONE && ipv6_ext_hdr(protocol)) {
++ struct ipv6_opt_hdr *hp;
++
+ /* only supported extension headers */
+ if (protocol != NEXTHDR_ROUTING &&
+ protocol != NEXTHDR_HOP &&
+- protocol != NEXTHDR_DEST &&
+- protocol != NEXTHDR_FRAGMENT) {
++ protocol != NEXTHDR_DEST) {
+ skb_checksum_help(skb);
+ return;
+ }
+
+- if (protocol == NEXTHDR_FRAGMENT) {
+- struct frag_hdr *hp =
+- OPT_HDR(struct frag_hdr, skb, off);
+-
+- protocol = hp->nexthdr;
+- off += sizeof(struct frag_hdr);
+- } else {
+- struct ipv6_opt_hdr *hp =
+- OPT_HDR(struct ipv6_opt_hdr, skb, off);
+-
+- protocol = hp->nexthdr;
+- off += ipv6_optlen(hp);
+- }
++ hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
++ protocol = hp->nexthdr;
++ off += ipv6_optlen(hp);
+ }
+ /* if we get here - protocol now should be TCP/UDP */
+ #endif
+@@ -1312,7 +1303,15 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
+ bool send_eosp_ndp = false;
+
+ spin_lock_bh(&mvmsta->lock);
+- txq_agg = (mvmsta->tid_data[tid].state == IWL_AGG_ON);
++ if (iwl_mvm_is_dqa_supported(mvm)) {
++ enum iwl_mvm_agg_state state;
++
++ state = mvmsta->tid_data[tid].state;
++ txq_agg = (state == IWL_AGG_ON ||
++ state == IWL_EMPTYING_HW_QUEUE_DELBA);
++ } else {
++ txq_agg = txq_id >= mvm->first_agg_queue;
++ }
+
+ if (!is_ndp) {
+ tid_data->next_reclaimed = next_reclaimed;
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+index d6beac9af029..dec63a0aef6b 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+@@ -1595,9 +1595,9 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
+
+ /* start the TFD with the scratchbuf */
+ scratch_size = min_t(int, copy_size, IWL_HCMD_SCRATCHBUF_SIZE);
+- memcpy(&txq->scratchbufs[q->write_ptr], &out_cmd->hdr, scratch_size);
++ memcpy(&txq->scratchbufs[idx], &out_cmd->hdr, scratch_size);
+ iwl_pcie_txq_build_tfd(trans, txq,
+- iwl_pcie_get_scratchbuf_dma(txq, q->write_ptr),
++ iwl_pcie_get_scratchbuf_dma(txq, idx),
+ scratch_size, true);
+
+ /* map first command fragment, if any remains */
+diff --git a/drivers/net/wireless/marvell/mwifiex/uap_txrx.c b/drivers/net/wireless/marvell/mwifiex/uap_txrx.c
+index 666e91af59d7..bf5660eb27d3 100644
+--- a/drivers/net/wireless/marvell/mwifiex/uap_txrx.c
++++ b/drivers/net/wireless/marvell/mwifiex/uap_txrx.c
+@@ -272,7 +272,7 @@ int mwifiex_handle_uap_rx_forward(struct mwifiex_private *priv,
+ int mwifiex_uap_recv_packet(struct mwifiex_private *priv,
+ struct sk_buff *skb)
+ {
+- struct mwifiex_adapter *adapter = adapter;
++ struct mwifiex_adapter *adapter = priv->adapter;
+ struct mwifiex_sta_node *src_node;
+ struct ethhdr *p_ethhdr;
+ struct sk_buff *skb_uap;
+diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
+index e44a7a2f4061..16427420b1a2 100644
+--- a/drivers/nfc/fdp/fdp.c
++++ b/drivers/nfc/fdp/fdp.c
+@@ -353,7 +353,7 @@ static int fdp_nci_patch_otp(struct nci_dev *ndev)
+ {
+ struct fdp_nci_info *info = nci_get_drvdata(ndev);
+ struct device *dev = &info->phy->i2c_dev->dev;
+- u8 conn_id;
++ int conn_id;
+ int r = 0;
+
+ if (info->otp_version >= info->otp_patch_version)
+@@ -424,7 +424,7 @@ static int fdp_nci_patch_ram(struct nci_dev *ndev)
+ {
+ struct fdp_nci_info *info = nci_get_drvdata(ndev);
+ struct device *dev = &info->phy->i2c_dev->dev;
+- u8 conn_id;
++ int conn_id;
+ int r = 0;
+
+ if (info->ram_version >= info->ram_patch_version)
+diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
+index 489ea1098c96..69b5e811ea2b 100644
+--- a/drivers/pcmcia/ds.c
++++ b/drivers/pcmcia/ds.c
+@@ -977,7 +977,7 @@ static int pcmcia_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+
+ /************************ runtime PM support ***************************/
+
+-static int pcmcia_dev_suspend(struct device *dev, pm_message_t state);
++static int pcmcia_dev_suspend(struct device *dev);
+ static int pcmcia_dev_resume(struct device *dev);
+
+ static int runtime_suspend(struct device *dev)
+@@ -985,7 +985,7 @@ static int runtime_suspend(struct device *dev)
+ int rc;
+
+ device_lock(dev);
+- rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND);
++ rc = pcmcia_dev_suspend(dev);
+ device_unlock(dev);
+ return rc;
+ }
+@@ -1135,7 +1135,7 @@ ATTRIBUTE_GROUPS(pcmcia_dev);
+
+ /* PM support, also needed for reset */
+
+-static int pcmcia_dev_suspend(struct device *dev, pm_message_t state)
++static int pcmcia_dev_suspend(struct device *dev)
+ {
+ struct pcmcia_device *p_dev = to_pcmcia_dev(dev);
+ struct pcmcia_driver *p_drv = NULL;
+@@ -1410,6 +1410,9 @@ static struct class_interface pcmcia_bus_interface __refdata = {
+ .remove_dev = &pcmcia_bus_remove_socket,
+ };
+
++static const struct dev_pm_ops pcmcia_bus_pm_ops = {
++ SET_SYSTEM_SLEEP_PM_OPS(pcmcia_dev_suspend, pcmcia_dev_resume)
++};
+
+ struct bus_type pcmcia_bus_type = {
+ .name = "pcmcia",
+@@ -1418,8 +1421,7 @@ struct bus_type pcmcia_bus_type = {
+ .dev_groups = pcmcia_dev_groups,
+ .probe = pcmcia_device_probe,
+ .remove = pcmcia_device_remove,
+- .suspend = pcmcia_dev_suspend,
+- .resume = pcmcia_dev_resume,
++ .pm = &pcmcia_bus_pm_ops,
+ };
+
+
+diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
+index 140436a046c0..5824045fab46 100644
+--- a/drivers/perf/arm_pmu.c
++++ b/drivers/perf/arm_pmu.c
+@@ -921,6 +921,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
+ if (i > 0 && spi != using_spi) {
+ pr_err("PPI/SPI IRQ type mismatch for %s!\n",
+ dn->name);
++ of_node_put(dn);
+ kfree(irqs);
+ return -EINVAL;
+ }
+diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
+index 967400971d45..5d08de0b13f5 100644
+--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
++++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
+@@ -73,6 +73,12 @@ static void uniphier_pctl_pin_dbg_show(struct pinctrl_dev *pctldev,
+ case UNIPHIER_PIN_PULL_DOWN:
+ pull_dir = "DOWN";
+ break;
++ case UNIPHIER_PIN_PULL_UP_FIXED:
++ pull_dir = "UP(FIXED)";
++ break;
++ case UNIPHIER_PIN_PULL_DOWN_FIXED:
++ pull_dir = "DOWN(FIXED)";
++ break;
+ case UNIPHIER_PIN_PULL_NONE:
+ pull_dir = "NONE";
+ break;
+diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c
+index fafa3488e960..36e0c930ab49 100644
+--- a/drivers/regulator/pwm-regulator.c
++++ b/drivers/regulator/pwm-regulator.c
+@@ -132,6 +132,7 @@ static int pwm_regulator_set_voltage(struct regulator_dev *rdev,
+ unsigned int duty_pulse;
+ u64 req_period;
+ u32 rem;
++ int old_uV = pwm_regulator_get_voltage(rdev);
+ int ret;
+
+ pwm_get_args(drvdata->pwm, &pargs);
+@@ -166,8 +167,12 @@ static int pwm_regulator_set_voltage(struct regulator_dev *rdev,
+ }
+ drvdata->volt_uV = min_uV;
+
+- /* Delay required by PWM regulator to settle to the new voltage */
+- usleep_range(ramp_delay, ramp_delay + 1000);
++ if ((ramp_delay == 0) || !pwm_regulator_is_enabled(rdev))
++ return 0;
++
++ /* Ramp delay is in uV/us. Convert to us and delay */
++ ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay);
++ usleep_range(ramp_delay, ramp_delay + DIV_ROUND_UP(ramp_delay, 10));
+
+ return 0;
+ }
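
The rewritten delay above treats ramp_delay as a slope in uV/us, so the settle time is ceil(|new - old| / slope) microseconds, skipped entirely when the regulator is disabled or the slope is zero. A sketch of that arithmetic:

    #include <stdio.h>
    #include <stdlib.h>

    static unsigned settle_us(int old_uV, int new_uV, unsigned ramp_uV_per_us)
    {
        unsigned delta;

        if (!ramp_uV_per_us)
            return 0;
        delta = (unsigned)abs(new_uV - old_uV);
        /* DIV_ROUND_UP(delta, slope) */
        return (delta + ramp_uV_per_us - 1) / ramp_uV_per_us;
    }

    int main(void)
    {
        /* 1.1 V -> 1.2 V at 10000 uV/us settles in 10 us */
        printf("%u us\n", settle_us(1100000, 1200000, 10000));
        return 0;
    }
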
+diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
+index 6c7fe4778793..891ae44a49c2 100644
+--- a/drivers/regulator/qcom_smd-regulator.c
++++ b/drivers/regulator/qcom_smd-regulator.c
+@@ -178,20 +178,21 @@ static const struct regulator_desc pma8084_hfsmps = {
+ static const struct regulator_desc pma8084_ftsmps = {
+ .linear_ranges = (struct regulator_linear_range[]) {
+ REGULATOR_LINEAR_RANGE(350000, 0, 184, 5000),
+- REGULATOR_LINEAR_RANGE(700000, 185, 339, 10000),
++ REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000),
+ },
+ .n_linear_ranges = 2,
+- .n_voltages = 340,
++ .n_voltages = 262,
+ .ops = &rpm_smps_ldo_ops,
+ };
+
+ static const struct regulator_desc pma8084_pldo = {
+ .linear_ranges = (struct regulator_linear_range[]) {
+- REGULATOR_LINEAR_RANGE(750000, 0, 30, 25000),
+- REGULATOR_LINEAR_RANGE(1500000, 31, 99, 50000),
++ REGULATOR_LINEAR_RANGE( 750000, 0, 63, 12500),
++ REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000),
++ REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000),
+ },
+- .n_linear_ranges = 2,
+- .n_voltages = 100,
++ .n_linear_ranges = 3,
++ .n_voltages = 164,
+ .ops = &rpm_smps_ldo_ops,
+ };
+
+@@ -221,29 +222,30 @@ static const struct regulator_desc pm8x41_hfsmps = {
+ static const struct regulator_desc pm8841_ftsmps = {
+ .linear_ranges = (struct regulator_linear_range[]) {
+ REGULATOR_LINEAR_RANGE(350000, 0, 184, 5000),
+- REGULATOR_LINEAR_RANGE(700000, 185, 339, 10000),
++ REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000),
+ },
+ .n_linear_ranges = 2,
+- .n_voltages = 340,
++ .n_voltages = 262,
+ .ops = &rpm_smps_ldo_ops,
+ };
+
+ static const struct regulator_desc pm8941_boost = {
+ .linear_ranges = (struct regulator_linear_range[]) {
+- REGULATOR_LINEAR_RANGE(4000000, 0, 15, 100000),
++ REGULATOR_LINEAR_RANGE(4000000, 0, 30, 50000),
+ },
+ .n_linear_ranges = 1,
+- .n_voltages = 16,
++ .n_voltages = 31,
+ .ops = &rpm_smps_ldo_ops,
+ };
+
+ static const struct regulator_desc pm8941_pldo = {
+ .linear_ranges = (struct regulator_linear_range[]) {
+- REGULATOR_LINEAR_RANGE( 750000, 0, 30, 25000),
+- REGULATOR_LINEAR_RANGE(1500000, 31, 99, 50000),
++ REGULATOR_LINEAR_RANGE( 750000, 0, 63, 12500),
++ REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000),
++ REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000),
+ },
+- .n_linear_ranges = 2,
+- .n_voltages = 100,
++ .n_linear_ranges = 3,
++ .n_voltages = 164,
+ .ops = &rpm_smps_ldo_ops,
+ };
+
+diff --git a/drivers/regulator/qcom_spmi-regulator.c b/drivers/regulator/qcom_spmi-regulator.c
+index 84cce21e98cd..16c5f84e06a7 100644
+--- a/drivers/regulator/qcom_spmi-regulator.c
++++ b/drivers/regulator/qcom_spmi-regulator.c
+@@ -1085,6 +1085,8 @@ static struct regulator_ops spmi_vs_ops = {
+ .set_pull_down = spmi_regulator_common_set_pull_down,
+ .set_soft_start = spmi_regulator_common_set_soft_start,
+ .set_over_current_protection = spmi_regulator_vs_ocp,
++ .set_mode = spmi_regulator_common_set_mode,
++ .get_mode = spmi_regulator_common_get_mode,
+ };
+
+ static struct regulator_ops spmi_boost_ops = {
+@@ -1496,6 +1498,7 @@ static const struct spmi_regulator_data pm8941_regulators[] = {
+ { "s1", 0x1400, "vdd_s1", },
+ { "s2", 0x1700, "vdd_s2", },
+ { "s3", 0x1a00, "vdd_s3", },
++ { "s4", 0xa000, },
+ { "l1", 0x4000, "vdd_l1_l3", },
+ { "l2", 0x4100, "vdd_l2_lvs_1_2_3", },
+ { "l3", 0x4200, "vdd_l1_l3", },
+@@ -1523,8 +1526,8 @@ static const struct spmi_regulator_data pm8941_regulators[] = {
+ { "lvs1", 0x8000, "vdd_l2_lvs_1_2_3", },
+ { "lvs2", 0x8100, "vdd_l2_lvs_1_2_3", },
+ { "lvs3", 0x8200, "vdd_l2_lvs_1_2_3", },
+- { "mvs1", 0x8300, "vin_5vs", },
+- { "mvs2", 0x8400, "vin_5vs", },
++ { "5vs1", 0x8300, "vin_5vs", "ocp-5vs1", },
++ { "5vs2", 0x8400, "vin_5vs", "ocp-5vs2", },
+ { }
+ };
+
+diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c
+index 67669a9e73c1..f3a33312a9a6 100644
+--- a/drivers/scsi/fnic/fnic_fcs.c
++++ b/drivers/scsi/fnic/fnic_fcs.c
+@@ -954,8 +954,8 @@ int fnic_alloc_rq_frame(struct vnic_rq *rq)
+ skb_put(skb, len);
+ pa = pci_map_single(fnic->pdev, skb->data, len, PCI_DMA_FROMDEVICE);
+
+- r = pci_dma_mapping_error(fnic->pdev, pa);
+- if (r) {
++ if (pci_dma_mapping_error(fnic->pdev, pa)) {
++ r = -ENOMEM;
+ printk(KERN_ERR "PCI mapping failed with error %d\n", r);
+ goto free_skb;
+ }
+@@ -1093,8 +1093,8 @@ static int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp)
+
+ pa = pci_map_single(fnic->pdev, eth_hdr, tot_len, PCI_DMA_TODEVICE);
+
+- ret = pci_dma_mapping_error(fnic->pdev, pa);
+- if (ret) {
++ if (pci_dma_mapping_error(fnic->pdev, pa)) {
++ ret = -ENOMEM;
+ printk(KERN_ERR "DMA map failed with error %d\n", ret);
+ goto free_skb_on_err;
+ }
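Both fnic hunks apply the standard DMA-mapping idiom: the address returned by pci_map_single() must be validated with pci_dma_mapping_error(), whose return value is a predicate rather than an errno, so the caller supplies -ENOMEM itself. A minimal sketch of the pattern (identifiers assumed):

	dma_addr_t pa;

	pa = pci_map_single(pdev, buf, len, PCI_DMA_TODEVICE);
	if (pci_dma_mapping_error(pdev, pa)) {
		/* predicate only; pick the errno ourselves */
		ret = -ENOMEM;
		goto err_free;
	}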
+diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
+index 4c0f3a774799..8a2368b32dec 100644
+--- a/drivers/scsi/qla2xxx/qla_fw.h
++++ b/drivers/scsi/qla2xxx/qla_fw.h
+@@ -1288,7 +1288,7 @@ struct vp_rpt_id_entry_24xx {
+
+ uint8_t vp_idx_map[16];
+
+- uint8_t reserved_4[28];
++ uint8_t reserved_4[24];
+ uint16_t bbcr;
+ uint8_t reserved_5[6];
+ };
+diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
+index 53ef1cb6418e..1d82053a6a0c 100644
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -587,7 +587,7 @@ static void ses_match_to_enclosure(struct enclosure_device *edev,
+
+ ses_enclosure_data_process(edev, to_scsi_device(edev->edev.parent), 0);
+
+- if (is_sas_attached(sdev))
++ if (scsi_is_sas_rphy(&sdev->sdev_gendev))
+ efd.addr = sas_get_address(sdev);
+
+ if (efd.addr) {
+diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
+index a7934ab00b96..d22de4c8c399 100644
+--- a/drivers/spi/spi-sh-msiof.c
++++ b/drivers/spi/spi-sh-msiof.c
+@@ -263,6 +263,9 @@ static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p,
+
+ for (k = 0; k < ARRAY_SIZE(sh_msiof_spi_div_table); k++) {
+ brps = DIV_ROUND_UP(div, sh_msiof_spi_div_table[k].div);
++ /* SCR_BRDV_DIV_1 is valid only if BRPS is x 1/1 or x 1/2 */
++ if (sh_msiof_spi_div_table[k].div == 1 && brps > 2)
++ continue;
+ if (brps <= 32) /* max of brdv is 32 */
+ break;
+ }
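The new guard encodes a hardware limit: when the table divisor is 1 (SCR_BRDV_DIV_1), the prescaler value BRPS may only be 1 or 2, so larger results must fall through to the next divisor. A worked trace under that assumption, with div = 5:

	/* k = 0, table div 1: brps = DIV_ROUND_UP(5, 1) = 5 > 2, skip   */
	/* k = 1, table div 2: brps = DIV_ROUND_UP(5, 2) = 3 <= 32, use  */
	for (k = 0; k < ARRAY_SIZE(sh_msiof_spi_div_table); k++) {
		brps = DIV_ROUND_UP(div, sh_msiof_spi_div_table[k].div);
		if (sh_msiof_spi_div_table[k].div == 1 && brps > 2)
			continue;
		if (brps <= 32)
			break;
	}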
+diff --git a/drivers/staging/comedi/drivers/adv_pci1760.c b/drivers/staging/comedi/drivers/adv_pci1760.c
+index d7dd1e55e347..9f525ff7290c 100644
+--- a/drivers/staging/comedi/drivers/adv_pci1760.c
++++ b/drivers/staging/comedi/drivers/adv_pci1760.c
+@@ -196,6 +196,7 @@ static int pci1760_pwm_ns_to_div(unsigned int flags, unsigned int ns)
+ break;
+ case CMDF_ROUND_DOWN:
+ divisor = ns / PCI1760_PWM_TIMEBASE;
++ break;
+ default:
+ return -EINVAL;
+ }
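The one-line fix adds the break that the CMDF_ROUND_DOWN case was missing; without it, control fell through into the default arm and a valid request returned -EINVAL. Reduced to the bug's shape:

	switch (flags) {
	case CMDF_ROUND_DOWN:
		divisor = ns / PCI1760_PWM_TIMEBASE;
		break;		/* previously absent: fell into default */
	default:
		return -EINVAL;
	}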
+diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c
+index 0c1a77cafe14..4c281df16816 100644
+--- a/drivers/staging/fbtft/fbtft-core.c
++++ b/drivers/staging/fbtft/fbtft-core.c
+@@ -391,11 +391,11 @@ static void fbtft_update_display(struct fbtft_par *par, unsigned start_line,
+
+ if (unlikely(timeit)) {
+ ts_end = ktime_get();
+- if (ktime_to_ns(par->update_time))
++ if (!ktime_to_ns(par->update_time))
+ par->update_time = ts_start;
+
+- par->update_time = ts_start;
+ fps = ktime_us_delta(ts_start, par->update_time);
++ par->update_time = ts_start;
+ fps = fps ? 1000000 / fps : 0;
+
+ throughput = ktime_us_delta(ts_end, ts_start);
+diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
+index 917a55c4480d..ffe9f8875311 100644
+--- a/drivers/usb/class/usbtmc.c
++++ b/drivers/usb/class/usbtmc.c
+@@ -141,6 +141,7 @@ static void usbtmc_delete(struct kref *kref)
+ struct usbtmc_device_data *data = to_usbtmc_data(kref);
+
+ usb_put_dev(data->usb_dev);
++ kfree(data);
+ }
+
+ static int usbtmc_open(struct inode *inode, struct file *filp)
+@@ -1379,7 +1380,7 @@ static int usbtmc_probe(struct usb_interface *intf,
+
+ dev_dbg(&intf->dev, "%s called\n", __func__);
+
+- data = devm_kzalloc(&intf->dev, sizeof(*data), GFP_KERNEL);
++ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c
+index cf8819a5c5b2..8bb011ea78f7 100644
+--- a/drivers/usb/gadget/udc/fsl_qe_udc.c
++++ b/drivers/usb/gadget/udc/fsl_qe_udc.c
+@@ -1878,11 +1878,8 @@ static int qe_get_frame(struct usb_gadget *gadget)
+
+ tmp = in_be16(&udc->usb_param->frame_n);
+ if (tmp & 0x8000)
+- tmp = tmp & 0x07ff;
+- else
+- tmp = -EINVAL;
+-
+- return (int)tmp;
++ return tmp & 0x07ff;
++ return -EINVAL;
+ }
+
+ static int fsl_qe_start(struct usb_gadget *gadget,
+diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
+index 7771be3ac178..4dd531ac5a7f 100644
+--- a/drivers/usb/misc/legousbtower.c
++++ b/drivers/usb/misc/legousbtower.c
+@@ -898,24 +898,6 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
+ dev->interrupt_in_interval = interrupt_in_interval ? interrupt_in_interval : dev->interrupt_in_endpoint->bInterval;
+ dev->interrupt_out_interval = interrupt_out_interval ? interrupt_out_interval : dev->interrupt_out_endpoint->bInterval;
+
+- /* we can register the device now, as it is ready */
+- usb_set_intfdata (interface, dev);
+-
+- retval = usb_register_dev (interface, &tower_class);
+-
+- if (retval) {
+- /* something prevented us from registering this driver */
+- dev_err(idev, "Not able to get a minor for this device.\n");
+- usb_set_intfdata (interface, NULL);
+- goto error;
+- }
+- dev->minor = interface->minor;
+-
+- /* let the user know what node this device is now attached to */
+- dev_info(&interface->dev, "LEGO USB Tower #%d now attached to major "
+- "%d minor %d\n", (dev->minor - LEGO_USB_TOWER_MINOR_BASE),
+- USB_MAJOR, dev->minor);
+-
+ /* get the firmware version and log it */
+ result = usb_control_msg (udev,
+ usb_rcvctrlpipe(udev, 0),
+@@ -936,6 +918,23 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
+ get_version_reply.minor,
+ le16_to_cpu(get_version_reply.build_no));
+
++ /* we can register the device now, as it is ready */
++ usb_set_intfdata (interface, dev);
++
++ retval = usb_register_dev (interface, &tower_class);
++
++ if (retval) {
++ /* something prevented us from registering this driver */
++ dev_err(idev, "Not able to get a minor for this device.\n");
++ usb_set_intfdata (interface, NULL);
++ goto error;
++ }
++ dev->minor = interface->minor;
++
++ /* let the user know what node this device is now attached to */
++ dev_info(&interface->dev, "LEGO USB Tower #%d now attached to major "
++ "%d minor %d\n", (dev->minor - LEGO_USB_TOWER_MINOR_BASE),
++ USB_MAJOR, dev->minor);
+
+ exit:
+ return retval;
+diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
+index 96a70789b4c2..7e91ee27ac3a 100644
+--- a/drivers/usb/serial/cp210x.c
++++ b/drivers/usb/serial/cp210x.c
+@@ -118,6 +118,7 @@ static const struct usb_device_id id_table[] = {
+ { USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */
+ { USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */
+ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
++ { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
+ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
+ { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+ { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
+diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c
+index 344bd9473475..e429b59f6f8a 100644
+--- a/drivers/usb/usbip/vudc_rx.c
++++ b/drivers/usb/usbip/vudc_rx.c
+@@ -142,7 +142,7 @@ static int v_recv_cmd_submit(struct vudc *udc,
+ urb_p->urb->status = -EINPROGRESS;
+
+ /* FIXME: more pipe setup to please usbip_common */
+- urb_p->urb->pipe &= ~(11 << 30);
++ urb_p->urb->pipe &= ~(3 << 30);
+ switch (urb_p->ep->type) {
+ case USB_ENDPOINT_XFER_BULK:
+ urb_p->urb->pipe |= (PIPE_BULK << 30);
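The mask fix is a decimal/binary mix-up: the pipe type occupies the top two bits of urb->pipe, so the clearing mask needs binary 11, i.e. the literal 3, before the new PIPE_* value is ORed in. The corrected idiom:

	/* PIPE_* lives in bits 31:30 of urb->pipe */
	urb->pipe &= ~(3 << 30);		/* clear both type bits */
	urb->pipe |= (PIPE_BULK << 30);		/* then set the new type */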
+diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
+index 3595cffa24ea..cff8a65d7ef0 100644
+--- a/drivers/watchdog/watchdog_dev.c
++++ b/drivers/watchdog/watchdog_dev.c
+@@ -258,10 +258,12 @@ static int watchdog_stop(struct watchdog_device *wdd)
+ return -EBUSY;
+ }
+
+- if (wdd->ops->stop)
++ if (wdd->ops->stop) {
++ clear_bit(WDOG_HW_RUNNING, &wdd->status);
+ err = wdd->ops->stop(wdd);
+- else
++ } else {
+ set_bit(WDOG_HW_RUNNING, &wdd->status);
++ }
+
+ if (err == 0) {
+ clear_bit(WDOG_ACTIVE, &wdd->status);
+diff --git a/fs/aio.c b/fs/aio.c
+index fb8e45b88cd4..4fe81d1c60f9 100644
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -239,7 +239,12 @@ static struct dentry *aio_mount(struct file_system_type *fs_type,
+ static const struct dentry_operations ops = {
+ .d_dname = simple_dname,
+ };
+- return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC);
++ struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
++ AIO_RING_MAGIC);
++
++ if (!IS_ERR(root))
++ root->d_sb->s_iflags |= SB_I_NOEXEC;
++ return root;
+ }
+
+ /* aio_setup
+diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
+index 6e0fedf6713b..cf22f1e38fa3 100644
+--- a/fs/ceph/dir.c
++++ b/fs/ceph/dir.c
+@@ -597,7 +597,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
+ if (is_hash_order(new_pos)) {
+ /* no need to reset last_name for a forward seek when
+ * dentries are sorted in hash order */
+- } else if (fi->frag |= fpos_frag(new_pos)) {
++ } else if (fi->frag != fpos_frag(new_pos)) {
+ return true;
+ }
+ rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
+diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
+index d81f96aacd51..656f68f7fe53 100644
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -925,7 +925,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
+ if (hdr_arg.minorversion == 0) {
+ cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);
+ if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
+- return rpc_drop_reply;
++ goto out_invalidcred;
+ }
+
+ cps.minorversion = hdr_arg.minorversion;
+@@ -953,6 +953,10 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
+ nfs_put_client(cps.clp);
+ dprintk("%s: done, status = %u\n", __func__, ntohl(status));
+ return rpc_success;
++
++out_invalidcred:
++ pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n");
++ return rpc_autherr_badcred;
+ }
+
+ /*
+diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
+index aa59757389dc..b4c1407e8fe4 100644
+--- a/fs/nfs/filelayout/filelayout.c
++++ b/fs/nfs/filelayout/filelayout.c
+@@ -375,8 +375,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
+ return -EAGAIN;
+ }
+
+- if (data->verf.committed == NFS_UNSTABLE)
+- pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
++ pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
+
+ return 0;
+ }
+diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
+index 6da14aedde2b..1c15966c9f52 100644
+--- a/fs/nfs/flexfilelayout/flexfilelayout.c
++++ b/fs/nfs/flexfilelayout/flexfilelayout.c
+@@ -37,6 +37,7 @@ ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
+ if (ffl) {
+ INIT_LIST_HEAD(&ffl->error_list);
+ INIT_LIST_HEAD(&ffl->mirrors);
++ ffl->last_report_time = ktime_get();
+ return &ffl->generic_hdr;
+ } else
+ return NULL;
+@@ -640,19 +641,18 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
+ {
+ static const ktime_t notime = {0};
+ s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;
++ struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);
+
+ nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
+ if (ktime_equal(mirror->start_time, notime))
+ mirror->start_time = now;
+- if (ktime_equal(mirror->last_report_time, notime))
+- mirror->last_report_time = now;
+ if (mirror->report_interval != 0)
+ report_interval = (s64)mirror->report_interval * 1000LL;
+ else if (layoutstats_timer != 0)
+ report_interval = (s64)layoutstats_timer * 1000LL;
+- if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
++ if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >=
+ report_interval) {
+- mirror->last_report_time = now;
++ ffl->last_report_time = now;
+ return true;
+ }
+
+@@ -1529,8 +1529,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
+ return -EAGAIN;
+ }
+
+- if (data->verf.committed == NFS_UNSTABLE
+- && ff_layout_need_layoutcommit(data->lseg))
++ if (ff_layout_need_layoutcommit(data->lseg))
+ pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
+
+ return 0;
+diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
+index 1bcdb15d0c41..3ee0c9fcea76 100644
+--- a/fs/nfs/flexfilelayout/flexfilelayout.h
++++ b/fs/nfs/flexfilelayout/flexfilelayout.h
+@@ -84,7 +84,6 @@ struct nfs4_ff_layout_mirror {
+ struct nfs4_ff_layoutstat read_stat;
+ struct nfs4_ff_layoutstat write_stat;
+ ktime_t start_time;
+- ktime_t last_report_time;
+ u32 report_interval;
+ };
+
+@@ -101,6 +100,7 @@ struct nfs4_flexfile_layout {
+ struct pnfs_ds_commit_info commit_info;
+ struct list_head mirrors;
+ struct list_head error_list; /* nfs4_ff_layout_ds_err */
++ ktime_t last_report_time; /* Layoutstat report times */
+ };
+
+ static inline struct nfs4_flexfile_layout *
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index e2320c643107..00fd0c716988 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -4393,7 +4393,8 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
+ {
+ hdr->timestamp = jiffies;
+- hdr->pgio_done_cb = nfs4_read_done_cb;
++ if (!hdr->pgio_done_cb)
++ hdr->pgio_done_cb = nfs4_read_done_cb;
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0);
+ }
+diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
+index 45d6110744cb..ec9ddef5ae75 100644
+--- a/fs/pstore/inode.c
++++ b/fs/pstore/inode.c
+@@ -178,7 +178,6 @@ static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence)
+ }
+
+ static const struct file_operations pstore_file_operations = {
+- .owner = THIS_MODULE,
+ .open = pstore_file_open,
+ .read = pstore_file_read,
+ .llseek = pstore_file_llseek,
+diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
+index 5261751f6bd4..5f5270941ba0 100644
+--- a/include/linux/can/dev.h
++++ b/include/linux/can/dev.h
+@@ -32,6 +32,7 @@ enum can_mode {
+ * CAN common private data
+ */
+ struct can_priv {
++ struct net_device *dev;
+ struct can_device_stats can_stats;
+
+ struct can_bittiming bittiming, data_bittiming;
+@@ -47,7 +48,7 @@ struct can_priv {
+ u32 ctrlmode_static; /* static enabled options for driver/hardware */
+
+ int restart_ms;
+- struct timer_list restart_timer;
++ struct delayed_work restart_work;
+
+ int (*do_set_bittiming)(struct net_device *dev);
+ int (*do_set_data_bittiming)(struct net_device *dev);
+diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
+index 9bb77d3ed6e0..c2256d746543 100644
+--- a/include/linux/nvmem-consumer.h
++++ b/include/linux/nvmem-consumer.h
+@@ -74,7 +74,7 @@ static inline void nvmem_cell_put(struct nvmem_cell *cell)
+ {
+ }
+
+-static inline char *nvmem_cell_read(struct nvmem_cell *cell, size_t *len)
++static inline void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len)
+ {
+ return ERR_PTR(-ENOSYS);
+ }
+diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
+index d1faa019c02a..b71b2581811c 100644
+--- a/include/linux/slub_def.h
++++ b/include/linux/slub_def.h
+@@ -114,15 +114,17 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
+ void object_err(struct kmem_cache *s, struct page *page,
+ u8 *object, char *reason);
+
++void *fixup_red_left(struct kmem_cache *s, void *p);
++
+ static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+ void *x) {
+ void *object = x - (x - page_address(page)) % cache->size;
+ void *last_object = page_address(page) +
+ (page->objects - 1) * cache->size;
+- if (unlikely(object > last_object))
+- return last_object;
+- else
+- return object;
++ void *result = (unlikely(object > last_object)) ? last_object : object;
++
++ result = fixup_red_left(cache, result);
++ return result;
+ }
+
+ #endif /* _LINUX_SLUB_DEF_H */
+diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
+index fa7bc29925c9..ef17db6caaed 100644
+--- a/include/linux/sysctl.h
++++ b/include/linux/sysctl.h
+@@ -41,6 +41,8 @@ extern int proc_dostring(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+ extern int proc_dointvec(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
++extern int proc_douintvec(struct ctl_table *, int,
++ void __user *, size_t *, loff_t *);
+ extern int proc_dointvec_minmax(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+ extern int proc_dointvec_jiffies(struct ctl_table *, int,
+diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h
+index 13c0b2ba1b6c..31ae074dad9d 100644
+--- a/include/scsi/scsi_transport_sas.h
++++ b/include/scsi/scsi_transport_sas.h
+@@ -15,8 +15,14 @@ static inline int is_sas_attached(struct scsi_device *sdev)
+ {
+ return 0;
+ }
++
++static inline int scsi_is_sas_rphy(const struct device *sdev)
++{
++ return 0;
++}
+ #else
+ extern int is_sas_attached(struct scsi_device *sdev);
++extern int scsi_is_sas_rphy(const struct device *);
+ #endif
+
+ static inline int sas_protocol_ata(enum sas_protocol proto)
+@@ -202,7 +208,6 @@ extern int sas_rphy_add(struct sas_rphy *);
+ extern void sas_rphy_remove(struct sas_rphy *);
+ extern void sas_rphy_delete(struct sas_rphy *);
+ extern void sas_rphy_unlink(struct sas_rphy *);
+-extern int scsi_is_sas_rphy(const struct device *);
+
+ struct sas_port *sas_port_alloc(struct device *, int);
+ struct sas_port *sas_port_alloc_num(struct device *);
+diff --git a/kernel/cgroup.c b/kernel/cgroup.c
+index 129a7ca5f159..b1e0cbb8cf97 100644
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -3452,9 +3452,28 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
+ * Except for the root, subtree_control must be zero for a cgroup
+ * with tasks so that child cgroups don't compete against tasks.
+ */
+- if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
+- ret = -EBUSY;
+- goto out_unlock;
++ if (enable && cgroup_parent(cgrp)) {
++ struct cgrp_cset_link *link;
++
++ /*
++ * Because namespaces pin csets too, @cgrp->cset_links
++ * might not be empty even when @cgrp is empty. Walk and
++ * verify each cset.
++ */
++ spin_lock_irq(&css_set_lock);
++
++ ret = 0;
++ list_for_each_entry(link, &cgrp->cset_links, cset_link) {
++ if (css_set_populated(link->cset)) {
++ ret = -EBUSY;
++ break;
++ }
++ }
++
++ spin_unlock_irq(&css_set_lock);
++
++ if (ret)
++ goto out_unlock;
+ }
+
+ /* save and update control masks and prepare csses */
+@@ -3905,7 +3924,9 @@ void cgroup_file_notify(struct cgroup_file *cfile)
+ * cgroup_task_count - count the number of tasks in a cgroup.
+ * @cgrp: the cgroup in question
+ *
+- * Return the number of tasks in the cgroup.
++ * Return the number of tasks in the cgroup. The returned number can be
++ * higher than the actual number of tasks due to css_set references from
++ * namespace roots and temporary usages.
+ */
+ static int cgroup_task_count(const struct cgroup *cgrp)
+ {
+diff --git a/kernel/cpuset.c b/kernel/cpuset.c
+index 40b6ed559448..8cee9627ac4b 100644
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -325,8 +325,7 @@ static struct file_system_type cpuset_fs_type = {
+ /*
+ * Return in pmask the portion of a cpusets's cpus_allowed that
+ * are online. If none are online, walk up the cpuset hierarchy
+- * until we find one that does have some online cpus. The top
+- * cpuset always has some cpus online.
++ * until we find one that does have some online cpus.
+ *
+ * One way or another, we guarantee to return some non-empty subset
+ * of cpu_online_mask.
+@@ -335,8 +334,20 @@ static struct file_system_type cpuset_fs_type = {
+ */
+ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
+ {
+- while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask))
++ while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
+ cs = parent_cs(cs);
++ if (unlikely(!cs)) {
++ /*
++ * The top cpuset doesn't have any online cpu as a
++ * consequence of a race between cpuset_hotplug_work
++ * and cpu hotplug notifier. But we know the top
++ * cpuset's effective_cpus is on its way to be
++ * identical to cpu_online_mask.
++ */
++ cpumask_copy(pmask, cpu_online_mask);
++ return;
++ }
++ }
+ cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
+ }
+
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index e68c0a735c8f..31c1520b744d 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -6064,7 +6064,7 @@ static int __perf_pmu_output_stop(void *info)
+ {
+ struct perf_event *event = info;
+ struct pmu *pmu = event->pmu;
+- struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
++ struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ struct remote_output ro = {
+ .rb = event->rb,
+ };
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 74fd39079031..d6404ede47eb 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -938,14 +938,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+ deactivate_mm(tsk, mm);
+
+ /*
+- * If we're exiting normally, clear a user-space tid field if
+- * requested. We leave this alone when dying by signal, to leave
+- * the value intact in a core dump, and to save the unnecessary
+- * trouble, say, a killed vfork parent shouldn't touch this mm.
+- * Userland only wants this done for a sys_exit.
++ * Signal userspace if we're not exiting with a core dump
++ * because we want to leave the value intact for debugging
++ * purposes.
+ */
+ if (tsk->clear_child_tid) {
+- if (!(tsk->flags & PF_SIGNALED) &&
++ if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
+ atomic_read(&mm->mm_users) > 1) {
+ /*
+ * We don't check the error code - if userspace has
+diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
+index 276762f3a460..d5760c42f042 100644
+--- a/kernel/printk/braille.c
++++ b/kernel/printk/braille.c
+@@ -9,10 +9,10 @@
+
+ char *_braille_console_setup(char **str, char **brl_options)
+ {
+- if (!memcmp(*str, "brl,", 4)) {
++ if (!strncmp(*str, "brl,", 4)) {
+ *brl_options = "";
+ *str += 4;
+- } else if (!memcmp(str, "brl=", 4)) {
++ } else if (!strncmp(*str, "brl=", 4)) {
+ *brl_options = *str + 4;
+ *str = strchr(*brl_options, ',');
+ if (!*str)
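The second branch of the original compared memcmp(str, "brl=", 4), that is, the bytes of the pointer variable itself rather than the text it points to, so the option could never match reliably; strncmp(*str, ...) dereferences first and also stops at a terminating NUL instead of reading past a short string. A reduced illustration of the bug class:

	char *opt = "brl=ttyS0";
	char **str = &opt;

	/* wrong: inspects the pointer's own storage: memcmp(str, "brl=", 4) */
	/* right: compare the pointed-to text, bounded at the NUL */
	if (!strncmp(*str, "brl=", 4))
		setup_options(*str + 4);	/* hypothetical consumer */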
+diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
+index 3cee0d8393ed..8ce4eecff319 100644
+--- a/kernel/rcu/rcuperf.c
++++ b/kernel/rcu/rcuperf.c
+@@ -58,7 +58,7 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
+ #define VERBOSE_PERFOUT_ERRSTRING(s) \
+ do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
+
+-torture_param(bool, gp_exp, true, "Use expedited GP wait primitives");
++torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
+ torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
+ torture_param(int, nreaders, -1, "Number of RCU reader threads");
+ torture_param(int, nwriters, -1, "Number of RCU updater threads");
+@@ -363,8 +363,6 @@ rcu_perf_writer(void *arg)
+ u64 *wdpp = writer_durations[me];
+
+ VERBOSE_PERFOUT_STRING("rcu_perf_writer task started");
+- WARN_ON(rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp);
+- WARN_ON(rcu_gp_is_normal() && gp_exp);
+ WARN_ON(!wdpp);
+ set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+ sp.sched_priority = 1;
+@@ -631,6 +629,16 @@ rcu_perf_init(void)
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
++ if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) {
++ VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
++ firsterr = -EINVAL;
++ goto unwind;
++ }
++ if (rcu_gp_is_normal() && gp_exp) {
++ VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
++ firsterr = -EINVAL;
++ goto unwind;
++ }
+ for (i = 0; i < nrealwriters; i++) {
+ writer_durations[i] =
+ kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 38eacc323fdd..e1422c1f5a29 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7255,7 +7255,6 @@ static void sched_rq_cpu_starting(unsigned int cpu)
+ struct rq *rq = cpu_rq(cpu);
+
+ rq->calc_load_update = calc_load_update;
+- account_reset_rq(rq);
+ update_max_interval();
+ }
+
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 898c0d2f18fe..b4f3edf9a219 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -1809,16 +1809,3 @@ static inline void cpufreq_trigger_update(u64 time) {}
+ #else /* arch_scale_freq_capacity */
+ #define arch_scale_freq_invariant() (false)
+ #endif
+-
+-static inline void account_reset_rq(struct rq *rq)
+-{
+-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+- rq->prev_irq_time = 0;
+-#endif
+-#ifdef CONFIG_PARAVIRT
+- rq->prev_steal_time = 0;
+-#endif
+-#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+- rq->prev_steal_time_rq = 0;
+-#endif
+-}
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 87b2fc38398b..b58e447233bf 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -2122,6 +2122,21 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
+ return 0;
+ }
+
++static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
++ int *valp,
++ int write, void *data)
++{
++ if (write) {
++ if (*negp)
++ return -EINVAL;
++ *valp = *lvalp;
++ } else {
++ unsigned int val = *valp;
++ *lvalp = (unsigned long)val;
++ }
++ return 0;
++}
++
+ static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
+
+ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
+@@ -2241,8 +2256,27 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
+ int proc_dointvec(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+- return do_proc_dointvec(table,write,buffer,lenp,ppos,
+- NULL,NULL);
++ return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
++}
++
++/**
++ * proc_douintvec - read a vector of unsigned integers
++ * @table: the sysctl table
++ * @write: %TRUE if this is a write to the sysctl file
++ * @buffer: the user buffer
++ * @lenp: the size of the user buffer
++ * @ppos: file position
++ *
++ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
++ * values from/to the user buffer, treated as an ASCII string.
++ *
++ * Returns 0 on success.
++ */
++int proc_douintvec(struct ctl_table *table, int write,
++ void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++ return do_proc_dointvec(table, write, buffer, lenp, ppos,
++ do_proc_douintvec_conv, NULL);
+ }
+
+ /*
+@@ -2840,6 +2874,12 @@ int proc_dointvec(struct ctl_table *table, int write,
+ return -ENOSYS;
+ }
+
++int proc_douintvec(struct ctl_table *table, int write,
++ void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++ return -ENOSYS;
++}
++
+ int proc_dointvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+@@ -2885,6 +2925,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+ * exception granted :-)
+ */
+ EXPORT_SYMBOL(proc_dointvec);
++EXPORT_SYMBOL(proc_douintvec);
+ EXPORT_SYMBOL(proc_dointvec_jiffies);
+ EXPORT_SYMBOL(proc_dointvec_minmax);
+ EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
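With the helper exported, a subsystem can expose an unsigned knob by pointing .proc_handler at proc_douintvec; do_proc_douintvec_conv then rejects written negatives with -EINVAL instead of letting them wrap. A hypothetical table entry (names illustrative, not from the patch):

	static unsigned int my_limit;		/* assumed example variable */

	static struct ctl_table my_table[] = {
		{
			.procname	= "my_limit",
			.data		= &my_limit,
			.maxlen		= sizeof(unsigned int),
			.mode		= 0644,
			.proc_handler	= proc_douintvec,
		},
		{ }
	};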
+diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
+index fafeaf803bd0..f4b86e8ca1e7 100644
+--- a/kernel/trace/Kconfig
++++ b/kernel/trace/Kconfig
+@@ -542,6 +542,7 @@ config HIST_TRIGGERS
+ bool "Histogram triggers"
+ depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select TRACING_MAP
++ select TRACING
+ default n
+ help
+ Hist triggers allow one or more arbitrary trace event fields
+diff --git a/lib/test_hash.c b/lib/test_hash.c
+index c9549c8b4909..a06ac379ad42 100644
+--- a/lib/test_hash.c
++++ b/lib/test_hash.c
+@@ -143,7 +143,7 @@ static int __init
+ test_hash_init(void)
+ {
+ char buf[SIZE+1];
+- u32 string_or = 0, hash_or[2][33] = { 0 };
++ u32 string_or = 0, hash_or[2][33] = { { 0, } };
+ unsigned tests = 0;
+ unsigned long long h64 = 0;
+ int i, j;
+@@ -219,21 +219,27 @@ test_hash_init(void)
+ }
+
+ /* Issue notices about skipped tests. */
+-#ifndef HAVE_ARCH__HASH_32
+- pr_info("__hash_32() has no arch implementation to test.");
+-#elif HAVE_ARCH__HASH_32 != 1
++#ifdef HAVE_ARCH__HASH_32
++#if HAVE_ARCH__HASH_32 != 1
+ pr_info("__hash_32() is arch-specific; not compared to generic.");
+ #endif
+-#ifndef HAVE_ARCH_HASH_32
+- pr_info("hash_32() has no arch implementation to test.");
+-#elif HAVE_ARCH_HASH_32 != 1
++#else
++ pr_info("__hash_32() has no arch implementation to test.");
++#endif
++#ifdef HAVE_ARCH_HASH_32
++#if HAVE_ARCH_HASH_32 != 1
+ pr_info("hash_32() is arch-specific; not compared to generic.");
+ #endif
+-#ifndef HAVE_ARCH_HASH_64
+- pr_info("hash_64() has no arch implementation to test.");
+-#elif HAVE_ARCH_HASH_64 != 1
++#else
++ pr_info("hash_32() has no arch implementation to test.");
++#endif
++#ifdef HAVE_ARCH_HASH_64
++#if HAVE_ARCH_HASH_64 != 1
+ pr_info("hash_64() is arch-specific; not compared to generic.");
+ #endif
++#else
++ pr_info("hash_64() has no arch implementation to test.");
++#endif
+
+ pr_notice("%u tests passed.", tests);
+
+diff --git a/mm/ksm.c b/mm/ksm.c
+index 4786b4150f62..443bac35cd87 100644
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -283,7 +283,8 @@ static inline struct rmap_item *alloc_rmap_item(void)
+ {
+ struct rmap_item *rmap_item;
+
+- rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
++ rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
++ __GFP_NORETRY | __GFP_NOWARN);
+ if (rmap_item)
+ ksm_rmap_items++;
+ return rmap_item;
+diff --git a/mm/slub.c b/mm/slub.c
+index 7a6d268840c0..b94fb362a018 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -124,7 +124,7 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
+ #endif
+ }
+
+-static inline void *fixup_red_left(struct kmem_cache *s, void *p)
++inline void *fixup_red_left(struct kmem_cache *s, void *p)
+ {
+ if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
+ p += s->red_left_pad;
+diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
+index df42eb1365a0..63762b3df7c7 100644
+--- a/net/batman-adv/bat_v_elp.c
++++ b/net/batman-adv/bat_v_elp.c
+@@ -334,7 +334,7 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
+ goto out;
+
+ skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
+- elp_buff = skb_push(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
++ elp_buff = skb_put(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
+ elp_packet = (struct batadv_elp_packet *)elp_buff;
+ memset(elp_packet, 0, BATADV_ELP_HLEN);
+
+diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
+index ab8c4f9738fe..1da7bfc61ac6 100644
+--- a/net/batman-adv/originator.c
++++ b/net/batman-adv/originator.c
+@@ -251,10 +251,8 @@ static void batadv_neigh_node_release(struct kref *ref)
+ struct hlist_node *node_tmp;
+ struct batadv_neigh_node *neigh_node;
+ struct batadv_neigh_ifinfo *neigh_ifinfo;
+- struct batadv_algo_ops *bao;
+
+ neigh_node = container_of(ref, struct batadv_neigh_node, refcount);
+- bao = neigh_node->orig_node->bat_priv->bat_algo_ops;
+
+ hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
+ &neigh_node->ifinfo_list, list) {
+@@ -263,9 +261,6 @@ static void batadv_neigh_node_release(struct kref *ref)
+
+ batadv_hardif_neigh_put(neigh_node->hardif_neigh);
+
+- if (bao->bat_neigh_free)
+- bao->bat_neigh_free(neigh_node);
+-
+ batadv_hardif_put(neigh_node->if_incoming);
+
+ kfree_rcu(neigh_node, rcu);
+diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
+index bfac086b4d01..69d425d5729e 100644
+--- a/net/batman-adv/routing.c
++++ b/net/batman-adv/routing.c
+@@ -456,6 +456,29 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
+ }
+
+ /**
++ * batadv_last_bonding_get - Get last_bonding_candidate of orig_node
++ * @orig_node: originator node whose last bonding candidate should be retrieved
++ *
++ * Return: last bonding candidate of router or NULL if not found
++ *
++ * The object is returned with refcounter increased by 1.
++ */
++static struct batadv_orig_ifinfo *
++batadv_last_bonding_get(struct batadv_orig_node *orig_node)
++{
++ struct batadv_orig_ifinfo *last_bonding_candidate;
++
++ spin_lock_bh(&orig_node->neigh_list_lock);
++ last_bonding_candidate = orig_node->last_bonding_candidate;
++
++ if (last_bonding_candidate)
++ kref_get(&last_bonding_candidate->refcount);
++ spin_unlock_bh(&orig_node->neigh_list_lock);
++
++ return last_bonding_candidate;
++}
++
++/**
+ * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node
+ * @orig_node: originator node whose bonding candidates should be replaced
+ * @new_candidate: new bonding candidate or NULL
+@@ -525,7 +548,7 @@ batadv_find_router(struct batadv_priv *bat_priv,
+ * router - obviously there are no other candidates.
+ */
+ rcu_read_lock();
+- last_candidate = orig_node->last_bonding_candidate;
++ last_candidate = batadv_last_bonding_get(orig_node);
+ if (last_candidate)
+ last_cand_router = rcu_dereference(last_candidate->router);
+
+@@ -617,6 +640,9 @@ next:
+ batadv_orig_ifinfo_put(next_candidate);
+ }
+
++ if (last_candidate)
++ batadv_orig_ifinfo_put(last_candidate);
++
+ return router;
+ }
+
+diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
+index 74d865a4df46..40aed16fbc28 100644
+--- a/net/batman-adv/types.h
++++ b/net/batman-adv/types.h
+@@ -1284,8 +1284,6 @@ struct batadv_forw_packet {
+ * better than neigh2 for their respective outgoing interface from the metric
+ * prospective
+ * @bat_neigh_print: print the single hop neighbor list (optional)
+- * @bat_neigh_free: free the resources allocated by the routing algorithm for a
+- * neigh_node object
+ * @bat_orig_print: print the originator table (optional)
+ * @bat_orig_free: free the resources allocated by the routing algorithm for an
+ * orig_node object
+@@ -1316,7 +1314,6 @@ struct batadv_algo_ops {
+ struct batadv_neigh_node *neigh2,
+ struct batadv_hard_iface *if_outgoing2);
+ void (*bat_neigh_print)(struct batadv_priv *priv, struct seq_file *seq);
+- void (*bat_neigh_free)(struct batadv_neigh_node *neigh);
+ /* orig_node handling API */
+ void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq,
+ struct batadv_hard_iface *hard_iface);
+diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
+index eb4f5f24cbe3..106643e268ba 100644
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -32,6 +32,7 @@
+
+ #include <linux/debugfs.h>
+ #include <linux/crc16.h>
++#include <linux/filter.h>
+
+ #include <net/bluetooth/bluetooth.h>
+ #include <net/bluetooth/hci_core.h>
+@@ -5835,6 +5836,9 @@ static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb,
+ if (chan->sdu)
+ break;
+
++ if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE))
++ break;
++
+ chan->sdu_len = get_unaligned_le16(skb->data);
+ skb_pull(skb, L2CAP_SDULEN_SIZE);
+
+@@ -6610,6 +6614,10 @@ static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
+ goto drop;
+ }
+
++ if ((chan->mode == L2CAP_MODE_ERTM ||
++ chan->mode == L2CAP_MODE_STREAMING) && sk_filter(chan->data, skb))
++ goto drop;
++
+ if (!control->sframe) {
+ int err;
+
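The pskb_may_pull() hunk guards the 2-byte SDU length read: get_unaligned_le16(skb->data) is only safe once that many bytes are known to be linearly present, so a truncated start fragment is now dropped instead of read past. The idiom:

	if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE))
		break;				/* fragment too short */
	chan->sdu_len = get_unaligned_le16(skb->data);
	skb_pull(skb, L2CAP_SDULEN_SIZE);

The second hunk adds the matching sk_filter() call for ERTM/streaming mode so filtered frames are discarded before reassembly.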
+diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
+index 1842141baedb..a8ba752732c9 100644
+--- a/net/bluetooth/l2cap_sock.c
++++ b/net/bluetooth/l2cap_sock.c
+@@ -1019,7 +1019,7 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+ goto done;
+
+ if (pi->rx_busy_skb) {
+- if (!sock_queue_rcv_skb(sk, pi->rx_busy_skb))
++ if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb))
+ pi->rx_busy_skb = NULL;
+ else
+ goto done;
+@@ -1270,7 +1270,17 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
+ goto done;
+ }
+
+- err = sock_queue_rcv_skb(sk, skb);
++ if (chan->mode != L2CAP_MODE_ERTM &&
++ chan->mode != L2CAP_MODE_STREAMING) {
++ /* Even if no filter is attached, we could potentially
++ * get errors from security modules, etc.
++ */
++ err = sk_filter(sk, skb);
++ if (err)
++ goto done;
++ }
++
++ err = __sock_queue_rcv_skb(sk, skb);
+
+ /* For ERTM, handle one skb that doesn't fit into the recv
+ * buffer. This is important to do because the data frames
+diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
+index 8bad2ad81399..5f0ed8c6584d 100644
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -1330,8 +1330,12 @@ out:
+ spin_unlock_bh(&txqi->queue.lock);
+
+ if (skb && skb_has_frag_list(skb) &&
+- !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
+- skb_linearize(skb);
++ !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) {
++ if (skb_linearize(skb)) {
++ ieee80211_free_txskb(&local->hw, skb);
++ return NULL;
++ }
++ }
+
+ return skb;
+ }
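skb_linearize() can fail with -ENOMEM, and the old code ignored that and could hand a still-fragmented skb to a driver that advertised no frag-list support; on failure the skb must be freed and the dequeue abandoned. A sketch of the pattern, where hw_handles_frag_list stands in for the ieee80211_hw_check(..., TX_FRAG_LIST) test:

	if (skb_has_frag_list(skb) && !hw_handles_frag_list) {
		if (skb_linearize(skb)) {	/* -ENOMEM */
			ieee80211_free_txskb(&local->hw, skb);
			return NULL;
		}
	}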
+diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
+index 2808d550d273..0294ada6bc40 100644
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -453,7 +453,7 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+ struct rpc_xprt_switch *xps;
+
+ if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) {
+- WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
++ WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
+ xps = args->bc_xprt->xpt_bc_xps;
+ xprt_switch_get(xps);
+ } else {
+@@ -520,7 +520,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
+ char servername[48];
+
+ if (args->bc_xprt) {
+- WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
++ WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
+ xprt = args->bc_xprt->xpt_bc_xprt;
+ if (xprt) {
+ xprt_get(xprt);
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index cc9852897395..c5b0cb4f4056 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -1188,11 +1188,17 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
+ *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+
+ /* Encode reply */
+- if (test_bit(RQ_DROPME, &rqstp->rq_flags)) {
++ if (*statp == rpc_drop_reply ||
++ test_bit(RQ_DROPME, &rqstp->rq_flags)) {
+ if (procp->pc_release)
+ procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+ goto dropit;
+ }
++ if (*statp == rpc_autherr_badcred) {
++ if (procp->pc_release)
++ procp->pc_release(rqstp, NULL, rqstp->rq_resp);
++ goto err_bad_auth;
++ }
+ if (*statp == rpc_success &&
+ (xdr = procp->pc_encode) &&
+ !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
+diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
+index 6326ebe8b595..c748ff6f6877 100644
+--- a/net/sunrpc/xprtrdma/fmr_ops.c
++++ b/net/sunrpc/xprtrdma/fmr_ops.c
+@@ -63,9 +63,12 @@ static int
+ __fmr_unmap(struct rpcrdma_mw *mw)
+ {
+ LIST_HEAD(l);
++ int rc;
+
+ list_add(&mw->fmr.fmr->list, &l);
+- return ib_unmap_fmr(&l);
++ rc = ib_unmap_fmr(&l);
++ list_del_init(&mw->fmr.fmr->list);
++ return rc;
+ }
+
+ /* Deferred reset of a single FMR. Generate a fresh rkey by
+@@ -267,7 +270,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+ seg = &req->rl_segments[i];
+ mw = seg->rl_mw;
+
+- list_add(&mw->fmr.fmr->list, &unmap_list);
++ list_add_tail(&mw->fmr.fmr->list, &unmap_list);
+
+ i += seg->mr_nsegs;
+ }
+@@ -280,7 +283,9 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+ */
+ for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
+ seg = &req->rl_segments[i];
++ mw = seg->rl_mw;
+
++ list_del_init(&mw->fmr.fmr->list);
+ __fmr_dma_unmap(r_xprt, seg);
+ rpcrdma_put_mw(r_xprt, seg->rl_mw);
+
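ib_unmap_fmr() leaves the FMRs linked on the caller-supplied list, so reusing fmr->list for a later request without unlinking corrupts the list; the hunks add the missing list_del_init() on both paths. The pairing, in a minimal sketch:

	LIST_HEAD(l);
	int rc;

	list_add(&mw->fmr.fmr->list, &l);
	rc = ib_unmap_fmr(&l);
	list_del_init(&mw->fmr.fmr->list);	/* node reusable again */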
+diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
+index 42396a74405d..a68f03133df9 100644
+--- a/scripts/recordmcount.c
++++ b/scripts/recordmcount.c
+@@ -363,6 +363,7 @@ is_mcounted_section_name(char const *const txtname)
+ strcmp(".sched.text", txtname) == 0 ||
+ strcmp(".spinlock.text", txtname) == 0 ||
+ strcmp(".irqentry.text", txtname) == 0 ||
++ strcmp(".softirqentry.text", txtname) == 0 ||
+ strcmp(".kprobes.text", txtname) == 0 ||
+ strcmp(".text.unlikely", txtname) == 0;
+ }
+diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
+index 96e2486a6fc4..2d48011bc362 100755
+--- a/scripts/recordmcount.pl
++++ b/scripts/recordmcount.pl
+@@ -134,6 +134,7 @@ my %text_sections = (
+ ".sched.text" => 1,
+ ".spinlock.text" => 1,
+ ".irqentry.text" => 1,
++ ".softirqentry.text" => 1,
+ ".kprobes.text" => 1,
+ ".text.unlikely" => 1,
+ );
+diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
+index 56fefbd85782..ed62748a6d55 100644
+--- a/sound/pci/hda/patch_conexant.c
++++ b/sound/pci/hda/patch_conexant.c
+@@ -261,6 +261,7 @@ enum {
+ CXT_FIXUP_HP_530,
+ CXT_FIXUP_CAP_MIX_AMP_5047,
+ CXT_FIXUP_MUTE_LED_EAPD,
++ CXT_FIXUP_HP_SPECTRE,
+ };
+
+ /* for hda_fixup_thinkpad_acpi() */
+@@ -765,6 +766,14 @@ static const struct hda_fixup cxt_fixups[] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cxt_fixup_mute_led_eapd,
+ },
++ [CXT_FIXUP_HP_SPECTRE] = {
++ .type = HDA_FIXUP_PINS,
++ .v.pins = (const struct hda_pintbl[]) {
++ /* enable NID 0x1d for the speaker on top */
++ { 0x1d, 0x91170111 },
++ { }
++ }
++ },
+ };
+
+ static const struct snd_pci_quirk cxt5045_fixups[] = {
+@@ -814,6 +823,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
+ SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC),
+ SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
+ SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
++ SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
+ SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
+ SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
+ SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index eaee626ab185..b1fa50aed888 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -5790,6 +5790,13 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
+ {0x14, 0x90170110}, \
+ {0x15, 0x0221401f}
+
++#define ALC295_STANDARD_PINS \
++ {0x12, 0xb7a60130}, \
++ {0x14, 0x90170110}, \
++ {0x17, 0x21014020}, \
++ {0x18, 0x21a19030}, \
++ {0x21, 0x04211020}
++
+ #define ALC298_STANDARD_PINS \
+ {0x12, 0x90a60130}, \
+ {0x21, 0x03211020}
+@@ -5830,6 +5837,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+ {0x14, 0x90170120},
+ {0x21, 0x02211030}),
+ SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++ {0x14, 0x90170110},
++ {0x1b, 0x02011020},
++ {0x21, 0x0221101f}),
++ SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x14, 0x90170130},
+ {0x1b, 0x01014020},
+ {0x21, 0x0221103f}),
+@@ -5895,6 +5906,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+ {0x14, 0x90170120},
+ {0x21, 0x02211030}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++ {0x12, 0xb7a60130},
++ {0x14, 0x90170110},
++ {0x21, 0x02211020}),
++ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ ALC256_STANDARD_PINS),
+ SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
+ {0x12, 0x90a60130},
+@@ -6005,6 +6020,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+ SND_HDA_PIN_QUIRK(0x10ec0293, 0x1028, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
+ ALC292_STANDARD_PINS,
+ {0x13, 0x90a60140}),
++ SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
++ ALC295_STANDARD_PINS),
+ SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+ ALC298_STANDARD_PINS,
+ {0x17, 0x90170110}),
+diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
+index 06d8c263c68f..d5a0327e8ff1 100644
+--- a/sound/soc/intel/skylake/skl.c
++++ b/sound/soc/intel/skylake/skl.c
+@@ -659,8 +659,10 @@ static int skl_probe(struct pci_dev *pci,
+
+ skl->nhlt = skl_nhlt_init(bus->dev);
+
+- if (skl->nhlt == NULL)
++ if (skl->nhlt == NULL) {
++ err = -ENODEV;
+ goto out_free;
++ }
+
+ skl_nhlt_update_topology_bin(skl);
+
+diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c
+index b837265ac3e9..8d0d45d330e7 100644
+--- a/sound/soc/omap/omap-mcpdm.c
++++ b/sound/soc/omap/omap-mcpdm.c
+@@ -390,8 +390,8 @@ static int omap_mcpdm_probe(struct snd_soc_dai *dai)
+ pm_runtime_get_sync(mcpdm->dev);
+ omap_mcpdm_write(mcpdm, MCPDM_REG_CTRL, 0x00);
+
+- ret = devm_request_irq(mcpdm->dev, mcpdm->irq, omap_mcpdm_irq_handler,
+- 0, "McPDM", (void *)mcpdm);
++ ret = request_irq(mcpdm->irq, omap_mcpdm_irq_handler, 0, "McPDM",
++ (void *)mcpdm);
+
+ pm_runtime_put_sync(mcpdm->dev);
+
+@@ -416,6 +416,7 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai)
+ {
+ struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
+
++ free_irq(mcpdm->irq, (void *)mcpdm);
+ pm_runtime_disable(mcpdm->dev);
+
+ return 0;
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-10-16 19:22 Mike Pagano
0 siblings, 0 replies; 20+ messages in thread
From: Mike Pagano @ 2016-10-16 19:22 UTC (permalink / raw
To: gentoo-commits
commit: e5544ac026f7c8b3fd255a51802658980788492f
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sun Oct 16 19:22:32 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sun Oct 16 19:22:32 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=e5544ac0
Linux patch 4.7.8
0000_README | 4 +
1007_linux-4.7.8.patch | 1626 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1630 insertions(+)
diff --git a/0000_README b/0000_README
index 01abfe8..00700f9 100644
--- a/0000_README
+++ b/0000_README
@@ -71,6 +71,10 @@ Patch: 1006_linux-4.7.7.patch
From: http://www.kernel.org
Desc: Linux 4.7.7
+Patch: 1007_linux-4.7.8.patch
+From: http://www.kernel.org
+Desc: Linux 4.7.8
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1007_linux-4.7.8.patch b/1007_linux-4.7.8.patch
new file mode 100644
index 0000000..a1353a1
--- /dev/null
+++ b/1007_linux-4.7.8.patch
@@ -0,0 +1,1626 @@
+diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt
+index c04165868faf..02f50686c418 100644
+--- a/Documentation/virtual/kvm/devices/vcpu.txt
++++ b/Documentation/virtual/kvm/devices/vcpu.txt
+@@ -30,4 +30,6 @@ Returns: -ENODEV: PMUv3 not supported
+ attribute
+ -EBUSY: PMUv3 already initialized
+
+-Request the initialization of the PMUv3.
++Request the initialization of the PMUv3. This must be done after creating the
++in-kernel irqchip. Creating a PMU with a userspace irqchip is currently not
++supported.
+diff --git a/Makefile b/Makefile
+index 320a9308470b..4e17baa91e9c 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 7
+-SUBLEVEL = 7
++SUBLEVEL = 8
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/arch/arm/boot/dts/armada-390.dtsi b/arch/arm/boot/dts/armada-390.dtsi
+index 094e39c66039..6cd18d8aaac7 100644
+--- a/arch/arm/boot/dts/armada-390.dtsi
++++ b/arch/arm/boot/dts/armada-390.dtsi
+@@ -47,6 +47,8 @@
+ #include "armada-39x.dtsi"
+
+ / {
++ compatible = "marvell,armada390";
++
+ soc {
+ internal-regs {
+ pinctrl@18000 {
+@@ -54,4 +56,5 @@
+ reg = <0x18000 0x20>;
+ };
+ };
++ };
+ };
+diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi
+index df96ccdc9bb4..779f8445942c 100644
+--- a/arch/arm/boot/dts/qcom-apq8064.dtsi
++++ b/arch/arm/boot/dts/qcom-apq8064.dtsi
+@@ -5,6 +5,7 @@
+ #include <dt-bindings/reset/qcom,gcc-msm8960.h>
+ #include <dt-bindings/clock/qcom,mmcc-msm8960.h>
+ #include <dt-bindings/soc/qcom,gsbi.h>
++#include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ / {
+ model = "Qualcomm APQ8064";
+@@ -552,22 +553,50 @@
+ compatible = "qcom,pm8921-gpio",
+ "qcom,ssbi-gpio";
+ reg = <0x150>;
+- interrupts = <192 1>, <193 1>, <194 1>,
+- <195 1>, <196 1>, <197 1>,
+- <198 1>, <199 1>, <200 1>,
+- <201 1>, <202 1>, <203 1>,
+- <204 1>, <205 1>, <206 1>,
+- <207 1>, <208 1>, <209 1>,
+- <210 1>, <211 1>, <212 1>,
+- <213 1>, <214 1>, <215 1>,
+- <216 1>, <217 1>, <218 1>,
+- <219 1>, <220 1>, <221 1>,
+- <222 1>, <223 1>, <224 1>,
+- <225 1>, <226 1>, <227 1>,
+- <228 1>, <229 1>, <230 1>,
+- <231 1>, <232 1>, <233 1>,
+- <234 1>, <235 1>;
+-
++ interrupts = <192 IRQ_TYPE_NONE>,
++ <193 IRQ_TYPE_NONE>,
++ <194 IRQ_TYPE_NONE>,
++ <195 IRQ_TYPE_NONE>,
++ <196 IRQ_TYPE_NONE>,
++ <197 IRQ_TYPE_NONE>,
++ <198 IRQ_TYPE_NONE>,
++ <199 IRQ_TYPE_NONE>,
++ <200 IRQ_TYPE_NONE>,
++ <201 IRQ_TYPE_NONE>,
++ <202 IRQ_TYPE_NONE>,
++ <203 IRQ_TYPE_NONE>,
++ <204 IRQ_TYPE_NONE>,
++ <205 IRQ_TYPE_NONE>,
++ <206 IRQ_TYPE_NONE>,
++ <207 IRQ_TYPE_NONE>,
++ <208 IRQ_TYPE_NONE>,
++ <209 IRQ_TYPE_NONE>,
++ <210 IRQ_TYPE_NONE>,
++ <211 IRQ_TYPE_NONE>,
++ <212 IRQ_TYPE_NONE>,
++ <213 IRQ_TYPE_NONE>,
++ <214 IRQ_TYPE_NONE>,
++ <215 IRQ_TYPE_NONE>,
++ <216 IRQ_TYPE_NONE>,
++ <217 IRQ_TYPE_NONE>,
++ <218 IRQ_TYPE_NONE>,
++ <219 IRQ_TYPE_NONE>,
++ <220 IRQ_TYPE_NONE>,
++ <221 IRQ_TYPE_NONE>,
++ <222 IRQ_TYPE_NONE>,
++ <223 IRQ_TYPE_NONE>,
++ <224 IRQ_TYPE_NONE>,
++ <225 IRQ_TYPE_NONE>,
++ <226 IRQ_TYPE_NONE>,
++ <227 IRQ_TYPE_NONE>,
++ <228 IRQ_TYPE_NONE>,
++ <229 IRQ_TYPE_NONE>,
++ <230 IRQ_TYPE_NONE>,
++ <231 IRQ_TYPE_NONE>,
++ <232 IRQ_TYPE_NONE>,
++ <233 IRQ_TYPE_NONE>,
++ <234 IRQ_TYPE_NONE>,
++ <235 IRQ_TYPE_NONE>;
+ gpio-controller;
+ #gpio-cells = <2>;
+
+@@ -580,9 +609,18 @@
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupts =
+- <128 1>, <129 1>, <130 1>, <131 1>,
+- <132 1>, <133 1>, <134 1>, <135 1>,
+- <136 1>, <137 1>, <138 1>, <139 1>;
++ <128 IRQ_TYPE_NONE>,
++ <129 IRQ_TYPE_NONE>,
++ <130 IRQ_TYPE_NONE>,
++ <131 IRQ_TYPE_NONE>,
++ <132 IRQ_TYPE_NONE>,
++ <133 IRQ_TYPE_NONE>,
++ <134 IRQ_TYPE_NONE>,
++ <135 IRQ_TYPE_NONE>,
++ <136 IRQ_TYPE_NONE>,
++ <137 IRQ_TYPE_NONE>,
++ <138 IRQ_TYPE_NONE>,
++ <139 IRQ_TYPE_NONE>;
+ };
+
+ rtc@11d {
+diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
+index d9751a4769e7..d34fd72172b6 100644
+--- a/arch/arm64/kernel/stacktrace.c
++++ b/arch/arm64/kernel/stacktrace.c
+@@ -43,6 +43,9 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
+ unsigned long fp = frame->fp;
+ unsigned long irq_stack_ptr;
+
++ if (!tsk)
++ tsk = current;
++
+ /*
+ * Switching between stacks is valid when tracing current and in
+ * non-preemptible context.
+@@ -67,7 +70,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
+ frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- if (tsk && tsk->ret_stack &&
++ if (tsk->ret_stack &&
+ (frame->pc == (unsigned long)return_to_handler)) {
+ /*
+ * This is a case where function graph tracer has
+diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
+index 2a43012616b7..f014df90f956 100644
+--- a/arch/arm64/kernel/traps.c
++++ b/arch/arm64/kernel/traps.c
+@@ -149,6 +149,11 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+ unsigned long irq_stack_ptr;
+ int skip;
+
++ pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
++
++ if (!tsk)
++ tsk = current;
++
+ /*
+ * Switching between stacks is valid when tracing current and in
+ * non-preemptible context.
+@@ -158,11 +163,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+ else
+ irq_stack_ptr = 0;
+
+- pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+-
+- if (!tsk)
+- tsk = current;
+-
+ if (tsk == current) {
+ frame.fp = (unsigned long)__builtin_frame_address(0);
+ frame.sp = current_stack_pointer;
+diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
+index 2b42a74ed771..efa592f0bf9a 100644
+--- a/arch/mips/kvm/emulate.c
++++ b/arch/mips/kvm/emulate.c
+@@ -807,6 +807,47 @@ enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu)
+ return EMULATE_FAIL;
+ }
+
++/**
++ * kvm_mips_invalidate_guest_tlb() - Indicates a change in guest MMU map.
++ * @vcpu: VCPU with changed mappings.
++ * @tlb: TLB entry being removed.
++ *
++ * This is called to indicate a single change in guest MMU mappings, so that we
++ * can arrange TLB flushes on this and other CPUs.
++ */
++static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu,
++ struct kvm_mips_tlb *tlb)
++{
++ int cpu, i;
++ bool user;
++
++ /* No need to flush for entries which are already invalid */
++ if (!((tlb->tlb_lo[0] | tlb->tlb_lo[1]) & ENTRYLO_V))
++ return;
++ /* User address space doesn't need flushing for KSeg2/3 changes */
++ user = tlb->tlb_hi < KVM_GUEST_KSEG0;
++
++ preempt_disable();
++
++ /*
++ * Probe the shadow host TLB for the entry being overwritten, if one
++ * matches, invalidate it
++ */
++ kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi);
++
++ /* Invalidate the whole ASID on other CPUs */
++ cpu = smp_processor_id();
++ for_each_possible_cpu(i) {
++ if (i == cpu)
++ continue;
++ if (user)
++ vcpu->arch.guest_user_asid[i] = 0;
++ vcpu->arch.guest_kernel_asid[i] = 0;
++ }
++
++ preempt_enable();
++}
++
+ /* Write Guest TLB Entry @ Index */
+ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu)
+ {
+@@ -826,11 +867,8 @@ enum emulation_result kvm_mips_emul_tlbwi(struct kvm_vcpu *vcpu)
+ }
+
+ tlb = &vcpu->arch.guest_tlb[index];
+- /*
+- * Probe the shadow host TLB for the entry being overwritten, if one
+- * matches, invalidate it
+- */
+- kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi);
++
++ kvm_mips_invalidate_guest_tlb(vcpu, tlb);
+
+ tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0);
+ tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0);
+@@ -859,11 +897,7 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu)
+
+ tlb = &vcpu->arch.guest_tlb[index];
+
+- /*
+- * Probe the shadow host TLB for the entry being overwritten, if one
+- * matches, invalidate it
+- */
+- kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi);
++ kvm_mips_invalidate_guest_tlb(vcpu, tlb);
+
+ tlb->tlb_mask = kvm_read_c0_guest_pagemask(cop0);
+ tlb->tlb_hi = kvm_read_c0_guest_entryhi(cop0);
+@@ -982,6 +1016,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
+ int32_t rt, rd, copz, sel, co_bit, op;
+ uint32_t pc = vcpu->arch.pc;
+ unsigned long curr_pc;
++ int cpu, i;
+
+ /*
+ * Update PC and hold onto current PC in case there is
+@@ -1089,8 +1124,16 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
+ vcpu->arch.gprs[rt]
+ & KVM_ENTRYHI_ASID);
+
++ preempt_disable();
+ /* Blow away the shadow host TLBs */
+ kvm_mips_flush_host_tlb(1);
++ cpu = smp_processor_id();
++ for_each_possible_cpu(i)
++ if (i != cpu) {
++ vcpu->arch.guest_user_asid[i] = 0;
++ vcpu->arch.guest_kernel_asid[i] = 0;
++ }
++ preempt_enable();
+ }
+ kvm_write_c0_guest_entryhi(cop0,
+ vcpu->arch.gprs[rt]);
+diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
+index 467c0b05b6fb..9e9d38a3f56f 100644
+--- a/arch/powerpc/include/asm/pci-bridge.h
++++ b/arch/powerpc/include/asm/pci-bridge.h
+@@ -299,6 +299,7 @@ extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
+ /* Allocate & free a PCI host bridge structure */
+ extern struct pci_controller *pcibios_alloc_controller(struct device_node *dev);
+ extern void pcibios_free_controller(struct pci_controller *phb);
++extern void pcibios_free_controller_deferred(struct pci_host_bridge *bridge);
+
+ #ifdef CONFIG_PCI
+ extern int pcibios_vaddr_is_ioport(void __iomem *address);
+diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
+index a0948f40bc7b..145e5b71c1a9 100644
+--- a/arch/powerpc/include/asm/reg.h
++++ b/arch/powerpc/include/asm/reg.h
+@@ -718,6 +718,7 @@
+ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */
+ #define SPRN_MMCR1 798
+ #define SPRN_MMCR2 785
++#define SPRN_UMMCR2 769
+ #define SPRN_MMCRA 0x312
+ #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
+ #define MMCRA_SDAR_DCACHE_MISS 0x40000000UL
+diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
+index 0f7a60f1e9f6..14af45c5f77d 100644
+--- a/arch/powerpc/kernel/pci-common.c
++++ b/arch/powerpc/kernel/pci-common.c
+@@ -103,6 +103,42 @@ void pcibios_free_controller(struct pci_controller *phb)
+ EXPORT_SYMBOL_GPL(pcibios_free_controller);
+
+ /*
++ * This function is used to call pcibios_free_controller()
++ * in a deferred manner: a callback from the PCI subsystem.
++ *
++ * _*DO NOT*_ call pcibios_free_controller() explicitly if
++ * this is used (or it may access an invalid *phb pointer).
++ *
++ * The callback occurs when all references to the root bus
++ * are dropped (e.g., child buses/devices and their users).
++ *
++ * It's called as .release_fn() of 'struct pci_host_bridge'
++ * which is associated with the 'struct pci_controller.bus'
++ * (root bus) - it expects .release_data to hold a pointer
++ * to 'struct pci_controller'.
++ *
++ * In order to use it, register .release_fn()/release_data
++ * like this:
++ *
++ * pci_set_host_bridge_release(bridge,
++ * pcibios_free_controller_deferred,
++ * (void *) phb);
++ *
++ * e.g. in the pcibios_root_bridge_prepare() callback from
++ * pci_create_root_bus().
++ */
++void pcibios_free_controller_deferred(struct pci_host_bridge *bridge)
++{
++ struct pci_controller *phb = (struct pci_controller *)
++ bridge->release_data;
++
++ pr_debug("domain %d, dynamic %d\n", phb->global_number, phb->is_dynamic);
++
++ pcibios_free_controller(phb);
++}
++EXPORT_SYMBOL_GPL(pcibios_free_controller_deferred);
++
++/*
+ * The function is used to return the minimal alignment
+ * for memory or I/O windows of the associated P2P bridge.
+ * By default, 4KiB alignment for I/O windows and 1MiB for
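+
+A minimal sketch of the registration pattern the comment above describes,
+modeled on the pseries hook changed later in this patch (the hook name
+my_root_bridge_prepare is illustrative):
+
+        static int my_root_bridge_prepare(struct pci_host_bridge *bridge)
+        {
+                struct pci_controller *phb = pci_bus_to_host(bridge->bus);
+
+                /*
+                 * Hand ownership of phb to the PCI core: it invokes
+                 * pcibios_free_controller_deferred() when the last
+                 * reference to the root bus is dropped. Do not call
+                 * pcibios_free_controller() directly after this point.
+                 */
+                pci_set_host_bridge_release(bridge,
+                                            pcibios_free_controller_deferred,
+                                            (void *) phb);
+                return 0;
+        }
+
+The removal side (see remove_phb_dynamic() below) then drops its direct
+pcibios_free_controller() call and relies on the release hook instead.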
+diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
+index 2afdb9c0937d..729f8faa95c5 100644
+--- a/arch/powerpc/kvm/book3s_emulate.c
++++ b/arch/powerpc/kvm/book3s_emulate.c
+@@ -498,6 +498,7 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+ case SPRN_MMCR0:
+ case SPRN_MMCR1:
+ case SPRN_MMCR2:
++ case SPRN_UMMCR2:
+ #endif
+ break;
+ unprivileged:
+@@ -640,6 +641,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
+ case SPRN_MMCR0:
+ case SPRN_MMCR1:
+ case SPRN_MMCR2:
++ case SPRN_UMMCR2:
+ case SPRN_TIR:
+ #endif
+ *spr_val = 0;
+diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
+index 4afae695899a..eb283c5d8cf1 100644
+--- a/arch/powerpc/kvm/booke.c
++++ b/arch/powerpc/kvm/booke.c
+@@ -2038,7 +2038,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ if (type == KVMPPC_DEBUG_NONE)
+ continue;
+
+- if (type & !(KVMPPC_DEBUG_WATCH_READ |
++ if (type & ~(KVMPPC_DEBUG_WATCH_READ |
+ KVMPPC_DEBUG_WATCH_WRITE |
+ KVMPPC_DEBUG_BREAKPOINT))
+ return -EINVAL;
+diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
+index fe16a50700de..09eba5a9929a 100644
+--- a/arch/powerpc/platforms/pseries/pci.c
++++ b/arch/powerpc/platforms/pseries/pci.c
+@@ -119,6 +119,10 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
+
+ bus = bridge->bus;
+
++ /* Rely on the pcibios_free_controller_deferred() callback. */
++ pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred,
++ (void *) pci_bus_to_host(bus));
++
+ dn = pcibios_get_phb_of_node(bus);
+ if (!dn)
+ return 0;
+diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
+index 906dbaa97fe2..547fd13e4f8e 100644
+--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
++++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
+@@ -106,8 +106,11 @@ int remove_phb_dynamic(struct pci_controller *phb)
+ release_resource(res);
+ }
+
+- /* Free pci_controller data structure */
+- pcibios_free_controller(phb);
++ /*
++ * The pci_controller data structure is freed by
++ * the pcibios_free_controller_deferred() callback;
++ * see pseries_root_bridge_prepare().
++ */
+
+ return 0;
+ }
+diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
+index 38951b0fcc5a..993a07235e91 100644
+--- a/arch/x86/include/asm/fpu/xstate.h
++++ b/arch/x86/include/asm/fpu/xstate.h
+@@ -24,11 +24,12 @@
+ XFEATURE_MASK_YMM | \
+ XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+- XFEATURE_MASK_Hi16_ZMM | \
+- XFEATURE_MASK_PKRU)
++ XFEATURE_MASK_Hi16_ZMM)
+
+ /* Supported features which require eager state saving */
+-#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
++#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \
++ XFEATURE_MASK_BNDCSR | \
++ XFEATURE_MASK_PKRU)
+
+ /* All currently supported features */
+ #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
+diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
+index b07233b64578..c2f94dcc92ce 100644
+--- a/arch/x86/include/asm/mpspec.h
++++ b/arch/x86/include/asm/mpspec.h
+@@ -6,7 +6,6 @@
+ #include <asm/x86_init.h>
+ #include <asm/apicdef.h>
+
+-extern int apic_version[];
+ extern int pic_mode;
+
+ #ifdef CONFIG_X86_32
+@@ -40,6 +39,7 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES];
+ extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+ extern unsigned int boot_cpu_physical_apicid;
++extern u8 boot_cpu_apic_version;
+ extern unsigned long mp_lapic_addr;
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
+index 9414f84584e4..8e6e8bc8cd9f 100644
+--- a/arch/x86/kernel/acpi/boot.c
++++ b/arch/x86/kernel/acpi/boot.c
+@@ -180,7 +180,7 @@ static int acpi_register_lapic(int id, u8 enabled)
+ }
+
+ if (boot_cpu_physical_apicid != -1U)
+- ver = apic_version[boot_cpu_physical_apicid];
++ ver = boot_cpu_apic_version;
+
+ return generic_processor_info(id, ver);
+ }
+diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
+index b15e1c158bda..cc7bc8bfd890 100644
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -64,6 +64,8 @@ unsigned disabled_cpus;
+ unsigned int boot_cpu_physical_apicid = -1U;
+ EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
+
++u8 boot_cpu_apic_version;
++
+ /*
+ * The highest APIC ID seen during enumeration.
+ */
+@@ -1790,8 +1792,7 @@ void __init init_apic_mappings(void)
+ * since smp_sanity_check is prepared for such a case
+ * and disable smp mode
+ */
+- apic_version[new_apicid] =
+- GET_APIC_VERSION(apic_read(APIC_LVR));
++ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
+ }
+
+@@ -1806,13 +1807,10 @@ void __init register_lapic_address(unsigned long address)
+ }
+ if (boot_cpu_physical_apicid == -1U) {
+ boot_cpu_physical_apicid = read_apic_id();
+- apic_version[boot_cpu_physical_apicid] =
+- GET_APIC_VERSION(apic_read(APIC_LVR));
++ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
+ }
+
+-int apic_version[MAX_LOCAL_APIC];
+-
+ /*
+ * Local APIC interrupts
+ */
+@@ -2102,11 +2100,10 @@ int generic_processor_info(int apicid, int version)
+ cpu, apicid);
+ version = 0x10;
+ }
+- apic_version[apicid] = version;
+
+- if (version != apic_version[boot_cpu_physical_apicid]) {
++ if (version != boot_cpu_apic_version) {
+ pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+- apic_version[boot_cpu_physical_apicid], cpu, version);
++ boot_cpu_apic_version, cpu, version);
+ }
+
+ physid_set(apicid, phys_cpu_present_map);
+@@ -2249,7 +2246,7 @@ int __init APIC_init_uniprocessor(void)
+ * Complain if the BIOS pretends there is one.
+ */
+ if (!boot_cpu_has(X86_FEATURE_APIC) &&
+- APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
++ APIC_INTEGRATED(boot_cpu_apic_version)) {
+ pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
+ boot_cpu_physical_apicid);
+ return -1;
+diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
+index 446702ed99dc..fbfb2446de58 100644
+--- a/arch/x86/kernel/apic/io_apic.c
++++ b/arch/x86/kernel/apic/io_apic.c
+@@ -1592,7 +1592,7 @@ void __init setup_ioapic_ids_from_mpc(void)
+ * no meaning without the serial APIC bus.
+ */
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+- || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
++ || APIC_XAPIC(boot_cpu_apic_version))
+ return;
+ setup_ioapic_ids_from_mpc_nocheck();
+ }
+@@ -2422,7 +2422,7 @@ static int io_apic_get_unique_id(int ioapic, int apic_id)
+ static u8 io_apic_unique_id(int idx, u8 id)
+ {
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+- !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
++ !APIC_XAPIC(boot_cpu_apic_version))
+ return io_apic_get_unique_id(idx, id);
+ else
+ return id;
+diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
+index f316e34abb42..49da80cf6a77 100644
+--- a/arch/x86/kernel/apic/probe_32.c
++++ b/arch/x86/kernel/apic/probe_32.c
+@@ -153,7 +153,7 @@ early_param("apic", parse_apic);
+
+ void __init default_setup_apic_routing(void)
+ {
+- int version = apic_version[boot_cpu_physical_apicid];
++ int version = boot_cpu_apic_version;
+
+ if (num_possible_cpus() > 8) {
+ switch (boot_cpu_data.x86_vendor) {
+diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
+index a5e400afc563..84eced58a428 100644
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -661,11 +661,28 @@ void irq_complete_move(struct irq_cfg *cfg)
+ */
+ void irq_force_complete_move(struct irq_desc *desc)
+ {
+- struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+- struct apic_chip_data *data = apic_chip_data(irqdata);
+- struct irq_cfg *cfg = data ? &data->cfg : NULL;
++ struct irq_data *irqdata;
++ struct apic_chip_data *data;
++ struct irq_cfg *cfg;
+ unsigned int cpu;
+
++ /*
++ * The function is called for all descriptors regardless of which
++ * irqdomain they belong to. For example if an IRQ is provided by
++ * an irq_chip as part of a GPIO driver, the chip data for that
++ * descriptor is specific to the irq_chip in question.
++ *
++ * Check first that the chip_data is what we expect
++ * (apic_chip_data) before touching it any further.
++ */
++ irqdata = irq_domain_get_irq_data(x86_vector_domain,
++ irq_desc_get_irq(desc));
++ if (!irqdata)
++ return;
++
++ data = apic_chip_data(irqdata);
++ cfg = data ? &data->cfg : NULL;
++
+ if (!cfg)
+ return;
+
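+Distilled from the hunk above, the defensive pattern is to resolve the
+descriptor's irq_data through the domain you own instead of casting whatever
+chip data happens to be installed; a sketch with an illustrative helper name:
+
+        static struct apic_chip_data *own_chip_data(struct irq_desc *desc)
+        {
+                struct irq_data *irqd;
+
+                /*
+                 * Chip data installed by a foreign irq_chip (e.g. a GPIO
+                 * driver) is not an apic_chip_data; only a lookup in
+                 * x86_vector_domain guarantees the expected type.
+                 */
+                irqd = irq_domain_get_irq_data(x86_vector_domain,
+                                               irq_desc_get_irq(desc));
+                return irqd ? apic_chip_data(irqd) : NULL;
+        }
+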
+diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
+index 621b501f8935..8a90f1517837 100644
+--- a/arch/x86/kernel/e820.c
++++ b/arch/x86/kernel/e820.c
+@@ -348,7 +348,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
+ * continue building up new bios map based on this
+ * information
+ */
+- if (current_type != last_type || current_type == E820_PRAM) {
++ if (current_type != last_type) {
+ if (last_type != 0) {
+ new_bios[new_bios_entry].size =
+ change_point[chgidx]->addr - last_addr;
+@@ -754,7 +754,7 @@ u64 __init early_reserve_e820(u64 size, u64 align)
+ /*
+ * Find the highest page frame number we have available
+ */
+-static unsigned long __init e820_end_pfn(unsigned long limit_pfn)
++static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
+ {
+ int i;
+ unsigned long last_pfn = 0;
+@@ -765,11 +765,7 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn)
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+
+- /*
+- * Persistent memory is accounted as ram for purposes of
+- * establishing max_pfn and mem_map.
+- */
+- if (ei->type != E820_RAM && ei->type != E820_PRAM)
++ if (ei->type != type)
+ continue;
+
+ start_pfn = ei->addr >> PAGE_SHIFT;
+@@ -794,12 +790,12 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn)
+ }
+ unsigned long __init e820_end_of_ram_pfn(void)
+ {
+- return e820_end_pfn(MAX_ARCH_PFN);
++ return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
+ }
+
+ unsigned long __init e820_end_of_low_ram_pfn(void)
+ {
+- return e820_end_pfn(1UL << (32-PAGE_SHIFT));
++ return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_RAM);
+ }
+
+ static void early_panic(char *msg)
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 6e789ca1f841..5dc86d216a58 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -110,12 +110,13 @@ void __show_regs(struct pt_regs *regs, int all)
+ get_debugreg(d7, 7);
+
+ /* Only print out debug registers if they are in their non-default state. */
+- if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
+- (d6 == DR6_RESERVED) && (d7 == 0x400))
+- return;
+-
+- printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+- printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
++ if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
++ (d6 == DR6_RESERVED) && (d7 == 0x400))) {
++ printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
++ d0, d1, d2);
++ printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
++ d3, d6, d7);
++ }
+
+ if (boot_cpu_has(X86_FEATURE_OSPKE))
+ printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
+diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
+index 600edd225e81..67ed18e02523 100644
+--- a/arch/x86/kernel/ptrace.c
++++ b/arch/x86/kernel/ptrace.c
+@@ -173,8 +173,8 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
+ return sp;
+
+ prev_esp = (u32 *)(context);
+- if (prev_esp)
+- return (unsigned long)prev_esp;
++ if (*prev_esp)
++ return (unsigned long)*prev_esp;
+
+ return (unsigned long)regs;
+ }
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index fafe8b923cac..694c5297a0fa 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -676,7 +676,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
++ if (APIC_INTEGRATED(boot_cpu_apic_version)) {
+ maxlvt = lapic_get_maxlvt();
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+@@ -703,7 +703,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+- if (APIC_INTEGRATED(apic_version[phys_apicid])) {
++ if (APIC_INTEGRATED(boot_cpu_apic_version)) {
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+@@ -742,7 +742,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
+ * Determine this based on the APIC version.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+ */
+- if (APIC_INTEGRATED(apic_version[phys_apicid]))
++ if (APIC_INTEGRATED(boot_cpu_apic_version))
+ num_starts = 2;
+ else
+ num_starts = 0;
+@@ -980,7 +980,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
++ if (APIC_INTEGRATED(boot_cpu_apic_version)) {
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+@@ -1235,7 +1235,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
+ /*
+ * If we couldn't find a local APIC, then get out of here now!
+ */
+- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
++ if (APIC_INTEGRATED(boot_cpu_apic_version) &&
+ !boot_cpu_has(X86_FEATURE_APIC)) {
+ if (!disable_apic) {
+ pr_err("BIOS bug, local APIC #%d not detected!...\n",
+@@ -1393,9 +1393,21 @@ __init void prefill_possible_map(void)
+ {
+ int i, possible;
+
+- /* no processor from mptable or madt */
+- if (!num_processors)
+- num_processors = 1;
++ /* No boot processor was found in mptable or ACPI MADT */
++ if (!num_processors) {
++ int apicid = boot_cpu_physical_apicid;
++ int cpu = hard_smp_processor_id();
++
++ pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
++
++ /* Make sure boot cpu is enumerated */
++ if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
++ apic->apic_id_valid(apicid))
++ generic_processor_info(apicid, boot_cpu_apic_version);
++
++ if (!num_processors)
++ num_processors = 1;
++ }
+
+ i = setup_max_cpus ?: 1;
+ if (setup_possible_cpus == -1) {
+diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
+index 719cf291dcdf..40c3aab901ed 100644
+--- a/arch/x86/xen/smp.c
++++ b/arch/x86/xen/smp.c
+@@ -87,6 +87,12 @@ static void cpu_bringup(void)
+ cpu_data(cpu).x86_max_cores = 1;
+ set_cpu_sibling_map(cpu);
+
++ /*
++ * identify_cpu() may have set logical_pkg_id to -1 due
++ * to incorrect phys_proc_id. Let's recompute it.
++ */
++ topology_update_package_map(apic->cpu_present_to_apicid(cpu), cpu);
++
+ xen_setup_cpu_clockevents();
+
+ notify_cpu_starting(cpu);
+diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
+index de0337ebd658..4f3137d9a35e 100644
+--- a/drivers/char/tpm/tpm-dev.c
++++ b/drivers/char/tpm/tpm-dev.c
+@@ -139,7 +139,7 @@ static ssize_t tpm_write(struct file *file, const char __user *buf,
+
+ /* atomic tpm command send and result receive */
+ out_size = tpm_transmit(priv->chip, priv->data_buffer,
+- sizeof(priv->data_buffer));
++ sizeof(priv->data_buffer), 0);
+ if (out_size < 0) {
+ mutex_unlock(&priv->buffer_mutex);
+ return out_size;
+diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
+index e2fa89c88304..9ba86eb3f2ef 100644
+--- a/drivers/char/tpm/tpm-interface.c
++++ b/drivers/char/tpm/tpm-interface.c
+@@ -330,8 +330,8 @@ EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration);
+ /*
+ * Internal kernel interface to transmit TPM commands
+ */
+-ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
+- size_t bufsiz)
++ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz,
++ unsigned int flags)
+ {
+ ssize_t rc;
+ u32 count, ordinal;
+@@ -350,7 +350,8 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
+ return -E2BIG;
+ }
+
+- mutex_lock(&chip->tpm_mutex);
++ if (!(flags & TPM_TRANSMIT_UNLOCKED))
++ mutex_lock(&chip->tpm_mutex);
+
+ rc = chip->ops->send(chip, (u8 *) buf, count);
+ if (rc < 0) {
+@@ -393,20 +394,21 @@ out_recv:
+ dev_err(chip->pdev,
+ "tpm_transmit: tpm_recv: error %zd\n", rc);
+ out:
+- mutex_unlock(&chip->tpm_mutex);
++ if (!(flags & TPM_TRANSMIT_UNLOCKED))
++ mutex_unlock(&chip->tpm_mutex);
+ return rc;
+ }
+
+ #define TPM_DIGEST_SIZE 20
+ #define TPM_RET_CODE_IDX 6
+
+-ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd,
+- int len, const char *desc)
++ssize_t tpm_transmit_cmd(struct tpm_chip *chip, const void *cmd,
++ int len, unsigned int flags, const char *desc)
+ {
+- struct tpm_output_header *header;
++ const struct tpm_output_header *header;
+ int err;
+
+- len = tpm_transmit(chip, (u8 *) cmd, len);
++ len = tpm_transmit(chip, (const u8 *)cmd, len, flags);
+ if (len < 0)
+ return len;
+ else if (len < TPM_HEADER_SIZE)
+@@ -454,7 +456,8 @@ ssize_t tpm_getcap(struct device *dev, __be32 subcap_id, cap_t *cap,
+ tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+ tpm_cmd.params.getcap_in.subcap = subcap_id;
+ }
+- rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc);
++ rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, 0,
++ desc);
+ if (!rc)
+ *cap = tpm_cmd.params.getcap_out.cap;
+ return rc;
+@@ -470,7 +473,7 @@ void tpm_gen_interrupt(struct tpm_chip *chip)
+ tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+ tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
+
+- rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
++ rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, 0,
+ "attempting to determine the timeouts");
+ }
+ EXPORT_SYMBOL_GPL(tpm_gen_interrupt);
+@@ -491,7 +494,7 @@ static int tpm_startup(struct tpm_chip *chip, __be16 startup_type)
+ start_cmd.header.in = tpm_startup_header;
+
+ start_cmd.params.startup_in.startup_type = startup_type;
+- return tpm_transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE,
++ return tpm_transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE, 0,
+ "attempting to start the TPM");
+ }
+
+@@ -522,7 +525,8 @@ int tpm_get_timeouts(struct tpm_chip *chip)
+ tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+ tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+ tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
+- rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL);
++ rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, 0,
++ NULL);
+
+ if (rc == TPM_ERR_INVALID_POSTINIT) {
+ /* The TPM is not started, we are the first to talk to it.
+@@ -536,7 +540,7 @@ int tpm_get_timeouts(struct tpm_chip *chip)
+ tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+ tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
+ rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+- NULL);
++ 0, NULL);
+ }
+ if (rc) {
+ dev_err(chip->pdev,
+@@ -597,7 +601,7 @@ duration:
+ tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+ tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_DURATION;
+
+- rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
++ rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, 0,
+ "attempting to determine the durations");
+ if (rc)
+ return rc;
+@@ -653,7 +657,7 @@ static int tpm_continue_selftest(struct tpm_chip *chip)
+ struct tpm_cmd_t cmd;
+
+ cmd.header.in = continue_selftest_header;
+- rc = tpm_transmit_cmd(chip, &cmd, CONTINUE_SELFTEST_RESULT_SIZE,
++ rc = tpm_transmit_cmd(chip, &cmd, CONTINUE_SELFTEST_RESULT_SIZE, 0,
+ "continue selftest");
+ return rc;
+ }
+@@ -673,7 +677,7 @@ int tpm_pcr_read_dev(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
+
+ cmd.header.in = pcrread_header;
+ cmd.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx);
+- rc = tpm_transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE,
++ rc = tpm_transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE, 0,
+ "attempting to read a pcr value");
+
+ if (rc == 0)
+@@ -771,7 +775,7 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
+ cmd.header.in = pcrextend_header;
+ cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
+ memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
+- rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
++ rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, 0,
+ "attempting extend a PCR value");
+
+ tpm_chip_put(chip);
+@@ -810,7 +814,7 @@ int tpm_do_selftest(struct tpm_chip *chip)
+ /* Attempt to read a PCR value */
+ cmd.header.in = pcrread_header;
+ cmd.params.pcrread_in.pcr_idx = cpu_to_be32(0);
+- rc = tpm_transmit(chip, (u8 *) &cmd, READ_PCR_RESULT_SIZE);
++ rc = tpm_transmit(chip, (u8 *) &cmd, READ_PCR_RESULT_SIZE, 0);
+ /* Some buggy TPMs will not respond to tpm_tis_ready() for
+ * around 300ms while the self test is ongoing, keep trying
+ * until the self test duration expires. */
+@@ -851,7 +855,7 @@ int tpm_send(u32 chip_num, void *cmd, size_t buflen)
+ if (chip == NULL)
+ return -ENODEV;
+
+- rc = tpm_transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd");
++ rc = tpm_transmit_cmd(chip, cmd, buflen, 0, "attempting tpm_cmd");
+
+ tpm_chip_put(chip);
+ return rc;
+@@ -953,14 +957,15 @@ int tpm_pm_suspend(struct device *dev)
+ cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(tpm_suspend_pcr);
+ memcpy(cmd.params.pcrextend_in.hash, dummy_hash,
+ TPM_DIGEST_SIZE);
+- rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
++ rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, 0,
+ "extending dummy pcr before suspend");
+ }
+
+ /* now do the actual savestate */
+ for (try = 0; try < TPM_RETRY; try++) {
+ cmd.header.in = savestate_header;
+- rc = tpm_transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, NULL);
++ rc = tpm_transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, 0,
++ NULL);
+
+ /*
+ * If the TPM indicates that it is too busy to respond to
+@@ -1044,8 +1049,8 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max)
+ tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes);
+
+ err = tpm_transmit_cmd(chip, &tpm_cmd,
+- TPM_GETRANDOM_RESULT_SIZE + num_bytes,
+- "attempting get random");
++ TPM_GETRANDOM_RESULT_SIZE + num_bytes,
++ 0, "attempting get random");
+ if (err)
+ break;
+
+diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c
+index ee66fd4673f3..f880856aa75e 100644
+--- a/drivers/char/tpm/tpm-sysfs.c
++++ b/drivers/char/tpm/tpm-sysfs.c
+@@ -39,7 +39,7 @@ static ssize_t pubek_show(struct device *dev, struct device_attribute *attr,
+ struct tpm_chip *chip = dev_get_drvdata(dev);
+
+ tpm_cmd.header.in = tpm_readpubek_header;
+- err = tpm_transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE,
++ err = tpm_transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE, 0,
+ "attempting to read the PUBEK");
+ if (err)
+ goto out;
+diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
+index 28b477e8da6a..f475b747454b 100644
+--- a/drivers/char/tpm/tpm.h
++++ b/drivers/char/tpm/tpm.h
+@@ -494,11 +494,15 @@ extern struct class *tpm_class;
+ extern dev_t tpm_devt;
+ extern const struct file_operations tpm_fops;
+
++enum tpm_transmit_flags {
++ TPM_TRANSMIT_UNLOCKED = BIT(0),
++};
++
++ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz,
++ unsigned int flags);
++ssize_t tpm_transmit_cmd(struct tpm_chip *chip, const void *cmd, int len,
++ unsigned int flags, const char *desc);
+ ssize_t tpm_getcap(struct device *, __be32, cap_t *, const char *);
+-ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
+- size_t bufsiz);
+-ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd, int len,
+- const char *desc);
+ extern int tpm_get_timeouts(struct tpm_chip *);
+ extern void tpm_gen_interrupt(struct tpm_chip *);
+ extern int tpm_do_selftest(struct tpm_chip *);
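+
+The new TPM_TRANSMIT_UNLOCKED flag lets a caller hold tpm_mutex across a
+whole command sequence while each transmit skips its own locking. In sketch
+form, mirroring the tpm2_unseal_trusted() rework later in this patch:
+
+        mutex_lock(&chip->tpm_mutex);
+        rc = tpm2_load_cmd(chip, payload, options, &blob_handle,
+                           TPM_TRANSMIT_UNLOCKED);
+        if (!rc) {
+                rc = tpm2_unseal_cmd(chip, payload, options, blob_handle,
+                                     TPM_TRANSMIT_UNLOCKED);
+                /* Flush under the same lock so no other user can slip
+                 * in between unseal and cleanup. */
+                tpm2_flush_context_cmd(chip, blob_handle,
+                                       TPM_TRANSMIT_UNLOCKED);
+        }
+        mutex_unlock(&chip->tpm_mutex);
+
+This keeps load/unseal/flush atomic with respect to concurrent TPM users,
+which per-transmit locking alone cannot guarantee.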
+diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
+index ca2d2386a4eb..19f14dc4cda7 100644
+--- a/drivers/char/tpm/tpm2-cmd.c
++++ b/drivers/char/tpm/tpm2-cmd.c
+@@ -282,7 +282,7 @@ int tpm2_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
+ sizeof(cmd.params.pcrread_in.pcr_select));
+ cmd.params.pcrread_in.pcr_select[pcr_idx >> 3] = 1 << (pcr_idx & 0x7);
+
+- rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
++ rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0,
+ "attempting to read a pcr value");
+ if (rc == 0) {
+ buf = cmd.params.pcrread_out.digest;
+@@ -330,7 +330,7 @@ int tpm2_pcr_extend(struct tpm_chip *chip, int pcr_idx, const u8 *hash)
+ cmd.params.pcrextend_in.hash_alg = cpu_to_be16(TPM2_ALG_SHA1);
+ memcpy(cmd.params.pcrextend_in.digest, hash, TPM_DIGEST_SIZE);
+
+- rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
++ rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0,
+ "attempting extend a PCR value");
+
+ return rc;
+@@ -376,7 +376,7 @@ int tpm2_get_random(struct tpm_chip *chip, u8 *out, size_t max)
+ cmd.header.in = tpm2_getrandom_header;
+ cmd.params.getrandom_in.size = cpu_to_be16(num_bytes);
+
+- err = tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
++ err = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0,
+ "attempting get random");
+ if (err)
+ break;
+@@ -434,12 +434,12 @@ static void tpm2_buf_append_auth(struct tpm_buf *buf, u32 session_handle,
+ }
+
+ /**
+- * tpm2_seal_trusted() - seal a trusted key
+- * @chip_num: A specific chip number for the request or TPM_ANY_NUM
+- * @options: authentication values and other options
++ * tpm2_seal_trusted() - seal the payload of a trusted key
++ * @chip: TPM chip to use
+ * @payload: the key data in clear and encrypted form
++ * @options: authentication values and other options
+ *
+- * Returns < 0 on error and 0 on success.
++ * Return: < 0 on error and 0 on success.
+ */
+ int tpm2_seal_trusted(struct tpm_chip *chip,
+ struct trusted_key_payload *payload,
+@@ -512,7 +512,7 @@ int tpm2_seal_trusted(struct tpm_chip *chip,
+ goto out;
+ }
+
+- rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "sealing data");
++ rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, 0, "sealing data");
+ if (rc)
+ goto out;
+
+@@ -538,10 +538,18 @@ out:
+ return rc;
+ }
+
+-static int tpm2_load(struct tpm_chip *chip,
+- struct trusted_key_payload *payload,
+- struct trusted_key_options *options,
+- u32 *blob_handle)
++/**
++ * tpm2_load_cmd() - execute a TPM2_Load command
++ * @chip: TPM chip to use
++ * @payload: the key data in clear and encrypted form
++ * @options: authentication values and other options
++ *
++ * Return: same as with tpm_transmit_cmd
++ */
++static int tpm2_load_cmd(struct tpm_chip *chip,
++ struct trusted_key_payload *payload,
++ struct trusted_key_options *options,
++ u32 *blob_handle, unsigned int flags)
+ {
+ struct tpm_buf buf;
+ unsigned int private_len;
+@@ -576,7 +584,7 @@ static int tpm2_load(struct tpm_chip *chip,
+ goto out;
+ }
+
+- rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "loading blob");
++ rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, flags, "loading blob");
+ if (!rc)
+ *blob_handle = be32_to_cpup(
+ (__be32 *) &buf.data[TPM_HEADER_SIZE]);
+@@ -590,7 +598,16 @@ out:
+ return rc;
+ }
+
+-static void tpm2_flush_context(struct tpm_chip *chip, u32 handle)
++/**
++ * tpm2_flush_context_cmd() - execute a TPM2_FlushContext command
++ * @chip: TPM chip to use
++ * @handle: handle of the context to flush
++ * @flags: tpm transmit flags
++ */
++static void tpm2_flush_context_cmd(struct tpm_chip *chip, u32 handle,
++ unsigned int flags)
+ {
+ struct tpm_buf buf;
+ int rc;
+@@ -604,7 +621,8 @@ static void tpm2_flush_context(struct tpm_chip *chip, u32 handle)
+
+ tpm_buf_append_u32(&buf, handle);
+
+- rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "flushing context");
++ rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, flags,
++ "flushing context");
+ if (rc)
+ dev_warn(chip->pdev, "0x%08x was not flushed, rc=%d\n", handle,
+ rc);
+@@ -612,10 +630,18 @@ static void tpm2_flush_context(struct tpm_chip *chip, u32 handle)
+ tpm_buf_destroy(&buf);
+ }
+
+-static int tpm2_unseal(struct tpm_chip *chip,
+- struct trusted_key_payload *payload,
+- struct trusted_key_options *options,
+- u32 blob_handle)
++/**
++ * tpm2_unseal_cmd() - execute a TPM2_Unseal command
++ * @chip: TPM chip to use
++ * @payload: the key data in clear and encrypted form
++ * @options: authentication values and other options
++ *
++ * Return: same as with tpm_transmit_cmd
++ */
++static int tpm2_unseal_cmd(struct tpm_chip *chip,
++ struct trusted_key_payload *payload,
++ struct trusted_key_options *options,
++ u32 blob_handle, unsigned int flags)
+ {
+ struct tpm_buf buf;
+ u16 data_len;
+@@ -635,7 +661,7 @@ static int tpm2_unseal(struct tpm_chip *chip,
+ options->blobauth /* hmac */,
+ TPM_DIGEST_SIZE);
+
+- rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "unsealing");
++ rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, flags, "unsealing");
+ if (rc > 0)
+ rc = -EPERM;
+
+@@ -654,12 +680,12 @@ static int tpm2_unseal(struct tpm_chip *chip,
+ }
+
+ /**
+- * tpm_unseal_trusted() - unseal a trusted key
+- * @chip_num: A specific chip number for the request or TPM_ANY_NUM
+- * @options: authentication values and other options
++ * tpm2_unseal_trusted() - unseal the payload of a trusted key
++ * @chip: TPM chip to use
+ * @payload: the key data in clear and encrypted form
++ * @options: authentication values and other options
+ *
+- * Returns < 0 on error and 0 on success.
++ * Return: < 0 on error and 0 on success.
+ */
+ int tpm2_unseal_trusted(struct tpm_chip *chip,
+ struct trusted_key_payload *payload,
+@@ -668,14 +694,17 @@ int tpm2_unseal_trusted(struct tpm_chip *chip,
+ u32 blob_handle;
+ int rc;
+
+- rc = tpm2_load(chip, payload, options, &blob_handle);
++ mutex_lock(&chip->tpm_mutex);
++ rc = tpm2_load_cmd(chip, payload, options, &blob_handle,
++ TPM_TRANSMIT_UNLOCKED);
+ if (rc)
+- return rc;
+-
+- rc = tpm2_unseal(chip, payload, options, blob_handle);
+-
+- tpm2_flush_context(chip, blob_handle);
++ goto out;
+
++ rc = tpm2_unseal_cmd(chip, payload, options, blob_handle,
++ TPM_TRANSMIT_UNLOCKED);
++ tpm2_flush_context_cmd(chip, blob_handle, TPM_TRANSMIT_UNLOCKED);
++out:
++ mutex_unlock(&chip->tpm_mutex);
+ return rc;
+ }
+
+@@ -701,7 +730,7 @@ ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value,
+ cmd.params.get_tpm_pt_in.property_id = cpu_to_be32(property_id);
+ cmd.params.get_tpm_pt_in.property_cnt = cpu_to_be32(1);
+
+- rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), desc);
++ rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0, desc);
+ if (!rc)
+ *value = be32_to_cpu(cmd.params.get_tpm_pt_out.value);
+
+@@ -735,7 +764,7 @@ int tpm2_startup(struct tpm_chip *chip, u16 startup_type)
+ cmd.header.in = tpm2_startup_header;
+
+ cmd.params.startup_in.startup_type = cpu_to_be16(startup_type);
+- return tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
++ return tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0,
+ "attempting to start the TPM");
+ }
+ EXPORT_SYMBOL_GPL(tpm2_startup);
+@@ -764,7 +793,7 @@ void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type)
+ cmd.header.in = tpm2_shutdown_header;
+ cmd.params.startup_in.startup_type = cpu_to_be16(shutdown_type);
+
+- rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), "stopping the TPM");
++ rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0, "stopping the TPM");
+
+ /* In places where shutdown command is sent there's not much we can do
+ * except print the error code on a system failure.
+@@ -830,7 +859,7 @@ static int tpm2_start_selftest(struct tpm_chip *chip, bool full)
+ cmd.header.in = tpm2_selftest_header;
+ cmd.params.selftest_in.full_test = full;
+
+- rc = tpm_transmit_cmd(chip, &cmd, TPM2_SELF_TEST_IN_SIZE,
++ rc = tpm_transmit_cmd(chip, &cmd, TPM2_SELF_TEST_IN_SIZE, 0,
+ "continue selftest");
+
+ /* At least some prototype chips seem to give RC_TESTING error
+@@ -882,7 +911,7 @@ int tpm2_do_selftest(struct tpm_chip *chip)
+ cmd.params.pcrread_in.pcr_select[1] = 0x00;
+ cmd.params.pcrread_in.pcr_select[2] = 0x00;
+
+- rc = tpm_transmit_cmd(chip, (u8 *) &cmd, sizeof(cmd), NULL);
++ rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0, NULL);
+ if (rc < 0)
+ break;
+
+@@ -931,7 +960,7 @@ int tpm2_probe(struct tpm_chip *chip)
+ cmd.params.get_tpm_pt_in.property_id = cpu_to_be32(0x100);
+ cmd.params.get_tpm_pt_in.property_cnt = cpu_to_be32(1);
+
+- rc = tpm_transmit(chip, (const char *) &cmd, sizeof(cmd));
++ rc = tpm_transmit(chip, (const u8 *)&cmd, sizeof(cmd), 0);
+ if (rc < 0)
+ return rc;
+ else if (rc < TPM_HEADER_SIZE)
+diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
+index 0f7ec0deed6c..ee300bdcd266 100644
+--- a/drivers/char/tpm/tpm_crb.c
++++ b/drivers/char/tpm/tpm_crb.c
+@@ -142,6 +142,11 @@ static int crb_send(struct tpm_chip *chip, u8 *buf, size_t len)
+ struct crb_priv *priv = chip->vendor.priv;
+ int rc = 0;
+
++ /* Zero the cancel register so that the next command will not get
++ * canceled.
++ */
++ iowrite32(0, &priv->cca->cancel);
++
+ if (len > ioread32(&priv->cca->cmd_size)) {
+ dev_err(&chip->dev,
+ "invalid command count value %x %zx\n",
+@@ -175,8 +180,6 @@ static void crb_cancel(struct tpm_chip *chip)
+
+ if ((priv->flags & CRB_FL_ACPI_START) && crb_do_acpi_start(chip))
+ dev_err(&chip->dev, "ACPI Start failed\n");
+-
+- iowrite32(0, &priv->cca->cancel);
+ }
+
+ static bool crb_req_canceled(struct tpm_chip *chip, u8 status)
+diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
+index e342565e8715..1855b9ee807f 100644
+--- a/drivers/cpuidle/cpuidle-arm.c
++++ b/drivers/cpuidle/cpuidle-arm.c
+@@ -135,6 +135,7 @@ static int __init arm_idle_init(void)
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev) {
+ pr_err("Failed to allocate cpuidle device\n");
++ ret = -ENOMEM;
+ goto out_fail;
+ }
+ dev->cpu = cpu;
+diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
+index 1bcf601de5bc..26c084da877c 100644
+--- a/drivers/mfd/Kconfig
++++ b/drivers/mfd/Kconfig
+@@ -1535,6 +1535,7 @@ config MFD_WM8350
+ config MFD_WM8350_I2C
+ bool "Wolfson Microelectronics WM8350 with I2C"
+ select MFD_WM8350
++ select REGMAP_I2C
+ depends on I2C=y
+ help
+ The WM8350 is an integrated audio and power management
+diff --git a/drivers/mfd/atmel-hlcdc.c b/drivers/mfd/atmel-hlcdc.c
+index eca7ea69b81c..4b15b0840f16 100644
+--- a/drivers/mfd/atmel-hlcdc.c
++++ b/drivers/mfd/atmel-hlcdc.c
+@@ -50,8 +50,9 @@ static int regmap_atmel_hlcdc_reg_write(void *context, unsigned int reg,
+ if (reg <= ATMEL_HLCDC_DIS) {
+ u32 status;
+
+- readl_poll_timeout(hregmap->regs + ATMEL_HLCDC_SR, status,
+- !(status & ATMEL_HLCDC_SIP), 1, 100);
++ readl_poll_timeout_atomic(hregmap->regs + ATMEL_HLCDC_SR,
++ status, !(status & ATMEL_HLCDC_SIP),
++ 1, 100);
+ }
+
+ writel(val, hregmap->regs + reg);
+diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c
+index dbd907d7170e..691dab791f7a 100644
+--- a/drivers/mfd/rtsx_usb.c
++++ b/drivers/mfd/rtsx_usb.c
+@@ -46,9 +46,6 @@ static void rtsx_usb_sg_timed_out(unsigned long data)
+
+ dev_dbg(&ucr->pusb_intf->dev, "%s: sg transfer timed out", __func__);
+ usb_sg_cancel(&ucr->current_sg);
+-
+- /* we know the cancellation is caused by time-out */
+- ucr->current_sg.status = -ETIMEDOUT;
+ }
+
+ static int rtsx_usb_bulk_transfer_sglist(struct rtsx_ucr *ucr,
+@@ -67,12 +64,15 @@ static int rtsx_usb_bulk_transfer_sglist(struct rtsx_ucr *ucr,
+ ucr->sg_timer.expires = jiffies + msecs_to_jiffies(timeout);
+ add_timer(&ucr->sg_timer);
+ usb_sg_wait(&ucr->current_sg);
+- del_timer_sync(&ucr->sg_timer);
++ if (!del_timer_sync(&ucr->sg_timer))
++ ret = -ETIMEDOUT;
++ else
++ ret = ucr->current_sg.status;
+
+ if (act_len)
+ *act_len = ucr->current_sg.bytes;
+
+- return ucr->current_sg.status;
++ return ret;
+ }
+
+ int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe,
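+
+The rework above leans on del_timer_sync()'s return value: 0 means the timer
+was no longer pending, i.e. the timeout handler already ran and cancelled the
+transfer. That replaces the removed hack of having the handler overwrite
+current_sg.status, which raced against the USB core's own status updates. The
+resulting pattern, in sketch form:
+
+        ucr->sg_timer.expires = jiffies + msecs_to_jiffies(timeout);
+        add_timer(&ucr->sg_timer);
+        usb_sg_wait(&ucr->current_sg);
+
+        if (!del_timer_sync(&ucr->sg_timer))
+                ret = -ETIMEDOUT;              /* handler fired: timed out */
+        else
+                ret = ucr->current_sg.status;  /* completed before timeout */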
+diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
+index cdc7723b845d..bec6c2260a11 100644
+--- a/drivers/misc/cxl/vphb.c
++++ b/drivers/misc/cxl/vphb.c
+@@ -243,6 +243,11 @@ int cxl_pci_vphb_add(struct cxl_afu *afu)
+ if (phb->bus == NULL)
+ return -ENXIO;
+
++ /* Set release hook on root bus */
++ pci_set_host_bridge_release(to_pci_host_bridge(phb->bus->bridge),
++ pcibios_free_controller_deferred,
++ (void *) phb);
++
+ /* Claim resources. This might need some rework as well depending
+ * on whether we are doing probe-only or not, like assigning unassigned
+ * resources etc...
+@@ -269,7 +274,10 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu)
+ afu->phb = NULL;
+
+ pci_remove_root_bus(phb->bus);
+- pcibios_free_controller(phb);
++ /*
++ * We don't free phb here - that's handled by
++ * pcibios_free_controller_deferred()
++ */
+ }
+
+ bool cxl_pci_is_vphb_device(struct pci_dev *dev)
+diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
+index 5f4a2e04c8d7..add66236215c 100644
+--- a/drivers/pci/host-bridge.c
++++ b/drivers/pci/host-bridge.c
+@@ -44,6 +44,7 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
+ bridge->release_fn = release_fn;
+ bridge->release_data = release_data;
+ }
++EXPORT_SYMBOL_GPL(pci_set_host_bridge_release);
+
+ void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region,
+ struct resource *res)
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index 05a5300aa1ab..b16813c04484 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -363,7 +363,8 @@ static int dwc3_send_clear_stall_ep_cmd(struct dwc3_ep *dep)
+ * IN transfers due to a mishandled error condition. Synopsys
+ * STAR 9000614252.
+ */
+- if (dep->direction && (dwc->revision >= DWC3_REVISION_260A))
++ if (dep->direction && (dwc->revision >= DWC3_REVISION_260A) &&
++ (dwc->gadget.speed >= USB_SPEED_SUPER))
+ cmd |= DWC3_DEPCMD_CLEARPENDIN;
+
+ memset(&params, 0, sizeof(params));
+diff --git a/include/linux/mfd/88pm80x.h b/include/linux/mfd/88pm80x.h
+index d409ceb2231e..c118a7ec94d6 100644
+--- a/include/linux/mfd/88pm80x.h
++++ b/include/linux/mfd/88pm80x.h
+@@ -350,7 +350,7 @@ static inline int pm80x_dev_suspend(struct device *dev)
+ int irq = platform_get_irq(pdev, 0);
+
+ if (device_may_wakeup(dev))
+- set_bit((1 << irq), &chip->wu_flag);
++ set_bit(irq, &chip->wu_flag);
+
+ return 0;
+ }
+@@ -362,7 +362,7 @@ static inline int pm80x_dev_resume(struct device *dev)
+ int irq = platform_get_irq(pdev, 0);
+
+ if (device_may_wakeup(dev))
+- clear_bit((1 << irq), &chip->wu_flag);
++ clear_bit(irq, &chip->wu_flag);
+
+ return 0;
+ }
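+
+Both hunks above fix a classic bitops mistake: set_bit() and clear_bit() take
+a bit number, not a mask. A small illustration:
+
+        unsigned long wu_flag = 0;
+        int irq = 3;
+
+        set_bit(1 << irq, &wu_flag);    /* wrong: sets bit 8, not bit 3 */
+        set_bit(irq, &wu_flag);         /* right: sets bit 3 */
+
+For larger IRQ numbers the buggy form does more than misplace a flag:
+set_bit(1 << 8, &wu_flag) addresses bit 256 and writes outside the flag word
+entirely.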
+diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
+index b6c394563178..1b72b17d90fe 100644
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -403,8 +403,11 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
+ tkr = tkf->base + (seq & 0x01);
+ now = ktime_to_ns(tkr->base);
+
+- now += clocksource_delta(tkr->read(tkr->clock),
+- tkr->cycle_last, tkr->mask);
++ now += timekeeping_delta_to_ns(tkr,
++ clocksource_delta(
++ tkr->read(tkr->clock),
++ tkr->cycle_last,
++ tkr->mask));
+ } while (read_seqcount_retry(&tkf->seq, seq));
+
+ return now;
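+
+Before this hunk the fast accessor added a raw cycle delta directly to a
+nanosecond base, mixing units. timekeeping_delta_to_ns() scales the delta
+first; conceptually it is the standard clocksource mult/shift conversion
+(simplified sketch, assuming the usual tk_read_base fields and ignoring the
+fractional-nanosecond accumulator the real helper also folds in):
+
+        static inline u64 delta_to_ns(u32 mult, u32 shift, u64 delta)
+        {
+                return (delta * mult) >> shift;
+        }
+
+so now = base_ns + delta_to_ns(tkr->mult, tkr->shift, delta) stays in
+nanoseconds throughout.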
+diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
+index 1bcbc12e03d9..b5b932c7d1a8 100644
+--- a/security/integrity/ima/ima_appraise.c
++++ b/security/integrity/ima/ima_appraise.c
+@@ -190,7 +190,7 @@ int ima_appraise_measurement(enum ima_hooks func,
+ {
+ static const char op[] = "appraise_data";
+ char *cause = "unknown";
+- struct dentry *dentry = file->f_path.dentry;
++ struct dentry *dentry = file_dentry(file);
+ struct inode *inode = d_backing_inode(dentry);
+ enum integrity_status status = INTEGRITY_UNKNOWN;
+ int rc = xattr_len, hash_start = 0;
+@@ -295,7 +295,7 @@ out:
+ */
+ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
+ {
+- struct dentry *dentry = file->f_path.dentry;
++ struct dentry *dentry = file_dentry(file);
+ int rc = 0;
+
+ /* do not collect and update hash for digital signatures */
+diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
+index 68b26c340acd..60ac3aeb5bc6 100644
+--- a/security/integrity/ima/ima_main.c
++++ b/security/integrity/ima/ima_main.c
+@@ -222,7 +222,7 @@ static int process_measurement(struct file *file, char *buf, loff_t size,
+ if ((action & IMA_APPRAISE_SUBMASK) ||
+ strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0)
+ /* read 'security.ima' */
+- xattr_len = ima_read_xattr(file->f_path.dentry, &xattr_value);
++ xattr_len = ima_read_xattr(file_dentry(file), &xattr_value);
+
+ hash_algo = ima_get_hash_algo(xattr_value, xattr_len);
+
+diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c
+index 36470af7eda7..92b819e4f729 100644
+--- a/sound/pci/ali5451/ali5451.c
++++ b/sound/pci/ali5451/ali5451.c
+@@ -1408,6 +1408,7 @@ snd_ali_playback_pointer(struct snd_pcm_substream *substream)
+ spin_unlock(&codec->reg_lock);
+ dev_dbg(codec->card->dev, "playback pointer returned cso=%xh.\n", cso);
+
++ cso %= runtime->buffer_size;
+ return cso;
+ }
+
+@@ -1428,6 +1429,7 @@ static snd_pcm_uframes_t snd_ali_pointer(struct snd_pcm_substream *substream)
+ cso = inw(ALI_REG(codec, ALI_CSO_ALPHA_FMS + 2));
+ spin_unlock(&codec->reg_lock);
+
++ cso %= runtime->buffer_size;
+ return cso;
+ }
+
+diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
+index 81b7da8e56d3..183311cb849e 100644
+--- a/sound/usb/line6/driver.c
++++ b/sound/usb/line6/driver.c
+@@ -29,7 +29,7 @@
+ /*
+ This is Line 6's MIDI manufacturer ID.
+ */
+-const unsigned char line6_midi_id[] = {
++const unsigned char line6_midi_id[3] = {
+ 0x00, 0x01, 0x0c
+ };
+ EXPORT_SYMBOL_GPL(line6_midi_id);
+diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
+index f6c3bf79af9a..04991b009132 100644
+--- a/sound/usb/mixer_quirks.c
++++ b/sound/usb/mixer_quirks.c
+@@ -1831,6 +1831,7 @@ void snd_usb_mixer_rc_memory_change(struct usb_mixer_interface *mixer,
+ }
+
+ static void snd_dragonfly_quirk_db_scale(struct usb_mixer_interface *mixer,
++ struct usb_mixer_elem_info *cval,
+ struct snd_kcontrol *kctl)
+ {
+ /* Approximation using 10 ranges based on output measurement on hw v1.2.
+@@ -1848,10 +1849,19 @@ static void snd_dragonfly_quirk_db_scale(struct usb_mixer_interface *mixer,
+ 41, 50, TLV_DB_MINMAX_ITEM(-441, 0),
+ );
+
+- usb_audio_info(mixer->chip, "applying DragonFly dB scale quirk\n");
+- kctl->tlv.p = scale;
+- kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+- kctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;
++ if (cval->min == 0 && cval->max == 50) {
++ usb_audio_info(mixer->chip, "applying DragonFly dB scale quirk (0-50 variant)\n");
++ kctl->tlv.p = scale;
++ kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ;
++ kctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;
++
++ } else if (cval->min == 0 && cval->max <= 1000) {
++ /* Some other clearly broken DragonFly variant.
++ * At least a 0..53 variant (hw v1.0) exists.
++ */
++ usb_audio_info(mixer->chip, "ignoring too narrow dB range on a DragonFly device");
++ kctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;
++ }
+ }
+
+ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer,
+@@ -1860,8 +1870,8 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer,
+ {
+ switch (mixer->chip->usb_id) {
+ case USB_ID(0x21b4, 0x0081): /* AudioQuest DragonFly */
+- if (unitid == 7 && cval->min == 0 && cval->max == 50)
+- snd_dragonfly_quirk_db_scale(mixer, kctl);
++ if (unitid == 7 && cval->control == UAC_FU_VOLUME)
++ snd_dragonfly_quirk_db_scale(mixer, cval, kctl);
+ break;
+ }
+ }
+diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
+index a027569facfa..6e9c40eea208 100644
+--- a/virt/kvm/arm/pmu.c
++++ b/virt/kvm/arm/pmu.c
+@@ -423,6 +423,14 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
+ if (!kvm_arm_support_pmu_v3())
+ return -ENODEV;
+
++ /*
++ * We currently require an in-kernel VGIC to use the PMU emulation,
++ * because we do not support forwarding PMU overflow interrupts to
++ * userspace yet.
++ */
++ if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))
++ return -ENODEV;
++
+ if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) ||
+ !kvm_arm_pmu_irq_initialized(vcpu))
+ return -ENXIO;
+diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
+index 69b61abefa19..7ea791151006 100644
+--- a/virt/kvm/arm/vgic/vgic.c
++++ b/virt/kvm/arm/vgic/vgic.c
+@@ -553,6 +553,9 @@ next:
+ /* Sync back the hardware VGIC state into our emulation after a guest's run. */
+ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
+ {
++ if (unlikely(!vgic_initialized(vcpu->kvm)))
++ return;
++
+ vgic_process_maintenance_interrupt(vcpu);
+ vgic_fold_lr_state(vcpu);
+ vgic_prune_ap_list(vcpu);
+@@ -561,6 +564,9 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
+ /* Flush our emulation state into the GIC hardware before entering the guest. */
+ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
+ {
++ if (unlikely(!vgic_initialized(vcpu->kvm)))
++ return;
++
+ spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ vgic_flush_lr_state(vcpu);
+ spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-10-20 8:35 Alice Ferrazzi
0 siblings, 0 replies; 20+ messages in thread
From: Alice Ferrazzi @ 2016-10-20 8:35 UTC (permalink / raw
To: gentoo-commits
commit: 93de6abd2dbb94113170fc116514b46b910ed494
Author: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
AuthorDate: Thu Oct 20 08:33:28 2016 +0000
Commit: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
CommitDate: Thu Oct 20 08:34:42 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=93de6abd
Linux patch 4.7.9
0000_README | 4 +++
1008_linux-4.7.9.patch | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 79 insertions(+)
diff --git a/0000_README b/0000_README
index 00700f9..dfea86d 100644
--- a/0000_README
+++ b/0000_README
@@ -75,6 +75,10 @@ Patch: 1007_linux-4.7.8.patch
From: http://www.kernel.org
Desc: Linux 4.7.8
+Patch: 1008_linux-4.7.9.patch
+From: http://www.kernel.org
+Desc: Linux 4.7.9
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1008_linux-4.7.9.patch b/1008_linux-4.7.9.patch
new file mode 100644
index 0000000..c84f8fe
--- /dev/null
+++ b/1008_linux-4.7.9.patch
@@ -0,0 +1,75 @@
+diff --git a/Makefile b/Makefile
+index 4e17baa91e9c..cb3f64e40099 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 7
+-SUBLEVEL = 8
++SUBLEVEL = 9
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
+index ea62245fee26..62900938f26d 100644
+--- a/fs/xfs/xfs_xattr.c
++++ b/fs/xfs/xfs_xattr.c
+@@ -147,6 +147,7 @@ __xfs_xattr_put_listent(
+ arraytop = context->count + prefix_len + namelen + 1;
+ if (arraytop > context->firstu) {
+ context->count = -1; /* insufficient space */
++ context->seen_enough = 1;
+ return 0;
+ }
+ offset = (char *)context->alist + context->count;
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 317564b11dc7..7c3df8de0fe7 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2194,6 +2194,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
+ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
+ #define FOLL_MLOCK 0x1000 /* lock present pages */
+ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
++#define FOLL_COW 0x4000 /* internal GUP flag */
+
+ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
+ void *data);
+diff --git a/mm/gup.c b/mm/gup.c
+index c057784c8444..2c764f0d71cc 100644
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -60,6 +60,16 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
+ return -EEXIST;
+ }
+
++/*
++ * FOLL_FORCE can write to even unwritable pte's, but only
++ * after we've gone through a COW cycle and they are dirty.
++ */
++static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
++{
++ return pte_write(pte) ||
++ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
++}
++
+ static struct page *follow_page_pte(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmd, unsigned int flags)
+ {
+@@ -95,7 +105,7 @@ retry:
+ }
+ if ((flags & FOLL_NUMA) && pte_protnone(pte))
+ goto no_page;
+- if ((flags & FOLL_WRITE) && !pte_write(pte)) {
++ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
+ pte_unmap_unlock(ptep, ptl);
+ return NULL;
+ }
+@@ -409,7 +419,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
+ * reCOWed by userspace write).
+ */
+ if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
+- *flags &= ~FOLL_WRITE;
++ *flags |= FOLL_COW;
+ return 0;
+ }
+
* [gentoo-commits] proj/linux-patches:4.7 commit in: /
@ 2016-10-22 13:06 Mike Pagano
0 siblings, 0 replies; 20+ messages in thread
From: Mike Pagano @ 2016-10-22 13:06 UTC (permalink / raw
To: gentoo-commits
commit: 4bcce038842874ce87861be9f9682af7c21c9d61
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Oct 22 13:06:44 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Oct 22 13:06:44 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=4bcce038
Linux patch 4.7.10
0000_README | 4 +
1009_linux-4.7.10.patch | 1630 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1634 insertions(+)
diff --git a/0000_README b/0000_README
index dfea86d..bf17fae 100644
--- a/0000_README
+++ b/0000_README
@@ -79,6 +79,10 @@ Patch: 1008_linux-4.7.9.patch
From: http://www.kernel.org
Desc: Linux 4.7.9
+Patch: 1009_linux-4.7.10.patch
+From: http://www.kernel.org
+Desc: Linux 4.7.10
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1009_linux-4.7.10.patch b/1009_linux-4.7.10.patch
new file mode 100644
index 0000000..3130dce
--- /dev/null
+++ b/1009_linux-4.7.10.patch
@@ -0,0 +1,1630 @@
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 8c20323d1277..67c42dbed89b 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -12620,11 +12620,10 @@ F: arch/x86/xen/*swiotlb*
+ F: drivers/xen/*swiotlb*
+
+ XFS FILESYSTEM
+-P: Silicon Graphics Inc
+ M: Dave Chinner <david@fromorbit.com>
+-M: xfs@oss.sgi.com
+-L: xfs@oss.sgi.com
+-W: http://oss.sgi.com/projects/xfs
++M: linux-xfs@vger.kernel.org
++L: linux-xfs@vger.kernel.org
++W: http://xfs.org/
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git
+ S: Supported
+ F: Documentation/filesystems/xfs.txt
+diff --git a/Makefile b/Makefile
+index cb3f64e40099..219ab6dff415 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 7
+-SUBLEVEL = 9
++SUBLEVEL = 10
+ EXTRAVERSION =
+ NAME = Psychotic Stoned Sheep
+
+diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
+index d1ec7f6b31e0..e880dfa3fcd3 100644
+--- a/arch/arc/include/asm/irqflags-arcv2.h
++++ b/arch/arc/include/asm/irqflags-arcv2.h
+@@ -112,7 +112,7 @@ static inline long arch_local_save_flags(void)
+ */
+ temp = (1 << 5) |
+ ((!!(temp & STATUS_IE_MASK)) << CLRI_STATUS_IE_BIT) |
+- (temp & CLRI_STATUS_E_MASK);
++ ((temp >> 1) & CLRI_STATUS_E_MASK);
+ return temp;
+ }
+
+diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
+index 6c24faf48b16..62b59409a5d9 100644
+--- a/arch/arc/kernel/intc-arcv2.c
++++ b/arch/arc/kernel/intc-arcv2.c
+@@ -74,7 +74,7 @@ void arc_init_IRQ(void)
+ tmp = read_aux_reg(0xa);
+ tmp |= STATUS_AD_MASK | (irq_prio << 1);
+ tmp &= ~STATUS_IE_MASK;
+- asm volatile("flag %0 \n"::"r"(tmp));
++ asm volatile("kflag %0 \n"::"r"(tmp));
+ }
+
+ static void arcv2_irq_mask(struct irq_data *data)
+diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
+index 4a349787bc62..73a277df2308 100644
+--- a/block/cfq-iosched.c
++++ b/block/cfq-iosched.c
+@@ -3021,7 +3021,6 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
+ if (time_before(jiffies, rq->fifo_time))
+ rq = NULL;
+
+- cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
+ return rq;
+ }
+
+@@ -3395,6 +3394,9 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+ {
+ unsigned int max_dispatch;
+
++ if (cfq_cfqq_must_dispatch(cfqq))
++ return true;
++
+ /*
+ * Drain async requests before we start sync IO
+ */
+@@ -3486,15 +3488,20 @@ static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+
+ BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
+
++ rq = cfq_check_fifo(cfqq);
++ if (rq)
++ cfq_mark_cfqq_must_dispatch(cfqq);
++
+ if (!cfq_may_dispatch(cfqd, cfqq))
+ return false;
+
+ /*
+ * follow expired path, else get first next available
+ */
+- rq = cfq_check_fifo(cfqq);
+ if (!rq)
+ rq = cfqq->next_rq;
++ else
++ cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
+
+ /*
+ * insert request into driver dispatch list
+@@ -3962,7 +3969,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
+ * if the new request is sync, but the currently running queue is
+ * not, let the sync request have priority.
+ */
+- if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
++ if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq) && !cfq_cfqq_must_dispatch(cfqq))
+ return true;
+
+ /*
+diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
+index 08b3ac68952b..f83de99d7d71 100644
+--- a/crypto/async_tx/async_pq.c
++++ b/crypto/async_tx/async_pq.c
+@@ -368,8 +368,6 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
+
+ dma_set_unmap(tx, unmap);
+ async_tx_submit(chan, tx, submit);
+-
+- return tx;
+ } else {
+ struct page *p_src = P(blocks, disks);
+ struct page *q_src = Q(blocks, disks);
+@@ -424,9 +422,11 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
+ submit->cb_param = cb_param_orig;
+ submit->flags = flags_orig;
+ async_tx_sync_epilog(submit);
+-
+- return NULL;
++ tx = NULL;
+ }
++ dmaengine_unmap_put(unmap);
++
++ return tx;
+ }
+ EXPORT_SYMBOL_GPL(async_syndrome_val);
+
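+In async_syndrome_val() the two early returns become a single exit so
+dmaengine_unmap_put() now runs on both the DMA offload path and the
+synchronous fallback, fixing a leaked unmap reference. The reshaped
+control flow, reduced to a refcount sketch:
+
+	#include <stdio.h>
+
+	static int refs;
+	static void unmap_get(void) { refs++; }
+	static void unmap_put(void) { refs--; }
+
+	static void *do_syndrome_val(int have_dma)
+	{
+		void *tx = NULL;
+
+		unmap_get();
+		if (have_dma)
+			tx = (void *)0x1; /* offload path used to return here, leaking a ref */
+		/* else: synchronous fallback, which used to return NULL here */
+		unmap_put();              /* single exit: both paths drop the reference */
+		return tx;
+	}
+
+	int main(void)
+	{
+		do_syndrome_val(1);
+		do_syndrome_val(0);
+		printf("leaked refs: %d\n", refs); /* 0 */
+		return 0;
+	}
+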
+diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c
+index bac70995e064..12ad3e3a84e3 100644
+--- a/crypto/ghash-generic.c
++++ b/crypto/ghash-generic.c
+@@ -14,24 +14,13 @@
+
+ #include <crypto/algapi.h>
+ #include <crypto/gf128mul.h>
++#include <crypto/ghash.h>
+ #include <crypto/internal/hash.h>
+ #include <linux/crypto.h>
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+
+-#define GHASH_BLOCK_SIZE 16
+-#define GHASH_DIGEST_SIZE 16
+-
+-struct ghash_ctx {
+- struct gf128mul_4k *gf128;
+-};
+-
+-struct ghash_desc_ctx {
+- u8 buffer[GHASH_BLOCK_SIZE];
+- u32 bytes;
+-};
+-
+ static int ghash_init(struct shash_desc *desc)
+ {
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
+index d799662f19eb..261420ddfe66 100644
+--- a/drivers/base/dma-mapping.c
++++ b/drivers/base/dma-mapping.c
+@@ -334,7 +334,7 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
+ return;
+ }
+
+- unmap_kernel_range((unsigned long)cpu_addr, size);
++ unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
+ vunmap(cpu_addr);
+ }
+ #endif
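+The dma-mapping one-liner matters because the remap side maps a
+page-aligned region, so the unmap must cover the same rounded-up size or
+the tail page stays mapped. Rounding demo, assuming a 4 KiB page:
+
+	#include <stdio.h>
+
+	#define PAGE_SIZE     4096UL
+	#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
+
+	int main(void)
+	{
+		unsigned long size = 6000; /* maps as two pages */
+		printf("unmap %lu bytes, not %lu\n", PAGE_ALIGN(size), size); /* 8192 */
+		return 0;
+	}
+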
+diff --git a/drivers/clk/mvebu/cp110-system-controller.c b/drivers/clk/mvebu/cp110-system-controller.c
+index 7fa42d6b2b92..f2303da7fda7 100644
+--- a/drivers/clk/mvebu/cp110-system-controller.c
++++ b/drivers/clk/mvebu/cp110-system-controller.c
+@@ -81,13 +81,6 @@ enum {
+ #define CP110_GATE_EIP150 25
+ #define CP110_GATE_EIP197 26
+
+-static struct clk *cp110_clks[CP110_CLK_NUM];
+-
+-static struct clk_onecell_data cp110_clk_data = {
+- .clks = cp110_clks,
+- .clk_num = CP110_CLK_NUM,
+-};
+-
+ struct cp110_gate_clk {
+ struct clk_hw hw;
+ struct regmap *regmap;
+@@ -142,6 +135,8 @@ static struct clk *cp110_register_gate(const char *name,
+ if (!gate)
+ return ERR_PTR(-ENOMEM);
+
++ memset(&init, 0, sizeof(init));
++
+ init.name = name;
+ init.ops = &cp110_gate_ops;
+ init.parent_names = &parent_name;
+@@ -194,7 +189,8 @@ static int cp110_syscon_clk_probe(struct platform_device *pdev)
+ struct regmap *regmap;
+ struct device_node *np = pdev->dev.of_node;
+ const char *ppv2_name, *apll_name, *core_name, *eip_name, *nand_name;
+- struct clk *clk;
++ struct clk_onecell_data *cp110_clk_data;
++ struct clk *clk, **cp110_clks;
+ u32 nand_clk_ctrl;
+ int i, ret;
+
+@@ -207,6 +203,20 @@ static int cp110_syscon_clk_probe(struct platform_device *pdev)
+ if (ret)
+ return ret;
+
++ cp110_clks = devm_kcalloc(&pdev->dev, sizeof(struct clk *),
++ CP110_CLK_NUM, GFP_KERNEL);
++ if (!cp110_clks)
++ return -ENOMEM;
++
++ cp110_clk_data = devm_kzalloc(&pdev->dev,
++ sizeof(*cp110_clk_data),
++ GFP_KERNEL);
++ if (!cp110_clk_data)
++ return -ENOMEM;
++
++ cp110_clk_data->clks = cp110_clks;
++ cp110_clk_data->clk_num = CP110_CLK_NUM;
++
+ /* Register the APLL which is the root of the clk tree */
+ of_property_read_string_index(np, "core-clock-output-names",
+ CP110_CORE_APLL, &apll_name);
+@@ -334,10 +344,12 @@ static int cp110_syscon_clk_probe(struct platform_device *pdev)
+ cp110_clks[CP110_MAX_CORE_CLOCKS + i] = clk;
+ }
+
+- ret = of_clk_add_provider(np, cp110_of_clk_get, &cp110_clk_data);
++ ret = of_clk_add_provider(np, cp110_of_clk_get, cp110_clk_data);
+ if (ret)
+ goto fail_clk_add;
+
++ platform_set_drvdata(pdev, cp110_clks);
++
+ return 0;
+
+ fail_clk_add:
+@@ -364,6 +376,7 @@ fail0:
+
+ static int cp110_syscon_clk_remove(struct platform_device *pdev)
+ {
++ struct clk **cp110_clks = platform_get_drvdata(pdev);
+ int i;
+
+ of_clk_del_provider(pdev->dev.of_node);
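+The cp110 rework drops the file-scope static clock tables in favor of
+per-device allocations, so two system controllers can probe without
+clobbering each other, with the table recovered from drvdata at remove
+time. A compressed sketch of the pattern (kernel APIs, not
+standalone-runnable; NUM_CLKS is a placeholder and error handling is
+trimmed):
+
+	/* Sketch of the per-device allocation pattern used above. devm-managed
+	 * memory is freed automatically when the device goes away. */
+	static int example_probe(struct platform_device *pdev)
+	{
+		struct clk_onecell_data *data;
+		struct clk **clks;
+
+		clks = devm_kcalloc(&pdev->dev, NUM_CLKS, sizeof(*clks), GFP_KERNEL);
+		data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+		if (!clks || !data)
+			return -ENOMEM;
+
+		data->clks = clks;
+		data->clk_num = NUM_CLKS;
+		platform_set_drvdata(pdev, clks); /* fetched back in .remove */
+		return 0;
+	}
+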
+diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
+index 6c999cb01b80..27a94a119009 100644
+--- a/drivers/crypto/vmx/ghash.c
++++ b/drivers/crypto/vmx/ghash.c
+@@ -26,16 +26,13 @@
+ #include <linux/hardirq.h>
+ #include <asm/switch_to.h>
+ #include <crypto/aes.h>
++#include <crypto/ghash.h>
+ #include <crypto/scatterwalk.h>
+ #include <crypto/internal/hash.h>
+ #include <crypto/b128ops.h>
+
+ #define IN_INTERRUPT in_interrupt()
+
+-#define GHASH_BLOCK_SIZE (16)
+-#define GHASH_DIGEST_SIZE (16)
+-#define GHASH_KEY_LEN (16)
+-
+ void gcm_init_p8(u128 htable[16], const u64 Xi[2]);
+ void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]);
+ void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
+@@ -55,16 +52,11 @@ struct p8_ghash_desc_ctx {
+
+ static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
+ {
+- const char *alg;
++ const char *alg = "ghash-generic";
+ struct crypto_shash *fallback;
+ struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm);
+ struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+- if (!(alg = crypto_tfm_alg_name(tfm))) {
+- printk(KERN_ERR "Failed to get algorithm name.\n");
+- return -ENOENT;
+- }
+-
+ fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(fallback)) {
+ printk(KERN_ERR
+@@ -78,10 +70,18 @@ static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
+ crypto_shash_set_flags(fallback,
+ crypto_shash_get_flags((struct crypto_shash
+ *) tfm));
+- ctx->fallback = fallback;
+
+- shash_tfm->descsize = sizeof(struct p8_ghash_desc_ctx)
+- + crypto_shash_descsize(fallback);
++ /* Check if the descsize defined in the algorithm is still enough. */
++ if (shash_tfm->descsize < sizeof(struct p8_ghash_desc_ctx)
++ + crypto_shash_descsize(fallback)) {
++ printk(KERN_ERR
++ "Desc size of the fallback implementation (%s) does not match the expected value: %lu vs %u\n",
++ alg,
++ shash_tfm->descsize - sizeof(struct p8_ghash_desc_ctx),
++ crypto_shash_descsize(fallback));
++ return -EINVAL;
++ }
++ ctx->fallback = fallback;
+
+ return 0;
+ }
+@@ -113,7 +113,7 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
+ {
+ struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm));
+
+- if (keylen != GHASH_KEY_LEN)
++ if (keylen != GHASH_BLOCK_SIZE)
+ return -EINVAL;
+
+ preempt_disable();
+@@ -211,7 +211,8 @@ struct shash_alg p8_ghash_alg = {
+ .update = p8_ghash_update,
+ .final = p8_ghash_final,
+ .setkey = p8_ghash_setkey,
+- .descsize = sizeof(struct p8_ghash_desc_ctx),
++ .descsize = sizeof(struct p8_ghash_desc_ctx)
++ + sizeof(struct ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "p8_ghash",
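+Two things change in the vmx ghash driver: the fallback is requested by
+the fixed name "ghash-generic" instead of the instance name, and
+init_tfm() now verifies that the descsize advertised at registration
+(grown above to include struct ghash_desc_ctx) still covers whatever
+fallback got loaded, rather than growing descsize after allocation. The
+size check in isolation, with made-up numbers:
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned wrapper    = 16; /* made-up size of the wrapping desc ctx */
+		unsigned expected   = 24; /* fallback desc size assumed at registration */
+		unsigned actual     = 32; /* what the loaded fallback really needs */
+		unsigned advertised = wrapper + expected;
+
+		if (advertised < wrapper + actual)
+			printf("reject fallback: need %u, advertised %u\n",
+			       wrapper + actual, advertised);
+		return 0;
+	}
+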
+diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
+index 792f15eb8efe..29e3ce254989 100644
+--- a/drivers/infiniband/hw/hfi1/rc.c
++++ b/drivers/infiniband/hw/hfi1/rc.c
+@@ -889,8 +889,10 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
+ return;
+
+ queue_ack:
+- this_cpu_inc(*ibp->rvp.rc_qacks);
+ spin_lock_irqsave(&qp->s_lock, flags);
++ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
++ goto unlock;
++ this_cpu_inc(*ibp->rvp.rc_qacks);
+ qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+@@ -899,6 +901,7 @@ queue_ack:
+
+ /* Schedule the send tasklet. */
+ hfi1_schedule_send(qp);
++unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+
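+The hfi1 fix moves the QP state test, and the stats increment, under
+s_lock so an ACK is never queued on a QP that is concurrently leaving a
+receive-capable state. The general check-under-lock shape, as a pthread
+sketch:
+
+	#include <pthread.h>
+	#include <stdbool.h>
+
+	static pthread_mutex_t qp_lock = PTHREAD_MUTEX_INITIALIZER;
+	static bool recv_ok = true; /* stands in for RVT_PROCESS_RECV_OK */
+	static int acks_pending;
+
+	static void queue_ack(void)
+	{
+		pthread_mutex_lock(&qp_lock);
+		if (!recv_ok)             /* the state is only trustworthy under the lock */
+			goto unlock;
+		acks_pending++;           /* accounting also happens under the lock now */
+	unlock:
+		pthread_mutex_unlock(&qp_lock);
+	}
+
+	int main(void)
+	{
+		queue_ack();
+		return acks_pending == 1 ? 0 : 1;
+	}
+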
+diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
+index a039a5df6f21..fd9271bc1a11 100644
+--- a/drivers/misc/mei/amthif.c
++++ b/drivers/misc/mei/amthif.c
+@@ -67,8 +67,12 @@ int mei_amthif_host_init(struct mei_device *dev, struct mei_me_client *me_cl)
+ struct mei_cl *cl = &dev->iamthif_cl;
+ int ret;
+
+- if (mei_cl_is_connected(cl))
+- return 0;
++ mutex_lock(&dev->device_lock);
++
++ if (mei_cl_is_connected(cl)) {
++ ret = 0;
++ goto out;
++ }
+
+ dev->iamthif_state = MEI_IAMTHIF_IDLE;
+
+@@ -77,11 +81,13 @@ int mei_amthif_host_init(struct mei_device *dev, struct mei_me_client *me_cl)
+ ret = mei_cl_link(cl);
+ if (ret < 0) {
+ dev_err(dev->dev, "amthif: failed cl_link %d\n", ret);
+- return ret;
++ goto out;
+ }
+
+ ret = mei_cl_connect(cl, me_cl, NULL);
+
++out:
++ mutex_unlock(&dev->device_lock);
+ return ret;
+ }
+
+diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
+index 1f33fea9299f..e094df3cf2d5 100644
+--- a/drivers/misc/mei/bus.c
++++ b/drivers/misc/mei/bus.c
+@@ -983,12 +983,10 @@ void mei_cl_bus_rescan_work(struct work_struct *work)
+ container_of(work, struct mei_device, bus_rescan_work);
+ struct mei_me_client *me_cl;
+
+- mutex_lock(&bus->device_lock);
+ me_cl = mei_me_cl_by_uuid(bus, &mei_amthif_guid);
+ if (me_cl)
+ mei_amthif_host_init(bus, me_cl);
+ mei_me_cl_put(me_cl);
+- mutex_unlock(&bus->device_lock);
+
+ mei_cl_bus_rescan(bus);
+ }
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index 501f15d9f4d6..e7ba7318bedb 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -11415,6 +11415,12 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev,
+
+ dev_info(&pdev->dev, "%s: error %d\n", __func__, error);
+
++ if (!pf) {
++ dev_info(&pdev->dev,
++ "Cannot recover - error happened during device probe\n");
++ return PCI_ERS_RESULT_DISCONNECT;
++ }
++
+ /* shutdown all operations */
+ if (!test_bit(__I40E_SUSPENDED, &pf->state)) {
+ rtnl_lock();
+diff --git a/drivers/net/wireless/ath/carl9170/debug.c b/drivers/net/wireless/ath/carl9170/debug.c
+index 6808db433283..ec3a64e5d2bb 100644
+--- a/drivers/net/wireless/ath/carl9170/debug.c
++++ b/drivers/net/wireless/ath/carl9170/debug.c
+@@ -75,7 +75,8 @@ static ssize_t carl9170_debugfs_read(struct file *file, char __user *userbuf,
+
+ if (!ar)
+ return -ENODEV;
+- dfops = container_of(file->f_op, struct carl9170_debugfs_fops, fops);
++ dfops = container_of(debugfs_real_fops(file),
++ struct carl9170_debugfs_fops, fops);
+
+ if (!dfops->read)
+ return -ENOSYS;
+@@ -127,7 +128,8 @@ static ssize_t carl9170_debugfs_write(struct file *file,
+
+ if (!ar)
+ return -ENODEV;
+- dfops = container_of(file->f_op, struct carl9170_debugfs_fops, fops);
++ dfops = container_of(debugfs_real_fops(file),
++ struct carl9170_debugfs_fops, fops);
+
+ if (!dfops->write)
+ return -ENOSYS;
+diff --git a/drivers/net/wireless/broadcom/b43/debugfs.c b/drivers/net/wireless/broadcom/b43/debugfs.c
+index b4bcd94aff6c..77046384dd80 100644
+--- a/drivers/net/wireless/broadcom/b43/debugfs.c
++++ b/drivers/net/wireless/broadcom/b43/debugfs.c
+@@ -524,7 +524,8 @@ static ssize_t b43_debugfs_read(struct file *file, char __user *userbuf,
+ goto out_unlock;
+ }
+
+- dfops = container_of(file->f_op, struct b43_debugfs_fops, fops);
++ dfops = container_of(debugfs_real_fops(file),
++ struct b43_debugfs_fops, fops);
+ if (!dfops->read) {
+ err = -ENOSYS;
+ goto out_unlock;
+@@ -585,7 +586,8 @@ static ssize_t b43_debugfs_write(struct file *file,
+ goto out_unlock;
+ }
+
+- dfops = container_of(file->f_op, struct b43_debugfs_fops, fops);
++ dfops = container_of(debugfs_real_fops(file),
++ struct b43_debugfs_fops, fops);
+ if (!dfops->write) {
+ err = -ENOSYS;
+ goto out_unlock;
+diff --git a/drivers/net/wireless/broadcom/b43legacy/debugfs.c b/drivers/net/wireless/broadcom/b43legacy/debugfs.c
+index 090910ea259e..82ef56ed7ca1 100644
+--- a/drivers/net/wireless/broadcom/b43legacy/debugfs.c
++++ b/drivers/net/wireless/broadcom/b43legacy/debugfs.c
+@@ -221,7 +221,8 @@ static ssize_t b43legacy_debugfs_read(struct file *file, char __user *userbuf,
+ goto out_unlock;
+ }
+
+- dfops = container_of(file->f_op, struct b43legacy_debugfs_fops, fops);
++ dfops = container_of(debugfs_real_fops(file),
++ struct b43legacy_debugfs_fops, fops);
+ if (!dfops->read) {
+ err = -ENOSYS;
+ goto out_unlock;
+@@ -287,7 +288,8 @@ static ssize_t b43legacy_debugfs_write(struct file *file,
+ goto out_unlock;
+ }
+
+- dfops = container_of(file->f_op, struct b43legacy_debugfs_fops, fops);
++ dfops = container_of(debugfs_real_fops(file),
++ struct b43legacy_debugfs_fops, fops);
+ if (!dfops->write) {
+ err = -ENOSYS;
+ goto out_unlock;
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+index 121baba7acb1..9014bf4af248 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+@@ -2473,7 +2473,7 @@ static void brcmf_fill_bss_param(struct brcmf_if *ifp, struct station_info *si)
+ WL_BSS_INFO_MAX);
+ if (err) {
+ brcmf_err("Failed to get bss info (%d)\n", err);
+- return;
++ goto out_kfree;
+ }
+ si->filled |= BIT(NL80211_STA_INFO_BSS_PARAM);
+ si->bss_param.beacon_interval = le16_to_cpu(buf->bss_le.beacon_period);
+@@ -2485,6 +2485,9 @@ static void brcmf_fill_bss_param(struct brcmf_if *ifp, struct station_info *si)
+ si->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE;
+ if (capability & WLAN_CAPABILITY_SHORT_SLOT_TIME)
+ si->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME;
++
++out_kfree:
++ kfree(buf);
+ }
+
+ static s32
+@@ -3824,11 +3827,11 @@ brcmf_cfg80211_del_pmksa(struct wiphy *wiphy, struct net_device *ndev,
+ if (!check_vif_up(ifp->vif))
+ return -EIO;
+
+- brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", &pmksa->bssid);
++ brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", pmksa->bssid);
+
+ npmk = le32_to_cpu(cfg->pmk_list.npmk);
+ for (i = 0; i < npmk; i++)
+- if (!memcmp(&pmksa->bssid, &pmk[i].bssid, ETH_ALEN))
++ if (!memcmp(pmksa->bssid, pmk[i].bssid, ETH_ALEN))
+ break;
+
+ if ((npmk > 0) && (i < npmk)) {
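+In brcmf_fill_bss_param() above, the early return on error becomes
+goto out_kfree so the buffer allocated at function entry is freed on
+every path. The idiom in plain C:
+
+	#include <stdlib.h>
+
+	static int fill_params(int fail)
+	{
+		char *buf = malloc(2048);
+		int err = 0;
+
+		if (!buf)
+			return -1;
+		if (fail) {
+			err = -5;
+			goto out_kfree;  /* was a bare return, leaking buf */
+		}
+		/* ... parse buf into the caller's structure ... */
+	out_kfree:
+		free(buf);
+		return err;
+	}
+
+	int main(void)
+	{
+		return fill_params(1) == -5 ? 0 : 1;
+	}
+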
+diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c
+index 7e269f9aa607..63664442e687 100644
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c
+@@ -234,13 +234,20 @@ static void brcmf_flowring_block(struct brcmf_flowring *flow, u16 flowid,
+
+ void brcmf_flowring_delete(struct brcmf_flowring *flow, u16 flowid)
+ {
++ struct brcmf_bus *bus_if = dev_get_drvdata(flow->dev);
+ struct brcmf_flowring_ring *ring;
++ struct brcmf_if *ifp;
+ u16 hash_idx;
++ u8 ifidx;
+ struct sk_buff *skb;
+
+ ring = flow->rings[flowid];
+ if (!ring)
+ return;
++
++ ifidx = brcmf_flowring_ifidx_get(flow, flowid);
++ ifp = brcmf_get_ifp(bus_if->drvr, ifidx);
++
+ brcmf_flowring_block(flow, flowid, false);
+ hash_idx = ring->hash_id;
+ flow->hash[hash_idx].ifidx = BRCMF_FLOWRING_INVALID_IFIDX;
+@@ -249,7 +256,7 @@ void brcmf_flowring_delete(struct brcmf_flowring *flow, u16 flowid)
+
+ skb = skb_dequeue(&ring->skblist);
+ while (skb) {
+- brcmu_pkt_buf_free_skb(skb);
++ brcmf_txfinalize(ifp, skb, false);
+ skb = skb_dequeue(&ring->skblist);
+ }
+
+diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
+index 7640498964a5..3d53d636b17b 100644
+--- a/drivers/scsi/arcmsr/arcmsr_hba.c
++++ b/drivers/scsi/arcmsr/arcmsr_hba.c
+@@ -2388,15 +2388,23 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb,
+ }
+ case ARCMSR_MESSAGE_WRITE_WQBUFFER: {
+ unsigned char *ver_addr;
+- int32_t user_len, cnt2end;
++ uint32_t user_len;
++ int32_t cnt2end;
+ uint8_t *pQbuffer, *ptmpuserbuffer;
++
++ user_len = pcmdmessagefld->cmdmessage.Length;
++ if (user_len > ARCMSR_API_DATA_BUFLEN) {
++ retvalue = ARCMSR_MESSAGE_FAIL;
++ goto message_out;
++ }
++
+ ver_addr = kmalloc(ARCMSR_API_DATA_BUFLEN, GFP_ATOMIC);
+ if (!ver_addr) {
+ retvalue = ARCMSR_MESSAGE_FAIL;
+ goto message_out;
+ }
+ ptmpuserbuffer = ver_addr;
+- user_len = pcmdmessagefld->cmdmessage.Length;
++
+ memcpy(ptmpuserbuffer,
+ pcmdmessagefld->messagedatabuffer, user_len);
+ spin_lock_irqsave(&acb->wqbuffer_lock, flags);
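+The arcmsr hunk validates the caller-supplied message length against
+ARCMSR_API_DATA_BUFLEN before, not after, the memcpy() into the
+fixed-size kmalloc() buffer, closing a heap overflow reachable from the
+ioctl path. The pattern (BUFLEN is a stand-in value):
+
+	#include <stdint.h>
+	#include <string.h>
+
+	#define BUFLEN 1032 /* stand-in for ARCMSR_API_DATA_BUFLEN */
+
+	static int copy_message(uint8_t *dst, const uint8_t *src, uint32_t user_len)
+	{
+		if (user_len > BUFLEN) /* validate before, not after, the copy */
+			return -1;
+		memcpy(dst, src, user_len);
+		return 0;
+	}
+
+	int main(void)
+	{
+		uint8_t dst[BUFLEN], src[BUFLEN] = { 0 };
+		return copy_message(dst, src, sizeof(src));
+	}
+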
+diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
+index fc523c3e5019..6398f3d7b323 100644
+--- a/drivers/scsi/ibmvscsi/ibmvfc.c
++++ b/drivers/scsi/ibmvscsi/ibmvfc.c
+@@ -717,7 +717,6 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
+ spin_lock_irqsave(vhost->host->host_lock, flags);
+ vhost->state = IBMVFC_NO_CRQ;
+ vhost->logged_in = 0;
+- ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
+
+ /* Clean out the queue */
+ memset(crq->msgs, 0, PAGE_SIZE);
+diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
+index e19969614203..b022f5a01e63 100644
+--- a/drivers/tty/serial/8250/8250_dw.c
++++ b/drivers/tty/serial/8250/8250_dw.c
+@@ -462,7 +462,7 @@ static int dw8250_probe(struct platform_device *pdev)
+ }
+
+ data->pclk = devm_clk_get(&pdev->dev, "apb_pclk");
+- if (IS_ERR(data->clk) && PTR_ERR(data->clk) == -EPROBE_DEFER) {
++ if (IS_ERR(data->pclk) && PTR_ERR(data->pclk) == -EPROBE_DEFER) {
+ err = -EPROBE_DEFER;
+ goto err_clk;
+ }
+diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
+index d4036038a4dd..427dd789c0e5 100644
+--- a/drivers/tty/serial/8250/8250_port.c
++++ b/drivers/tty/serial/8250/8250_port.c
+@@ -1415,12 +1415,8 @@ static void __do_stop_tx_rs485(struct uart_8250_port *p)
+ if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) {
+ serial8250_clear_fifos(p);
+
+- serial8250_rpm_get(p);
+-
+ p->ier |= UART_IER_RLSI | UART_IER_RDI;
+ serial_port_out(&p->port, UART_IER, p->ier);
+-
+- serial8250_rpm_put(p);
+ }
+ }
+
+@@ -1430,6 +1426,7 @@ static void serial8250_em485_handle_stop_tx(unsigned long arg)
+ struct uart_8250_em485 *em485 = p->em485;
+ unsigned long flags;
+
++ serial8250_rpm_get(p);
+ spin_lock_irqsave(&p->port.lock, flags);
+ if (em485 &&
+ em485->active_timer == &em485->stop_tx_timer) {
+@@ -1437,6 +1434,7 @@ static void serial8250_em485_handle_stop_tx(unsigned long arg)
+ em485->active_timer = NULL;
+ }
+ spin_unlock_irqrestore(&p->port.lock, flags);
++ serial8250_rpm_put(p);
+ }
+
+ static void __stop_tx_rs485(struct uart_8250_port *p)
+@@ -1476,7 +1474,7 @@ static inline void __stop_tx(struct uart_8250_port *p)
+ unsigned char lsr = serial_in(p, UART_LSR);
+ /*
+ * To provide required timeing and allow FIFO transfer,
+- * __stop_tx_rs485 must be called only when both FIFO and
++ * __stop_tx_rs485() must be called only when both FIFO and
+ * shift register are empty. It is for device driver to enable
+ * interrupt on TEMT.
+ */
+@@ -1485,9 +1483,10 @@ static inline void __stop_tx(struct uart_8250_port *p)
+
+ del_timer(&em485->start_tx_timer);
+ em485->active_timer = NULL;
++
++ __stop_tx_rs485(p);
+ }
+ __do_stop_tx(p);
+- __stop_tx_rs485(p);
+ }
+
+ static void serial8250_stop_tx(struct uart_port *port)
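+The 8250 hunks hoist the runtime-PM get/put out of
+__do_stop_tx_rs485() into its timer-callback caller, keeping the device
+resumed across the whole locked section, and make __stop_tx() run the
+RS-485 stop sequence before __do_stop_tx(). The
+reference-around-critical-section shape (kernel sketch, not standalone):
+
+	/* Kernel-idiom sketch: take the PM reference once, around the whole
+	 * critical section, instead of deep inside it. */
+	static void handle_stop_tx(struct uart_8250_port *p)
+	{
+		unsigned long flags;
+
+		serial8250_rpm_get(p);
+		spin_lock_irqsave(&p->port.lock, flags);
+		/* ... stop the RS-485 state machine, write registers ... */
+		spin_unlock_irqrestore(&p->port.lock, flags);
+		serial8250_rpm_put(p);
+	}
+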
+diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
+index 0df2b1c091ae..615c0279a1a6 100644
+--- a/drivers/tty/serial/imx.c
++++ b/drivers/tty/serial/imx.c
+@@ -740,12 +740,13 @@ static unsigned int imx_get_hwmctrl(struct imx_port *sport)
+ {
+ unsigned int tmp = TIOCM_DSR;
+ unsigned usr1 = readl(sport->port.membase + USR1);
++ unsigned usr2 = readl(sport->port.membase + USR2);
+
+ if (usr1 & USR1_RTSS)
+ tmp |= TIOCM_CTS;
+
+ /* in DCE mode DCDIN is always 0 */
+- if (!(usr1 & USR2_DCDIN))
++ if (!(usr2 & USR2_DCDIN))
+ tmp |= TIOCM_CAR;
+
+ if (sport->dte_mode)
+diff --git a/fs/attr.c b/fs/attr.c
+index 25b24d0f6c88..ccde2700fc17 100644
+--- a/fs/attr.c
++++ b/fs/attr.c
+@@ -202,6 +202,21 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
+ return -EPERM;
+ }
+
++ /*
++ * If utimes(2) and friends are called with times == NULL (or both
++ * times are UTIME_NOW), then we need to check for write permission
++ */
++ if (ia_valid & ATTR_TOUCH) {
++ if (IS_IMMUTABLE(inode))
++ return -EPERM;
++
++ if (!inode_owner_or_capable(inode)) {
++ error = inode_permission(inode, MAY_WRITE);
++ if (error)
++ return error;
++ }
++ }
++
+ if ((ia_valid & ATTR_MODE)) {
+ umode_t amode = attr->ia_mode;
+ /* Flag setting protected by i_mutex */
+diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
+index 658c39b70fba..702e5834f110 100644
+--- a/fs/btrfs/compression.c
++++ b/fs/btrfs/compression.c
+@@ -690,7 +690,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+ ret = btrfs_map_bio(root, READ, comp_bio,
+ mirror_num, 0);
+ if (ret) {
+- bio->bi_error = ret;
++ comp_bio->bi_error = ret;
+ bio_endio(comp_bio);
+ }
+
+@@ -719,7 +719,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+
+ ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
+ if (ret) {
+- bio->bi_error = ret;
++ comp_bio->bi_error = ret;
+ bio_endio(comp_bio);
+ }
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 72f50480eee7..699ee7c45ce4 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -265,7 +265,8 @@ struct btrfs_super_block {
+ #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
+
+ #define BTRFS_FEATURE_COMPAT_RO_SUPP \
+- (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
++ (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
++ BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
+
+ #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
+ #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 864cf3be0251..c14e8c70d26a 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2528,6 +2528,7 @@ int open_ctree(struct super_block *sb,
+ int num_backups_tried = 0;
+ int backup_index = 0;
+ int max_active;
++ int clear_free_space_tree = 0;
+
+ tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
+ chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
+@@ -3129,6 +3130,14 @@ retry_root_backup:
+
+ if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
+ btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
++ clear_free_space_tree = 1;
++ } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
++ !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
++ btrfs_warn(fs_info, "free space tree is invalid");
++ clear_free_space_tree = 1;
++ }
++
++ if (clear_free_space_tree) {
+ btrfs_info(fs_info, "clearing free space tree");
+ ret = btrfs_clear_free_space_tree(fs_info);
+ if (ret) {
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index 92fe3f8012db..28f60fcefb81 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -5508,17 +5508,45 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+ }
+ }
+
+-/*
+- * The extent buffer bitmap operations are done with byte granularity because
+- * bitmap items are not guaranteed to be aligned to a word and therefore a
+- * single word in a bitmap may straddle two pages in the extent buffer.
+- */
+-#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+-#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+-#define BITMAP_FIRST_BYTE_MASK(start) \
+- ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+-#define BITMAP_LAST_BYTE_MASK(nbits) \
+- (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
++void le_bitmap_set(u8 *map, unsigned int start, int len)
++{
++ u8 *p = map + BIT_BYTE(start);
++ const unsigned int size = start + len;
++ int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
++ u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
++
++ while (len - bits_to_set >= 0) {
++ *p |= mask_to_set;
++ len -= bits_to_set;
++ bits_to_set = BITS_PER_BYTE;
++ mask_to_set = ~(u8)0;
++ p++;
++ }
++ if (len) {
++ mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
++ *p |= mask_to_set;
++ }
++}
++
++void le_bitmap_clear(u8 *map, unsigned int start, int len)
++{
++ u8 *p = map + BIT_BYTE(start);
++ const unsigned int size = start + len;
++ int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
++ u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
++
++ while (len - bits_to_clear >= 0) {
++ *p &= ~mask_to_clear;
++ len -= bits_to_clear;
++ bits_to_clear = BITS_PER_BYTE;
++ mask_to_clear = ~(u8)0;
++ p++;
++ }
++ if (len) {
++ mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
++ *p &= ~mask_to_clear;
++ }
++}
+
+ /*
+ * eb_bitmap_offset() - calculate the page and offset of the byte containing the
+@@ -5562,7 +5590,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
+ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+ unsigned long nr)
+ {
+- char *kaddr;
++ u8 *kaddr;
+ struct page *page;
+ unsigned long i;
+ size_t offset;
+@@ -5584,13 +5612,13 @@ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len)
+ {
+- char *kaddr;
++ u8 *kaddr;
+ struct page *page;
+ unsigned long i;
+ size_t offset;
+ const unsigned int size = pos + len;
+ int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+- unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
++ u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+
+ eb_bitmap_offset(eb, start, pos, &i, &offset);
+ page = eb->pages[i];
+@@ -5601,7 +5629,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+ kaddr[offset] |= mask_to_set;
+ len -= bits_to_set;
+ bits_to_set = BITS_PER_BYTE;
+- mask_to_set = ~0U;
++ mask_to_set = ~(u8)0;
+ if (++offset >= PAGE_SIZE && len > 0) {
+ offset = 0;
+ page = eb->pages[++i];
+@@ -5626,13 +5654,13 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len)
+ {
+- char *kaddr;
++ u8 *kaddr;
+ struct page *page;
+ unsigned long i;
+ size_t offset;
+ const unsigned int size = pos + len;
+ int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+- unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
++ u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+
+ eb_bitmap_offset(eb, start, pos, &i, &offset);
+ page = eb->pages[i];
+@@ -5643,7 +5671,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+ kaddr[offset] &= ~mask_to_clear;
+ len -= bits_to_clear;
+ bits_to_clear = BITS_PER_BYTE;
+- mask_to_clear = ~0U;
++ mask_to_clear = ~(u8)0;
+ if (++offset >= PAGE_SIZE && len > 0) {
+ offset = 0;
+ page = eb->pages[++i];
+diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
+index c0c1c4fef6ce..d19010729468 100644
+--- a/fs/btrfs/extent_io.h
++++ b/fs/btrfs/extent_io.h
+@@ -58,6 +58,28 @@
+ */
+ #define EXTENT_PAGE_PRIVATE 1
+
++/*
++ * The extent buffer bitmap operations are done with byte granularity instead of
++ * word granularity for two reasons:
++ * 1. The bitmaps must be little-endian on disk.
++ * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
++ * single word in a bitmap may straddle two pages in the extent buffer.
++ */
++#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
++#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
++#define BITMAP_FIRST_BYTE_MASK(start) \
++ ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
++#define BITMAP_LAST_BYTE_MASK(nbits) \
++ (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
++
++static inline int le_test_bit(int nr, const u8 *addr)
++{
++ return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
++}
++
++extern void le_bitmap_set(u8 *map, unsigned int start, int len);
++extern void le_bitmap_clear(u8 *map, unsigned int start, int len);
++
+ struct extent_state;
+ struct btrfs_root;
+ struct btrfs_io_bio;
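+The helpers moved into extent_io.h work on u8 instead of unsigned long
+so that on-disk free-space bitmaps come out little-endian on every host,
+which is exactly the breakage the new FREE_SPACE_TREE_VALID bit (further
+down) exists to detect. le_bitmap_set() from this patch can be exercised
+stand-alone:
+
+	#include <stdio.h>
+
+	typedef unsigned char u8;
+	#define BITS_PER_BYTE 8
+	#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+	#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+	#define BITMAP_FIRST_BYTE_MASK(start) \
+		((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+	#define BITMAP_LAST_BYTE_MASK(nbits) \
+		(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+	/* le_bitmap_set() exactly as added by this patch. */
+	void le_bitmap_set(u8 *map, unsigned int start, int len)
+	{
+		u8 *p = map + BIT_BYTE(start);
+		const unsigned int size = start + len;
+		int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+		u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+		while (len - bits_to_set >= 0) {
+			*p |= mask_to_set;
+			len -= bits_to_set;
+			bits_to_set = BITS_PER_BYTE;
+			mask_to_set = ~(u8)0;
+			p++;
+		}
+		if (len) {
+			mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+			*p |= mask_to_set;
+		}
+	}
+
+	int main(void)
+	{
+		u8 map[3] = { 0 };
+		le_bitmap_set(map, 6, 9); /* bits 6..14 straddle a byte boundary */
+		printf("%02x %02x %02x\n", map[0], map[1], map[2]); /* c0 7f 00 */
+		return 0;
+	}
+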
+diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
+index 53dbeaf6ce94..0e041bf75c71 100644
+--- a/fs/btrfs/free-space-tree.c
++++ b/fs/btrfs/free-space-tree.c
+@@ -151,7 +151,7 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
+ return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
+ }
+
+-static unsigned long *alloc_bitmap(u32 bitmap_size)
++static u8 *alloc_bitmap(u32 bitmap_size)
+ {
+ void *mem;
+
+@@ -180,8 +180,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+ struct btrfs_free_space_info *info;
+ struct btrfs_key key, found_key;
+ struct extent_buffer *leaf;
+- unsigned long *bitmap;
+- char *bitmap_cursor;
++ u8 *bitmap, *bitmap_cursor;
+ u64 start, end;
+ u64 bitmap_range, i;
+ u32 bitmap_size, flags, expected_extent_count;
+@@ -231,7 +230,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+ block_group->sectorsize);
+ last = div_u64(found_key.objectid + found_key.offset - start,
+ block_group->sectorsize);
+- bitmap_set(bitmap, first, last - first);
++ le_bitmap_set(bitmap, first, last - first);
+
+ extent_count++;
+ nr++;
+@@ -269,7 +268,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+ goto out;
+ }
+
+- bitmap_cursor = (char *)bitmap;
++ bitmap_cursor = bitmap;
+ bitmap_range = block_group->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
+ i = start;
+ while (i < end) {
+@@ -318,7 +317,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_free_space_info *info;
+ struct btrfs_key key, found_key;
+ struct extent_buffer *leaf;
+- unsigned long *bitmap;
++ u8 *bitmap;
+ u64 start, end;
+ /* Initialize to silence GCC. */
+ u64 extent_start = 0;
+@@ -362,7 +361,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+ break;
+ } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
+ unsigned long ptr;
+- char *bitmap_cursor;
++ u8 *bitmap_cursor;
+ u32 bitmap_pos, data_size;
+
+ ASSERT(found_key.objectid >= start);
+@@ -372,7 +371,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+ bitmap_pos = div_u64(found_key.objectid - start,
+ block_group->sectorsize *
+ BITS_PER_BYTE);
+- bitmap_cursor = ((char *)bitmap) + bitmap_pos;
++ bitmap_cursor = bitmap + bitmap_pos;
+ data_size = free_space_bitmap_size(found_key.offset,
+ block_group->sectorsize);
+
+@@ -409,7 +408,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+ offset = start;
+ bitnr = 0;
+ while (offset < end) {
+- bit = !!test_bit(bitnr, bitmap);
++ bit = !!le_test_bit(bitnr, bitmap);
+ if (prev_bit == 0 && bit == 1) {
+ extent_start = offset;
+ } else if (prev_bit == 1 && bit == 0) {
+@@ -1183,6 +1182,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
+ }
+
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
++ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+ fs_info->creating_free_space_tree = 0;
+
+ ret = btrfs_commit_transaction(trans, tree_root);
+@@ -1251,6 +1251,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
+ return PTR_ERR(trans);
+
+ btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
++ btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+ fs_info->free_space_root = NULL;
+
+ ret = clear_free_space_tree(trans, free_space_root);
+diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
+index ce5f345d70f5..e7f16a77a22a 100644
+--- a/fs/cachefiles/interface.c
++++ b/fs/cachefiles/interface.c
+@@ -253,6 +253,8 @@ static void cachefiles_drop_object(struct fscache_object *_object)
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ const struct cred *saved_cred;
++ struct inode *inode;
++ blkcnt_t i_blocks = 0;
+
+ ASSERT(_object);
+
+@@ -279,6 +281,10 @@ static void cachefiles_drop_object(struct fscache_object *_object)
+ _object != cache->cache.fsdef
+ ) {
+ _debug("- retire object OBJ%x", object->fscache.debug_id);
++ inode = d_backing_inode(object->dentry);
++ if (inode)
++ i_blocks = inode->i_blocks;
++
+ cachefiles_begin_secure(cache, &saved_cred);
+ cachefiles_delete_object(cache, object);
+ cachefiles_end_secure(cache, saved_cred);
+@@ -292,7 +298,7 @@ static void cachefiles_drop_object(struct fscache_object *_object)
+
+ /* note that the object is now inactive */
+ if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags))
+- cachefiles_mark_object_inactive(cache, object);
++ cachefiles_mark_object_inactive(cache, object, i_blocks);
+
+ dput(object->dentry);
+ object->dentry = NULL;
+diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
+index 2fcde1a34b7c..cd1effee8a49 100644
+--- a/fs/cachefiles/internal.h
++++ b/fs/cachefiles/internal.h
+@@ -160,7 +160,8 @@ extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type);
+ * namei.c
+ */
+ extern void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
+- struct cachefiles_object *object);
++ struct cachefiles_object *object,
++ blkcnt_t i_blocks);
+ extern int cachefiles_delete_object(struct cachefiles_cache *cache,
+ struct cachefiles_object *object);
+ extern int cachefiles_walk_to_object(struct cachefiles_object *parent,
+diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
+index 3f7c2cd41f8f..c6ee4b5fb7e6 100644
+--- a/fs/cachefiles/namei.c
++++ b/fs/cachefiles/namei.c
+@@ -261,10 +261,9 @@ requeue:
+ * Mark an object as being inactive.
+ */
+ void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
+- struct cachefiles_object *object)
++ struct cachefiles_object *object,
++ blkcnt_t i_blocks)
+ {
+- blkcnt_t i_blocks = d_backing_inode(object->dentry)->i_blocks;
+-
+ write_lock(&cache->active_lock);
+ rb_erase(&object->active_node, &cache->active_nodes);
+ clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
+@@ -707,7 +706,8 @@ mark_active_timed_out:
+
+ check_error:
+ _debug("check error %d", ret);
+- cachefiles_mark_object_inactive(cache, object);
++ cachefiles_mark_object_inactive(
++ cache, object, d_backing_inode(object->dentry)->i_blocks);
+ release_dentry:
+ dput(object->dentry);
+ object->dentry = NULL;
+diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
+index 592059f88e04..309f4e9b2419 100644
+--- a/fs/debugfs/file.c
++++ b/fs/debugfs/file.c
+@@ -97,9 +97,6 @@ EXPORT_SYMBOL_GPL(debugfs_use_file_finish);
+
+ #define F_DENTRY(filp) ((filp)->f_path.dentry)
+
+-#define REAL_FOPS_DEREF(dentry) \
+- ((const struct file_operations *)(dentry)->d_fsdata)
+-
+ static int open_proxy_open(struct inode *inode, struct file *filp)
+ {
+ const struct dentry *dentry = F_DENTRY(filp);
+@@ -112,7 +109,7 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
+ goto out;
+ }
+
+- real_fops = REAL_FOPS_DEREF(dentry);
++ real_fops = debugfs_real_fops(filp);
+ real_fops = fops_get(real_fops);
+ if (!real_fops) {
+ /* Huh? Module did not clean up after itself at exit? */
+@@ -143,7 +140,7 @@ static ret_type full_proxy_ ## name(proto) \
+ { \
+ const struct dentry *dentry = F_DENTRY(filp); \
+ const struct file_operations *real_fops = \
+- REAL_FOPS_DEREF(dentry); \
++ debugfs_real_fops(filp); \
+ int srcu_idx; \
+ ret_type r; \
+ \
+@@ -176,7 +173,7 @@ static unsigned int full_proxy_poll(struct file *filp,
+ struct poll_table_struct *wait)
+ {
+ const struct dentry *dentry = F_DENTRY(filp);
+- const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry);
++ const struct file_operations *real_fops = debugfs_real_fops(filp);
+ int srcu_idx;
+ unsigned int r = 0;
+
+@@ -193,7 +190,7 @@ static unsigned int full_proxy_poll(struct file *filp,
+ static int full_proxy_release(struct inode *inode, struct file *filp)
+ {
+ const struct dentry *dentry = F_DENTRY(filp);
+- const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry);
++ const struct file_operations *real_fops = debugfs_real_fops(filp);
+ const struct file_operations *proxy_fops = filp->f_op;
+ int r = 0;
+
+@@ -241,7 +238,7 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
+ goto out;
+ }
+
+- real_fops = REAL_FOPS_DEREF(dentry);
++ real_fops = debugfs_real_fops(filp);
+ real_fops = fops_get(real_fops);
+ if (!real_fops) {
+ /* Huh? Module did not cleanup after itself at exit? */
+diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
+index 1ab012a27d9f..be14bea958b4 100644
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -1657,16 +1657,12 @@ void dlm_lowcomms_stop(void)
+ mutex_lock(&connections_lock);
+ dlm_allow_conn = 0;
+ foreach_conn(stop_conn);
++ clean_writequeues();
++ foreach_conn(free_conn);
+ mutex_unlock(&connections_lock);
+
+ work_stop();
+
+- mutex_lock(&connections_lock);
+- clean_writequeues();
+-
+- foreach_conn(free_conn);
+-
+- mutex_unlock(&connections_lock);
+ kmem_cache_destroy(con_cache);
+ }
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index d7ccb7f51dfc..7f69347bd5a5 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -5734,6 +5734,9 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+ up_write(&EXT4_I(inode)->i_data_sem);
+ goto out_stop;
+ }
++ } else {
++ ext4_ext_drop_refs(path);
++ kfree(path);
+ }
+
+ ret = ext4_es_remove_extent(inode, offset_lblk,
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index ea628af9d609..8fa01cbd74b3 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -647,11 +647,19 @@ found:
+ /*
+ * We have to zeroout blocks before inserting them into extent
+ * status tree. Otherwise someone could look them up there and
+- * use them before they are really zeroed.
++ * use them before they are really zeroed. We also have to
++ * unmap metadata before zeroing as otherwise writeback can
++ * overwrite zeros with stale data from block device.
+ */
+ if (flags & EXT4_GET_BLOCKS_ZERO &&
+ map->m_flags & EXT4_MAP_MAPPED &&
+ map->m_flags & EXT4_MAP_NEW) {
++ ext4_lblk_t i;
++
++ for (i = 0; i < map->m_len; i++) {
++ unmap_underlying_metadata(inode->i_sb->s_bdev,
++ map->m_pblk + i);
++ }
+ ret = ext4_issue_zeroout(inode, map->m_lblk,
+ map->m_pblk, map->m_len);
+ if (ret) {
+@@ -1649,6 +1657,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+ if (invalidate) {
++ if (page_mapped(page))
++ clear_page_dirty_for_io(page);
+ block_invalidatepage(page, 0, PAGE_SIZE);
+ ClearPageUptodate(page);
+ }
+@@ -3890,7 +3900,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+ }
+
+ /*
+- * ext4_punch_hole: punches a hole in a file by releaseing the blocks
++ * ext4_punch_hole: punches a hole in a file by releasing the blocks
+ * associated with the given offset and length
+ *
+ * @inode: File inode
+@@ -3919,7 +3929,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+ * Write out all dirty pages to avoid race conditions
+ * Then release them.
+ */
+- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
++ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+ ret = filemap_write_and_wait_range(mapping, offset,
+ offset + length - 1);
+ if (ret)
+@@ -4814,14 +4824,14 @@ static int ext4_do_update_inode(handle_t *handle,
+ * Fix up interoperability with old kernels. Otherwise, old inodes get
+ * re-used with the upper 16 bits of the uid/gid intact
+ */
+- if (!ei->i_dtime) {
++ if (ei->i_dtime && list_empty(&ei->i_orphan)) {
++ raw_inode->i_uid_high = 0;
++ raw_inode->i_gid_high = 0;
++ } else {
+ raw_inode->i_uid_high =
+ cpu_to_le16(high_16_bits(i_uid));
+ raw_inode->i_gid_high =
+ cpu_to_le16(high_16_bits(i_gid));
+- } else {
+- raw_inode->i_uid_high = 0;
+- raw_inode->i_gid_high = 0;
+ }
+ } else {
+ raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
+diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
+index a920c5d29fac..6fc14def0c70 100644
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -598,6 +598,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
+ return -EOPNOTSUPP;
+ }
+
++ if (ext4_encrypted_inode(orig_inode) ||
++ ext4_encrypted_inode(donor_inode)) {
++ ext4_msg(orig_inode->i_sb, KERN_ERR,
++ "Online defrag not supported for encrypted files");
++ return -EOPNOTSUPP;
++ }
++
+ /* Protect orig and donor inodes against a truncate */
+ lock_two_nondirectories(orig_inode, donor_inode);
+
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 5bb46b6ed456..593f32b7fcd9 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -2043,33 +2043,31 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
+ frame->entries = entries;
+ frame->at = entries;
+ frame->bh = bh;
+- bh = bh2;
+
+ retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
+ if (retval)
+ goto out_frames;
+- retval = ext4_handle_dirty_dirent_node(handle, dir, bh);
++ retval = ext4_handle_dirty_dirent_node(handle, dir, bh2);
+ if (retval)
+ goto out_frames;
+
+- de = do_split(handle,dir, &bh, frame, &fname->hinfo);
++ de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
+ if (IS_ERR(de)) {
+ retval = PTR_ERR(de);
+ goto out_frames;
+ }
+- dx_release(frames);
+
+- retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
+- brelse(bh);
+- return retval;
++ retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
+ out_frames:
+ /*
+ * Even if the block split failed, we have to properly write
+ * out all the changes we did so far. Otherwise we can end up
+ * with corrupted filesystem.
+ */
+- ext4_mark_inode_dirty(handle, dir);
++ if (retval)
++ ext4_mark_inode_dirty(handle, dir);
+ dx_release(frames);
++ brelse(bh2);
+ return retval;
+ }
+
+diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
+index cca7b048c07b..31145d625a61 100644
+--- a/fs/fuse/dir.c
++++ b/fs/fuse/dir.c
+@@ -1701,14 +1701,46 @@ error:
+ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+ {
+ struct inode *inode = d_inode(entry);
++ struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
++ int ret;
+
+ if (!fuse_allow_current_process(get_fuse_conn(inode)))
+ return -EACCES;
+
+- if (attr->ia_valid & ATTR_FILE)
+- return fuse_do_setattr(inode, attr, attr->ia_file);
+- else
+- return fuse_do_setattr(inode, attr, NULL);
++ if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
++ int kill;
++
++ attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
++ ATTR_MODE);
++ /*
++ * ia_mode calculation may have used stale i_mode. Refresh and
++ * recalculate.
++ */
++ ret = fuse_do_getattr(inode, NULL, file);
++ if (ret)
++ return ret;
++
++ attr->ia_mode = inode->i_mode;
++ kill = should_remove_suid(entry);
++ if (kill & ATTR_KILL_SUID) {
++ attr->ia_valid |= ATTR_MODE;
++ attr->ia_mode &= ~S_ISUID;
++ }
++ if (kill & ATTR_KILL_SGID) {
++ attr->ia_valid |= ATTR_MODE;
++ attr->ia_mode &= ~S_ISGID;
++ }
++ }
++ if (!attr->ia_valid)
++ return 0;
++
++ ret = fuse_do_setattr(inode, attr, file);
++ if (!ret) {
++ /* Directory mode changed, may need to revalidate access */
++ if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
++ fuse_invalidate_entry_cache(entry);
++ }
++ return ret;
+ }
+
+ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
+@@ -1800,6 +1832,23 @@ static ssize_t fuse_getxattr(struct dentry *entry, struct inode *inode,
+ return ret;
+ }
+
++static int fuse_verify_xattr_list(char *list, size_t size)
++{
++ size_t origsize = size;
++
++ while (size) {
++ size_t thislen = strnlen(list, size);
++
++ if (!thislen || thislen == size)
++ return -EIO;
++
++ size -= thislen + 1;
++ list += thislen + 1;
++ }
++
++ return origsize;
++}
++
+ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
+ {
+ struct inode *inode = d_inode(entry);
+@@ -1835,6 +1884,8 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
+ ret = fuse_simple_request(fc, &args);
+ if (!ret && !size)
+ ret = outarg.size;
++ if (ret > 0 && size)
++ ret = fuse_verify_xattr_list(list, ret);
+ if (ret == -ENOSYS) {
+ fc->no_listxattr = 1;
+ ret = -EOPNOTSUPP;
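+fuse_verify_xattr_list() treats the server-supplied xattr list as
+untrusted input: it must be a sequence of non-empty, NUL-terminated
+names that exactly fills the buffer. The logic ports directly to user
+space for testing (-EIO becomes -1):
+
+	#include <stdio.h>
+	#include <string.h>
+
+	/* Same logic as fuse_verify_xattr_list() above. */
+	static long verify_xattr_list(const char *list, size_t size)
+	{
+		size_t origsize = size;
+
+		while (size) {
+			size_t thislen = strnlen(list, size);
+
+			if (!thislen || thislen == size)
+				return -1; /* empty name, or a name with no terminator */
+
+			size -= thislen + 1;
+			list += thislen + 1;
+		}
+		return (long)origsize;
+	}
+
+	int main(void)
+	{
+		printf("%ld\n", verify_xattr_list("user.a\0user.b\0", 14)); /* 14 */
+		printf("%ld\n", verify_xattr_list("user.a", 6));            /* -1 */
+		return 0;
+	}
+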
+diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
+index c72c16c5a60f..b810826722c8 100644
+--- a/fs/reiserfs/super.c
++++ b/fs/reiserfs/super.c
+@@ -190,7 +190,15 @@ static int remove_save_link_only(struct super_block *s,
+ static int reiserfs_quota_on_mount(struct super_block *, int);
+ #endif
+
+-/* look for uncompleted unlinks and truncates and complete them */
++/*
++ * Look for uncompleted unlinks and truncates and complete them
++ *
++ * Called with superblock write locked. If quotas are enabled, we have to
++ * release/retake lest we call dquot_quota_on_mount(), proceed to
++ * schedule_on_each_cpu() in invalidate_bdev() and deadlock waiting for the per
++ * cpu worklets to complete flush_async_commits() that in turn wait for the
++ * superblock write lock.
++ */
+ static int finish_unfinished(struct super_block *s)
+ {
+ INITIALIZE_PATH(path);
+@@ -237,7 +245,9 @@ static int finish_unfinished(struct super_block *s)
+ quota_enabled[i] = 0;
+ continue;
+ }
++ reiserfs_write_unlock(s);
+ ret = reiserfs_quota_on_mount(s, i);
++ reiserfs_write_lock(s);
+ if (ret < 0)
+ reiserfs_warning(s, "reiserfs-2500",
+ "cannot turn on journaled "
+diff --git a/fs/utimes.c b/fs/utimes.c
+index 85c40f4f373d..ba54b9e648c9 100644
+--- a/fs/utimes.c
++++ b/fs/utimes.c
+@@ -87,20 +87,7 @@ static int utimes_common(struct path *path, struct timespec *times)
+ */
+ newattrs.ia_valid |= ATTR_TIMES_SET;
+ } else {
+- /*
+- * If times is NULL (or both times are UTIME_NOW),
+- * then we need to check permissions, because
+- * inode_change_ok() won't do it.
+- */
+- error = -EACCES;
+- if (IS_IMMUTABLE(inode))
+- goto mnt_drop_write_and_out;
+-
+- if (!inode_owner_or_capable(inode)) {
+- error = inode_permission(inode, MAY_WRITE);
+- if (error)
+- goto mnt_drop_write_and_out;
+- }
++ newattrs.ia_valid |= ATTR_TOUCH;
+ }
+ retry_deleg:
+ inode_lock(inode);
+@@ -112,7 +99,6 @@ retry_deleg:
+ goto retry_deleg;
+ }
+
+-mnt_drop_write_and_out:
+ mnt_drop_write(path->mnt);
+ out:
+ return error;
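+Together with the fs/attr.c hunk earlier in this patch, this moves the
+utimes(NULL) permission check out of utimes_common() and behind the new
+ATTR_TOUCH flag in notify_change(), so every caller, including FUSE,
+gets one consistent immutable/owner/write test. A toy model of that
+decision:
+
+	#include <stdbool.h>
+	#include <stdio.h>
+
+	/* Toy model: a touch is allowed if the inode is not immutable and
+	 * the caller either owns it or has write permission. */
+	static int check_touch(bool immutable, bool owner, bool may_write)
+	{
+		if (immutable)
+			return -1;  /* -EPERM */
+		if (!owner && !may_write)
+			return -13; /* inode_permission() failure, e.g. -EACCES */
+		return 0;
+	}
+
+	int main(void)
+	{
+		printf("%d\n", check_touch(false, false, true)); /* 0: writable */
+		printf("%d\n", check_touch(true, true, true));   /* -1: immutable */
+		return 0;
+	}
+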
+diff --git a/include/crypto/ghash.h b/include/crypto/ghash.h
+new file mode 100644
+index 000000000000..2a61c9bbab8f
+--- /dev/null
++++ b/include/crypto/ghash.h
+@@ -0,0 +1,23 @@
++/*
++ * Common values for GHASH algorithms
++ */
++
++#ifndef __CRYPTO_GHASH_H__
++#define __CRYPTO_GHASH_H__
++
++#include <linux/types.h>
++#include <crypto/gf128mul.h>
++
++#define GHASH_BLOCK_SIZE 16
++#define GHASH_DIGEST_SIZE 16
++
++struct ghash_ctx {
++ struct gf128mul_4k *gf128;
++};
++
++struct ghash_desc_ctx {
++ u8 buffer[GHASH_BLOCK_SIZE];
++ u32 bytes;
++};
++
++#endif
+diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
+index 1438e2322d5c..4d3f0d1aec73 100644
+--- a/include/linux/debugfs.h
++++ b/include/linux/debugfs.h
+@@ -45,6 +45,23 @@ extern struct dentry *arch_debugfs_dir;
+
+ extern struct srcu_struct debugfs_srcu;
+
++/**
++ * debugfs_real_fops - getter for the real file operation
++ * @filp: a pointer to a struct file
++ *
++ * Must only be called under the protection established by
++ * debugfs_use_file_start().
++ */
++static inline const struct file_operations *debugfs_real_fops(struct file *filp)
++ __must_hold(&debugfs_srcu)
++{
++ /*
++ * Neither the pointer to the struct file_operations, nor its
++ * contents ever change -- srcu_dereference() is not needed here.
++ */
++ return filp->f_path.dentry->d_fsdata;
++}
++
+ #if defined(CONFIG_DEBUG_FS)
+
+ struct dentry *debugfs_create_file(const char *name, umode_t mode,
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index dd288148a6b1..cf27c88cb933 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -228,6 +228,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
+ #define ATTR_KILL_PRIV (1 << 14)
+ #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
+ #define ATTR_TIMES_SET (1 << 16)
++#define ATTR_TOUCH (1 << 17)
+
+ /*
+ * Whiteout is represented by a char device. The following constants define the
+diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
+index 2bdd1e3e7007..409be3555a3e 100644
+--- a/include/uapi/linux/btrfs.h
++++ b/include/uapi/linux/btrfs.h
+@@ -239,7 +239,17 @@ struct btrfs_ioctl_fs_info_args {
+ * Used by:
+ * struct btrfs_ioctl_feature_flags
+ */
+-#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0)
++#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0)
++/*
++ * Older kernels (< 4.9) on big-endian systems produced broken free space tree
++ * bitmaps, and btrfs-progs also used to corrupt the free space tree (versions
++ * < 4.7.3). If this bit is clear, then the free space tree cannot be trusted.
++ * btrfs-progs can also intentionally clear this bit to ask the kernel to
++ * rebuild the free space tree, however this might not work on older kernels
++ * that do not know about this bit. If not sure, clear the cache manually on
++ * first mount when booting older kernel versions.
++ */
++#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1)
+
+ #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
+ #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
+diff --git a/mm/filemap.c b/mm/filemap.c
+index 20f3b1f33f0e..b510542e3077 100644
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -1609,6 +1609,10 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
+ unsigned int prev_offset;
+ int error = 0;
+
++ if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
++ return -EINVAL;
++ iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
++
+ index = *ppos >> PAGE_SHIFT;
+ prev_index = ra->prev_pos >> PAGE_SHIFT;
+ prev_offset = ra->prev_pos & (PAGE_SIZE-1);
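+The do_generic_file_read() hunk rejects reads that start at or past
+s_maxbytes and truncates the iterator, so buffered reads can never index
+page-cache slots beyond what the filesystem supports. Reduced to
+arithmetic (a simplification of what the -EINVAL check plus
+iov_iter_truncate() achieve together):
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		long long maxbytes = 1LL << 44; /* illustrative s_maxbytes */
+		long long pos = maxbytes + 4096, len = 65536;
+
+		if (pos >= maxbytes) {    /* the read starts past the limit */
+			puts("-EINVAL");
+			return 0;
+		}
+		if (pos + len > maxbytes) /* otherwise shorten the read */
+			len = maxbytes - pos;
+		printf("read %lld bytes\n", len);
+		return 0;
+	}
+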
+diff --git a/sound/soc/intel/atom/sst/sst_pvt.c b/sound/soc/intel/atom/sst/sst_pvt.c
+index adb32fefd693..b1e6b8f34a6a 100644
+--- a/sound/soc/intel/atom/sst/sst_pvt.c
++++ b/sound/soc/intel/atom/sst/sst_pvt.c
+@@ -279,17 +279,15 @@ int sst_prepare_and_post_msg(struct intel_sst_drv *sst,
+
+ if (response) {
+ ret = sst_wait_timeout(sst, block);
+- if (ret < 0) {
++ if (ret < 0)
+ goto out;
+- } else if(block->data) {
+- if (!data)
+- goto out;
+- *data = kzalloc(block->size, GFP_KERNEL);
+- if (!(*data)) {
++
++ if (data && block->data) {
++ *data = kmemdup(block->data, block->size, GFP_KERNEL);
++ if (!*data) {
+ ret = -ENOMEM;
+ goto out;
+- } else
+- memcpy(data, (void *) block->data, block->size);
++ }
+ }
+ }
+ out:
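+The sst_prepare_and_post_msg() cleanup replaces a kzalloc() plus
+memcpy() pair, which also copied into data rather than *data, with a
+single kmemdup(). A user-space analogue:
+
+	#include <stdlib.h>
+	#include <string.h>
+
+	/* User-space analogue of kmemdup(): allocate and copy in one step,
+	 * so there is no window to copy into the wrong pointer. */
+	static void *memdup(const void *src, size_t len)
+	{
+		void *p = malloc(len);
+
+		if (p)
+			memcpy(p, src, len);
+		return p;
+	}
+
+	int main(void)
+	{
+		const char msg[] = "block data";
+		char *copy = memdup(msg, sizeof(msg));
+		int ok = copy != NULL;
+
+		free(copy);
+		return ok ? 0 : 1;
+	}
+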