* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-04-25 12:19 Mike Pagano
From: Mike Pagano @ 2016-04-25 12:19 UTC
To: gentoo-commits
commit: db486a2c722de027b6fb81d8e367c706d6944bbb
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Mon Apr 25 12:19:52 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Mon Apr 25 12:19:52 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=db486a2c
Linux 4.6. Patch to enable link security restrictions by default. Patch to disable Windows 8 compatibility for some Lenovo ThinkPads. Patch to ensure that /dev/root doesn't appear in /proc/mounts when booting without an initramfs. fbcondecor bootsplash patch. Add Gentoo Linux support config settings and defaults. Kernel patch that enables gcc < v4.9 optimizations for additional CPUs. Kernel patch that enables gcc >= v4.9 optimizations for additional CPUs.
0000_README | 28 +
1500_XATTR_USER_PREFIX.patch | 69 +
...ble-link-security-restrictions-by-default.patch | 22 +
2700_ThinkPad-30-brightness-control-fix.patch | 67 +
2900_dev-root-proc-mount-fix.patch | 38 +
4200_fbcondecor-3.19.patch | 2119 ++++++++++++++++++++
...able-additional-cpu-optimizations-for-gcc.patch | 327 +++
...-additional-cpu-optimizations-for-gcc-4.9.patch | 402 ++++
8 files changed, 3072 insertions(+)
diff --git a/0000_README b/0000_README
index 9018993..8e70e78 100644
--- a/0000_README
+++ b/0000_README
@@ -43,6 +43,34 @@ EXPERIMENTAL
Individual Patch Descriptions:
--------------------------------------------------------------------------
+Patch: 1500_XATTR_USER_PREFIX.patch
+From: https://bugs.gentoo.org/show_bug.cgi?id=470644
+Desc: Support for namespace user.pax.* on tmpfs.
+
+Patch: 1510_fs-enable-link-security-restrictions-by-default.patch
+From: http://sources.debian.net/src/linux/3.16.7-ckt4-3/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch/
+Desc: Enable link security restrictions by default.
+
+Patch: 2700_ThinkPad-30-brightness-control-fix.patch
+From: Seth Forshee <seth.forshee@canonical.com>
+Desc: ACPI: Disable Windows 8 compatibility for some Lenovo ThinkPads.
+
+Patch: 2900_dev-root-proc-mount-fix.patch
+From: https://bugs.gentoo.org/show_bug.cgi?id=438380
+Desc: Ensure that /dev/root doesn't appear in /proc/mounts when booting without an initramfs.
+
+Patch: 4200_fbcondecor-3.19.patch
+From: http://www.mepiscommunity.org/fbcondecor
+Desc: Bootsplash ported by Marco. (Bug #539616)
+
Patch: 4567_distro-Gentoo-Kconfig.patch
From: Tom Wijsman <TomWij@gentoo.org>
Desc: Add Gentoo Linux support config settings and defaults.
+
+Patch: 5000_enable-additional-cpu-optimizations-for-gcc.patch
+From: https://github.com/graysky2/kernel_gcc_patch/
+Desc: Kernel patch enables gcc < v4.9 optimizations for additional CPUs.
+
+Patch: 5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
+From: https://github.com/graysky2/kernel_gcc_patch/
+Desc: Kernel patch enables gcc >= v4.9 optimizations for additional CPUs.
diff --git a/1500_XATTR_USER_PREFIX.patch b/1500_XATTR_USER_PREFIX.patch
new file mode 100644
index 0000000..bacd032
--- /dev/null
+++ b/1500_XATTR_USER_PREFIX.patch
@@ -0,0 +1,69 @@
+From: Anthony G. Basile <blueness@gentoo.org>
+
+This patch adds support for a restricted user-controlled namespace on
+tmpfs filesystem used to house PaX flags. The namespace must be of the
+form user.pax.* and its value cannot exceed a size of 8 bytes.
+
+This is needed on all Gentoo systems so that XATTR_PAX flags
+are preserved for users who might build packages using portage on
+a tmpfs system with a non-hardened kernel and then switch to a
+hardened kernel with XATTR_PAX enabled.
+
+The namespace is added to any user with Extended Attribute support
+enabled for tmpfs. Users who do not enable xattrs will not have
+the XATTR_PAX flags preserved.
+
+diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
+index 1590c49..5eab462 100644
+--- a/include/uapi/linux/xattr.h
++++ b/include/uapi/linux/xattr.h
+@@ -73,5 +73,9 @@
+ #define XATTR_POSIX_ACL_DEFAULT "posix_acl_default"
+ #define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT
+
++/* User namespace */
++#define XATTR_PAX_PREFIX XATTR_USER_PREFIX "pax."
++#define XATTR_PAX_FLAGS_SUFFIX "flags"
++#define XATTR_NAME_PAX_FLAGS XATTR_PAX_PREFIX XATTR_PAX_FLAGS_SUFFIX
+
+ #endif /* _UAPI_LINUX_XATTR_H */
+diff --git a/mm/shmem.c b/mm/shmem.c
+index 440e2a7..c377172 100644
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2667,6 +2667,14 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
+ struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
+
+ name = xattr_full_name(handler, name);
++
++ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
++ if (strcmp(name, XATTR_NAME_PAX_FLAGS))
++ return -EOPNOTSUPP;
++ if (size > 8)
++ return -EINVAL;
++ }
++
+ return simple_xattr_set(&info->xattrs, name, value, size, flags);
+ }
+
+@@ -2682,6 +2690,12 @@ static const struct xattr_handler shmem_trusted_xattr_handler = {
+ .set = shmem_xattr_handler_set,
+ };
+
++static const struct xattr_handler shmem_user_xattr_handler = {
++ .prefix = XATTR_USER_PREFIX,
++ .get = shmem_xattr_handler_get,
++ .set = shmem_xattr_handler_set,
++};
++
+ static const struct xattr_handler *shmem_xattr_handlers[] = {
+ #ifdef CONFIG_TMPFS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+@@ -2689,6 +2703,7 @@ static const struct xattr_handler *shmem_xattr_handlers[] = {
+ #endif
+ &shmem_security_xattr_handler,
+ &shmem_trusted_xattr_handler,
++ &shmem_user_xattr_handler,
+ NULL
+ };
+
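For context, here is a minimal userspace sketch of what the shmem handler above accepts and rejects; the file path and flag value are assumptions, and it presumes the file lives on a tmpfs mount built with extended attribute support enabled:

/* Hypothetical demonstration of the tmpfs user.pax.* restrictions added by
 * 1500_XATTR_USER_PREFIX.patch; path and flag string are made up for the
 * example. */
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path  = "/tmp/testfile";   /* assumed to be on tmpfs */
	const char *flags = "em";              /* example PaX flag string */

	/* Accepted: the name is user.pax.flags and the value fits in 8 bytes. */
	if (setxattr(path, "user.pax.flags", flags, strlen(flags), 0) != 0)
		perror("setxattr user.pax.flags");

	/* Rejected with EOPNOTSUPP: any other user.* name on tmpfs. */
	if (setxattr(path, "user.comment", "x", 1, 0) != 0)
		perror("setxattr user.comment");

	/* Rejected with EINVAL: value longer than 8 bytes. */
	if (setxattr(path, "user.pax.flags", "123456789", 9, 0) != 0)
		perror("setxattr oversized value");

	return 0;
}

The second and third calls are expected to fail, matching the checks added to shmem_xattr_handler_set() above.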
diff --git a/1510_fs-enable-link-security-restrictions-by-default.patch b/1510_fs-enable-link-security-restrictions-by-default.patch
new file mode 100644
index 0000000..639fb3c
--- /dev/null
+++ b/1510_fs-enable-link-security-restrictions-by-default.patch
@@ -0,0 +1,22 @@
+From: Ben Hutchings <ben@decadent.org.uk>
+Subject: fs: Enable link security restrictions by default
+Date: Fri, 02 Nov 2012 05:32:06 +0000
+Bug-Debian: https://bugs.debian.org/609455
+Forwarded: not-needed
+
+This reverts commit 561ec64ae67ef25cac8d72bb9c4bfc955edfd415
+('VFS: don't do protected {sym,hard}links by default').
+
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -651,8 +651,8 @@ static inline void put_link(struct namei
+ path_put(link);
+ }
+
+-int sysctl_protected_symlinks __read_mostly = 0;
+-int sysctl_protected_hardlinks __read_mostly = 0;
++int sysctl_protected_symlinks __read_mostly = 1;
++int sysctl_protected_hardlinks __read_mostly = 1;
+
+ /**
+ * may_follow_link - Check symlink following for unsafe situations
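To confirm the new defaults at runtime, a small sketch that reads the corresponding sysctl files (the standard fs.protected_symlinks / fs.protected_hardlinks entries under /proc/sys/fs; a value of 1 means the restriction is active):

/* Read back the link-restriction sysctls whose defaults the patch above
 * flips to 1. */
#include <stdio.h>

static void show(const char *path)
{
	char buf[16] = "";
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s = %s", path, buf);   /* expected: 1 with this patch */
	fclose(f);
}

int main(void)
{
	show("/proc/sys/fs/protected_symlinks");
	show("/proc/sys/fs/protected_hardlinks");
	return 0;
}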
diff --git a/2700_ThinkPad-30-brightness-control-fix.patch b/2700_ThinkPad-30-brightness-control-fix.patch
new file mode 100644
index 0000000..b548c6d
--- /dev/null
+++ b/2700_ThinkPad-30-brightness-control-fix.patch
@@ -0,0 +1,67 @@
+diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
+index cb96296..6c242ed 100644
+--- a/drivers/acpi/blacklist.c
++++ b/drivers/acpi/blacklist.c
+@@ -269,6 +276,61 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
+ },
+
+ /*
++ * The following Lenovo models have a broken workaround in the
++ * acpi_video backlight implementation to meet the Windows 8
++ * requirement of 101 backlight levels. Reverting to pre-Win8
++ * behavior fixes the problem.
++ */
++ {
++ .callback = dmi_disable_osi_win8,
++ .ident = "Lenovo ThinkPad L430",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L430"),
++ },
++ },
++ {
++ .callback = dmi_disable_osi_win8,
++ .ident = "Lenovo ThinkPad T430s",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T430s"),
++ },
++ },
++ {
++ .callback = dmi_disable_osi_win8,
++ .ident = "Lenovo ThinkPad T530",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T530"),
++ },
++ },
++ {
++ .callback = dmi_disable_osi_win8,
++ .ident = "Lenovo ThinkPad W530",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W530"),
++ },
++ },
++ {
++ .callback = dmi_disable_osi_win8,
++ .ident = "Lenovo ThinkPad X1 Carbon",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X1 Carbon"),
++ },
++ },
++ {
++ .callback = dmi_disable_osi_win8,
++ .ident = "Lenovo ThinkPad X230",
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
++ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X230"),
++ },
++ },
++
++ /*
+ * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
+ * Linux ignores it, except for the machines enumerated below.
+ */
+
diff --git a/2900_dev-root-proc-mount-fix.patch b/2900_dev-root-proc-mount-fix.patch
new file mode 100644
index 0000000..60af1eb
--- /dev/null
+++ b/2900_dev-root-proc-mount-fix.patch
@@ -0,0 +1,38 @@
+--- a/init/do_mounts.c 2015-08-19 10:27:16.753852576 -0400
++++ b/init/do_mounts.c 2015-08-19 10:34:25.473850353 -0400
+@@ -490,7 +490,11 @@ void __init change_floppy(char *fmt, ...
+ va_start(args, fmt);
+ vsprintf(buf, fmt, args);
+ va_end(args);
+- fd = sys_open("/dev/root", O_RDWR | O_NDELAY, 0);
++ if (saved_root_name[0])
++ fd = sys_open(saved_root_name, O_RDWR | O_NDELAY, 0);
++ else
++ fd = sys_open("/dev/root", O_RDWR | O_NDELAY, 0);
++
+ if (fd >= 0) {
+ sys_ioctl(fd, FDEJECT, 0);
+ sys_close(fd);
+@@ -534,11 +538,17 @@ void __init mount_root(void)
+ #endif
+ #ifdef CONFIG_BLOCK
+ {
+- int err = create_dev("/dev/root", ROOT_DEV);
+-
+- if (err < 0)
+- pr_emerg("Failed to create /dev/root: %d\n", err);
+- mount_block_root("/dev/root", root_mountflags);
++ if (saved_root_name[0] == '/') {
++ int err = create_dev(saved_root_name, ROOT_DEV);
++ if (err < 0)
++ pr_emerg("Failed to create %s: %d\n", saved_root_name, err);
++ mount_block_root(saved_root_name, root_mountflags);
++ } else {
++ int err = create_dev("/dev/root", ROOT_DEV);
++ if (err < 0)
++ pr_emerg("Failed to create /dev/root: %d\n", err);
++ mount_block_root("/dev/root", root_mountflags);
++ }
+ }
+ #endif
+ }
diff --git a/4200_fbcondecor-3.19.patch b/4200_fbcondecor-3.19.patch
new file mode 100644
index 0000000..29c379f
--- /dev/null
+++ b/4200_fbcondecor-3.19.patch
@@ -0,0 +1,2119 @@
+diff --git a/Documentation/fb/00-INDEX b/Documentation/fb/00-INDEX
+index fe85e7c..2230930 100644
+--- a/Documentation/fb/00-INDEX
++++ b/Documentation/fb/00-INDEX
+@@ -23,6 +23,8 @@ ep93xx-fb.txt
+ - info on the driver for EP93xx LCD controller.
+ fbcon.txt
+ - intro to and usage guide for the framebuffer console (fbcon).
++fbcondecor.txt
++ - info on the Framebuffer Console Decoration
+ framebuffer.txt
+ - introduction to frame buffer devices.
+ gxfb.txt
+diff --git a/Documentation/fb/fbcondecor.txt b/Documentation/fb/fbcondecor.txt
+new file mode 100644
+index 0000000..3388c61
+--- /dev/null
++++ b/Documentation/fb/fbcondecor.txt
+@@ -0,0 +1,207 @@
++What is it?
++-----------
++
++The framebuffer decorations are a kernel feature which allows displaying a
++background picture on selected consoles.
++
++What do I need to get it to work?
++---------------------------------
++
++To get fbcondecor up-and-running you will have to:
++ 1) get a copy of splashutils [1] or a similar program
++ 2) get some fbcondecor themes
++ 3) build the kernel helper program
++ 4) build your kernel with the FB_CON_DECOR option enabled.
++
++To get fbcondecor operational right after fbcon initialization is finished, you
++will have to include a theme and the kernel helper into your initramfs image.
++Please refer to splashutils documentation for instructions on how to do that.
++
++[1] The splashutils package can be downloaded from:
++ http://github.com/alanhaggai/fbsplash
++
++The userspace helper
++--------------------
++
++The userspace fbcondecor helper (by default: /sbin/fbcondecor_helper) is called by the
++kernel whenever an important event occurs and the kernel needs some kind of
++job to be carried out. Important events include console switches and video
++mode switches (the kernel requests background images and configuration
++parameters for the current console). The fbcondecor helper must be accessible at
++all times. If it's not, fbcondecor will be switched off automatically.
++
++It's possible to set the path to the fbcondecor helper by writing it to
++/proc/sys/kernel/fbcondecor.
++
++*****************************************************************************
++
++The information below is mostly technical stuff. There's probably no need to
++read it unless you plan to develop a userspace helper.
++
++The fbcondecor protocol
++-----------------------
++
++The fbcondecor protocol defines a communication interface between the kernel and
++the userspace fbcondecor helper.
++
++The kernel side is responsible for:
++
++ * rendering console text, using an image as a background (instead of a
++ standard solid color fbcon uses),
++ * accepting commands from the user via ioctls on the fbcondecor device,
++ * calling the userspace helper to set things up as soon as the fb subsystem
++ is initialized.
++
++The userspace helper is responsible for everything else, including parsing
++configuration files, decompressing the image files whenever the kernel needs
++it, and communicating with the kernel if necessary.
++
++The fbcondecor protocol specifies how communication is done in both ways:
++kernel->userspace and userspace->kernel.
++
++Kernel -> Userspace
++-------------------
++
++The kernel communicates with the userspace helper by calling it and specifying
++the task to be done in a series of arguments.
++
++The arguments follow the pattern:
++<fbcondecor protocol version> <command> <parameters>
++
++All commands defined in fbcondecor protocol v2 have the following parameters:
++ virtual console
++ framebuffer number
++ theme
++
++Fbcondecor protocol v1 specified an additional 'fbcondecor mode' after the
++framebuffer number. Fbcondecor protocol v1 is deprecated and should not be used.
++
++Fbcondecor protocol v2 specifies the following commands:
++
++getpic
++------
++ The kernel issues this command to request image data. It's up to the
++ userspace helper to find a background image appropriate for the specified
++ theme and the current resolution. The userspace helper should respond by
++ issuing the FBIOCONDECOR_SETPIC ioctl.
++
++init
++----
++ The kernel issues this command after the fbcondecor device is created and
++ the fbcondecor interface is initialized. Upon receiving 'init', the userspace
++ helper should parse the kernel command line (/proc/cmdline) or otherwise
++ decide whether fbcondecor is to be activated.
++
++ To activate fbcondecor on the first console the helper should issue the
++ FBIOCONDECOR_SETCFG, FBIOCONDECOR_SETPIC and FBIOCONDECOR_SETSTATE commands,
++ in the above-mentioned order.
++
++ When the userspace helper is called in an early phase of the boot process
++ (right after the initialization of fbcon), no filesystems will be mounted.
++ The helper program should mount sysfs and then create the appropriate
++ framebuffer, fbcondecor and tty0 devices (if they don't already exist) to get
++ current display settings and to be able to communicate with the kernel side.
++ It should probably also mount the procfs to be able to parse the kernel
++ command line parameters.
++
++ Note that the console sem is not held when the kernel calls fbcondecor_helper
++ with the 'init' command. The fbcondecor helper should perform all ioctls with
++ origin set to FBCON_DECOR_IO_ORIG_USER.
++
++modechange
++----------
++ The kernel issues this command on a mode change. The helper's response should
++ be similar to the response to the 'init' command. Note that this time the
++ console sem is held and all ioctls must be performed with origin set to
++ FBCON_DECOR_IO_ORIG_KERNEL.
++
++
++Userspace -> Kernel
++-------------------
++
++Userspace programs can communicate with fbcondecor via ioctls on the
++fbcondecor device. These ioctls are to be used by both the userspace helper
++(called only by the kernel) and userspace configuration tools (run by the users).
++
++The fbcondecor helper should set the origin field to FBCON_DECOR_IO_ORIG_KERNEL
++when doing the appropriate ioctls. All userspace configuration tools should
++use FBCON_DECOR_IO_ORIG_USER. Failure to set the appropriate value in the origin
++field when performing ioctls from the kernel helper will most likely result
++in a console deadlock.
++
++FBCON_DECOR_IO_ORIG_KERNEL instructs fbcondecor not to try to acquire the console
++semaphore. Not surprisingly, FBCON_DECOR_IO_ORIG_USER instructs it to acquire
++the console sem.
++
++The framebuffer console decoration provides the following ioctls (all defined in
++linux/fb.h):
++
++FBIOCONDECOR_SETPIC
++description: loads a background picture for a virtual console
++argument: struct fbcon_decor_iowrapper*; data: struct fb_image*
++notes:
++If called for consoles other than the current foreground one, the picture data
++will be ignored.
++
++If the current virtual console is running in an 8-bpp mode, the cmap substruct
++of fb_image has to be filled appropriately: start should be set to 16 (first
++16 colors are reserved for fbcon), len to a value <= 240 and red, green and
++blue should point to valid cmap data. The transp field is ignored. The fields
++dx, dy, bg_color, fg_color in fb_image are ignored as well.
++
++FBIOCONDECOR_SETCFG
++description: sets the fbcondecor config for a virtual console
++argument: struct fbcon_decor_iowrapper*; data: struct vc_decor*
++notes: The structure has to be filled with valid data.
++
++FBIOCONDECOR_GETCFG
++description: gets the fbcondecor config for a virtual console
++argument: struct fbcon_decor_iowrapper*; data: struct vc_decor*
++
++FBIOCONDECOR_SETSTATE
++description: sets the fbcondecor state for a virtual console
++argument: struct fbcon_decor_iowrapper*; data: unsigned int*
++ values: 0 = disabled, 1 = enabled.
++
++FBIOCONDECOR_GETSTATE
++description: gets the fbcondecor state for a virtual console
++argument: struct fbcon_decor_iowrapper*; data: unsigned int*
++ values: as in FBIOCONDECOR_SETSTATE
++
++Info on used structures:
++
++Definition of struct vc_decor can be found in linux/console_decor.h. It's
++heavily commented. Note that the 'theme' field should point to a string
++no longer than FBCON_DECOR_THEME_LEN. When FBIOCONDECOR_GETCFG call is
++performed, the theme field should point to a char buffer of length
++FBCON_DECOR_THEME_LEN.
++
++Definition of struct fbcon_decor_iowrapper can be found in linux/fb.h.
++The fields in this struct have the following meaning:
++
++vc:
++Virtual console number.
++
++origin:
++Specifies if the ioctl is performed as a response to a kernel request. The
++fbcondecor helper should set this field to FBCON_DECOR_IO_ORIG_KERNEL, userspace
++programs should set it to FBCON_DECOR_IO_ORIG_USER. This field is necessary to
++avoid console semaphore deadlocks.
++
++data:
++Pointer to a data structure appropriate for the performed ioctl. Type of
++the data struct is specified in the ioctls description.
++
++*****************************************************************************
++
++Credit
++------
++
++Original 'bootsplash' project & implementation by:
++ Volker Poplawski <volker@poplawski.de>, Stefan Reinauer <stepan@suse.de>,
++ Steffen Winterfeldt <snwint@suse.de>, Michael Schroeder <mls@suse.de>,
++ Ken Wimer <wimer@suse.de>.
++
++Fbcondecor, fbcondecor protocol design, current implementation & docs by:
++ Michal Januszewski <michalj+fbcondecor@gmail.com>
++
+diff --git a/drivers/Makefile b/drivers/Makefile
+index 7183b6a..d576148 100644
+--- a/drivers/Makefile
++++ b/drivers/Makefile
+@@ -17,6 +17,10 @@ obj-y += pwm/
+ obj-$(CONFIG_PCI) += pci/
+ obj-$(CONFIG_PARISC) += parisc/
+ obj-$(CONFIG_RAPIDIO) += rapidio/
++# tty/ comes before char/ so that the VT console is the boot-time
++# default.
++obj-y += tty/
++obj-y += char/
+ obj-y += video/
+ obj-y += idle/
+
+@@ -42,11 +46,6 @@ obj-$(CONFIG_REGULATOR) += regulator/
+ # reset controllers early, since gpu drivers might rely on them to initialize
+ obj-$(CONFIG_RESET_CONTROLLER) += reset/
+
+-# tty/ comes before char/ so that the VT console is the boot-time
+-# default.
+-obj-y += tty/
+-obj-y += char/
+-
+ # iommu/ comes before gpu as gpu are using iommu controllers
+ obj-$(CONFIG_IOMMU_SUPPORT) += iommu/
+
+diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
+index fe1cd01..6d2e87a 100644
+--- a/drivers/video/console/Kconfig
++++ b/drivers/video/console/Kconfig
+@@ -126,6 +126,19 @@ config FRAMEBUFFER_CONSOLE_ROTATION
+ such that other users of the framebuffer will remain normally
+ oriented.
+
++config FB_CON_DECOR
++ bool "Support for the Framebuffer Console Decorations"
++ depends on FRAMEBUFFER_CONSOLE=y && !FB_TILEBLITTING
++ default n
++ ---help---
++ This option enables support for framebuffer console decorations which
++ makes it possible to display images in the background of the system
++ consoles. Note that userspace utilities are necessary in order to take
++ advantage of these features. Refer to Documentation/fb/fbcondecor.txt
++ for more information.
++
++ If unsure, say N.
++
+ config STI_CONSOLE
+ bool "STI text console"
+ depends on PARISC
+diff --git a/drivers/video/console/Makefile b/drivers/video/console/Makefile
+index 43bfa48..cc104b6f 100644
+--- a/drivers/video/console/Makefile
++++ b/drivers/video/console/Makefile
+@@ -16,4 +16,5 @@ obj-$(CONFIG_FRAMEBUFFER_CONSOLE) += fbcon_rotate.o fbcon_cw.o fbcon_ud.o \
+ fbcon_ccw.o
+ endif
+
++obj-$(CONFIG_FB_CON_DECOR) += fbcondecor.o cfbcondecor.o
+ obj-$(CONFIG_FB_STI) += sticore.o
+diff --git a/drivers/video/console/bitblit.c b/drivers/video/console/bitblit.c
+index 61b182b..984384b 100644
+--- a/drivers/video/console/bitblit.c
++++ b/drivers/video/console/bitblit.c
+@@ -18,6 +18,7 @@
+ #include <linux/console.h>
+ #include <asm/types.h>
+ #include "fbcon.h"
++#include "fbcondecor.h"
+
+ /*
+ * Accelerated handlers.
+@@ -55,6 +56,13 @@ static void bit_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+ area.height = height * vc->vc_font.height;
+ area.width = width * vc->vc_font.width;
+
++ if (fbcon_decor_active(info, vc)) {
++ area.sx += vc->vc_decor.tx;
++ area.sy += vc->vc_decor.ty;
++ area.dx += vc->vc_decor.tx;
++ area.dy += vc->vc_decor.ty;
++ }
++
+ info->fbops->fb_copyarea(info, &area);
+ }
+
+@@ -380,11 +388,15 @@ static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+ cursor.image.depth = 1;
+ cursor.rop = ROP_XOR;
+
+- if (info->fbops->fb_cursor)
+- err = info->fbops->fb_cursor(info, &cursor);
++ if (fbcon_decor_active(info, vc)) {
++ fbcon_decor_cursor(info, &cursor);
++ } else {
++ if (info->fbops->fb_cursor)
++ err = info->fbops->fb_cursor(info, &cursor);
+
+- if (err)
+- soft_cursor(info, &cursor);
++ if (err)
++ soft_cursor(info, &cursor);
++ }
+
+ ops->cursor_reset = 0;
+ }
+diff --git a/drivers/video/console/cfbcondecor.c b/drivers/video/console/cfbcondecor.c
+new file mode 100644
+index 0000000..a2b4497
+--- /dev/null
++++ b/drivers/video/console/cfbcondecor.c
+@@ -0,0 +1,471 @@
++/*
++ * linux/drivers/video/cfbcon_decor.c -- Framebuffer decor render functions
++ *
++ * Copyright (C) 2004 Michal Januszewski <michalj+fbcondecor@gmail.com>
++ *
++ * Code based upon "Bootdecor" (C) 2001-2003
++ * Volker Poplawski <volker@poplawski.de>,
++ * Stefan Reinauer <stepan@suse.de>,
++ * Steffen Winterfeldt <snwint@suse.de>,
++ * Michael Schroeder <mls@suse.de>,
++ * Ken Wimer <wimer@suse.de>.
++ *
++ * This file is subject to the terms and conditions of the GNU General Public
++ * License. See the file COPYING in the main directory of this archive for
++ * more details.
++ */
++#include <linux/module.h>
++#include <linux/types.h>
++#include <linux/fb.h>
++#include <linux/selection.h>
++#include <linux/slab.h>
++#include <linux/vt_kern.h>
++#include <asm/irq.h>
++
++#include "fbcon.h"
++#include "fbcondecor.h"
++
++#define parse_pixel(shift,bpp,type) \
++ do { \
++ if (d & (0x80 >> (shift))) \
++ dd2[(shift)] = fgx; \
++ else \
++ dd2[(shift)] = transparent ? *(type *)decor_src : bgx; \
++ decor_src += (bpp); \
++ } while (0) \
++
++extern int get_color(struct vc_data *vc, struct fb_info *info,
++ u16 c, int is_fg);
++
++void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc)
++{
++ int i, j, k;
++ int minlen = min(min(info->var.red.length, info->var.green.length),
++ info->var.blue.length);
++ u32 col;
++
++ for (j = i = 0; i < 16; i++) {
++ k = color_table[i];
++
++ col = ((vc->vc_palette[j++] >> (8-minlen))
++ << info->var.red.offset);
++ col |= ((vc->vc_palette[j++] >> (8-minlen))
++ << info->var.green.offset);
++ col |= ((vc->vc_palette[j++] >> (8-minlen))
++ << info->var.blue.offset);
++ ((u32 *)info->pseudo_palette)[k] = col;
++ }
++}
++
++void fbcon_decor_renderc(struct fb_info *info, int ypos, int xpos, int height,
++ int width, u8* src, u32 fgx, u32 bgx, u8 transparent)
++{
++ unsigned int x, y;
++ u32 dd;
++ int bytespp = ((info->var.bits_per_pixel + 7) >> 3);
++ unsigned int d = ypos * info->fix.line_length + xpos * bytespp;
++ unsigned int ds = (ypos * info->var.xres + xpos) * bytespp;
++ u16 dd2[4];
++
++ u8* decor_src = (u8 *)(info->bgdecor.data + ds);
++ u8* dst = (u8 *)(info->screen_base + d);
++
++ if ((ypos + height) > info->var.yres || (xpos + width) > info->var.xres)
++ return;
++
++ for (y = 0; y < height; y++) {
++ switch (info->var.bits_per_pixel) {
++
++ case 32:
++ for (x = 0; x < width; x++) {
++
++ if ((x & 7) == 0)
++ d = *src++;
++ if (d & 0x80)
++ dd = fgx;
++ else
++ dd = transparent ?
++ *(u32 *)decor_src : bgx;
++
++ d <<= 1;
++ decor_src += 4;
++ fb_writel(dd, dst);
++ dst += 4;
++ }
++ break;
++ case 24:
++ for (x = 0; x < width; x++) {
++
++ if ((x & 7) == 0)
++ d = *src++;
++ if (d & 0x80)
++ dd = fgx;
++ else
++ dd = transparent ?
++ (*(u32 *)decor_src & 0xffffff) : bgx;
++
++ d <<= 1;
++ decor_src += 3;
++#ifdef __LITTLE_ENDIAN
++ fb_writew(dd & 0xffff, dst);
++ dst += 2;
++ fb_writeb((dd >> 16), dst);
++#else
++ fb_writew(dd >> 8, dst);
++ dst += 2;
++ fb_writeb(dd & 0xff, dst);
++#endif
++ dst++;
++ }
++ break;
++ case 16:
++ for (x = 0; x < width; x += 2) {
++ if ((x & 7) == 0)
++ d = *src++;
++
++ parse_pixel(0, 2, u16);
++ parse_pixel(1, 2, u16);
++#ifdef __LITTLE_ENDIAN
++ dd = dd2[0] | (dd2[1] << 16);
++#else
++ dd = dd2[1] | (dd2[0] << 16);
++#endif
++ d <<= 2;
++ fb_writel(dd, dst);
++ dst += 4;
++ }
++ break;
++
++ case 8:
++ for (x = 0; x < width; x += 4) {
++ if ((x & 7) == 0)
++ d = *src++;
++
++ parse_pixel(0, 1, u8);
++ parse_pixel(1, 1, u8);
++ parse_pixel(2, 1, u8);
++ parse_pixel(3, 1, u8);
++
++#ifdef __LITTLE_ENDIAN
++ dd = dd2[0] | (dd2[1] << 8) | (dd2[2] << 16) | (dd2[3] << 24);
++#else
++ dd = dd2[3] | (dd2[2] << 8) | (dd2[1] << 16) | (dd2[0] << 24);
++#endif
++ d <<= 4;
++ fb_writel(dd, dst);
++ dst += 4;
++ }
++ }
++
++ dst += info->fix.line_length - width * bytespp;
++ decor_src += (info->var.xres - width) * bytespp;
++ }
++}
++
++#define cc2cx(a) \
++ ((info->fix.visual == FB_VISUAL_TRUECOLOR || \
++ info->fix.visual == FB_VISUAL_DIRECTCOLOR) ? \
++ ((u32*)info->pseudo_palette)[a] : a)
++
++void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info,
++ const unsigned short *s, int count, int yy, int xx)
++{
++ unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;
++ struct fbcon_ops *ops = info->fbcon_par;
++ int fg_color, bg_color, transparent;
++ u8 *src;
++ u32 bgx, fgx;
++ u16 c = scr_readw(s);
++
++ fg_color = get_color(vc, info, c, 1);
++ bg_color = get_color(vc, info, c, 0);
++
++ /* Don't paint the background image if console is blanked */
++ transparent = ops->blank_state ? 0 :
++ (vc->vc_decor.bg_color == bg_color);
++
++ xx = xx * vc->vc_font.width + vc->vc_decor.tx;
++ yy = yy * vc->vc_font.height + vc->vc_decor.ty;
++
++ fgx = cc2cx(fg_color);
++ bgx = cc2cx(bg_color);
++
++ while (count--) {
++ c = scr_readw(s++);
++ src = vc->vc_font.data + (c & charmask) * vc->vc_font.height *
++ ((vc->vc_font.width + 7) >> 3);
++
++ fbcon_decor_renderc(info, yy, xx, vc->vc_font.height,
++ vc->vc_font.width, src, fgx, bgx, transparent);
++ xx += vc->vc_font.width;
++ }
++}
++
++void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor)
++{
++ int i;
++ unsigned int dsize, s_pitch;
++ struct fbcon_ops *ops = info->fbcon_par;
++ struct vc_data* vc;
++ u8 *src;
++
++ /* we really don't need any cursors while the console is blanked */
++ if (info->state != FBINFO_STATE_RUNNING || ops->blank_state)
++ return;
++
++ vc = vc_cons[ops->currcon].d;
++
++ src = kmalloc(64 + sizeof(struct fb_image), GFP_ATOMIC);
++ if (!src)
++ return;
++
++ s_pitch = (cursor->image.width + 7) >> 3;
++ dsize = s_pitch * cursor->image.height;
++ if (cursor->enable) {
++ switch (cursor->rop) {
++ case ROP_XOR:
++ for (i = 0; i < dsize; i++)
++ src[i] = cursor->image.data[i] ^ cursor->mask[i];
++ break;
++ case ROP_COPY:
++ default:
++ for (i = 0; i < dsize; i++)
++ src[i] = cursor->image.data[i] & cursor->mask[i];
++ break;
++ }
++ } else
++ memcpy(src, cursor->image.data, dsize);
++
++ fbcon_decor_renderc(info,
++ cursor->image.dy + vc->vc_decor.ty,
++ cursor->image.dx + vc->vc_decor.tx,
++ cursor->image.height,
++ cursor->image.width,
++ (u8*)src,
++ cc2cx(cursor->image.fg_color),
++ cc2cx(cursor->image.bg_color),
++ cursor->image.bg_color == vc->vc_decor.bg_color);
++
++ kfree(src);
++}
++
++static void decorset(u8 *dst, int height, int width, int dstbytes,
++ u32 bgx, int bpp)
++{
++ int i;
++
++ if (bpp == 8)
++ bgx |= bgx << 8;
++ if (bpp == 16 || bpp == 8)
++ bgx |= bgx << 16;
++
++ while (height-- > 0) {
++ u8 *p = dst;
++
++ switch (bpp) {
++
++ case 32:
++ for (i=0; i < width; i++) {
++ fb_writel(bgx, p); p += 4;
++ }
++ break;
++ case 24:
++ for (i=0; i < width; i++) {
++#ifdef __LITTLE_ENDIAN
++ fb_writew((bgx & 0xffff),(u16*)p); p += 2;
++ fb_writeb((bgx >> 16),p++);
++#else
++ fb_writew((bgx >> 8),(u16*)p); p += 2;
++ fb_writeb((bgx & 0xff),p++);
++#endif
++ }
++ case 16:
++ for (i=0; i < width/4; i++) {
++ fb_writel(bgx,p); p += 4;
++ fb_writel(bgx,p); p += 4;
++ }
++ if (width & 2) {
++ fb_writel(bgx,p); p += 4;
++ }
++ if (width & 1)
++ fb_writew(bgx,(u16*)p);
++ break;
++ case 8:
++ for (i=0; i < width/4; i++) {
++ fb_writel(bgx,p); p += 4;
++ }
++
++ if (width & 2) {
++ fb_writew(bgx,p); p += 2;
++ }
++ if (width & 1)
++ fb_writeb(bgx,(u8*)p);
++ break;
++
++ }
++ dst += dstbytes;
++ }
++}
++
++void fbcon_decor_copy(u8 *dst, u8 *src, int height, int width, int linebytes,
++ int srclinebytes, int bpp)
++{
++ int i;
++
++ while (height-- > 0) {
++ u32 *p = (u32 *)dst;
++ u32 *q = (u32 *)src;
++
++ switch (bpp) {
++
++ case 32:
++ for (i=0; i < width; i++)
++ fb_writel(*q++, p++);
++ break;
++ case 24:
++ for (i=0; i < (width*3/4); i++)
++ fb_writel(*q++, p++);
++ if ((width*3) % 4) {
++ if (width & 2) {
++ fb_writeb(*(u8*)q, (u8*)p);
++ } else if (width & 1) {
++ fb_writew(*(u16*)q, (u16*)p);
++ fb_writeb(*(u8*)((u16*)q+1),(u8*)((u16*)p+2));
++ }
++ }
++ break;
++ case 16:
++ for (i=0; i < width/4; i++) {
++ fb_writel(*q++, p++);
++ fb_writel(*q++, p++);
++ }
++ if (width & 2)
++ fb_writel(*q++, p++);
++ if (width & 1)
++ fb_writew(*(u16*)q, (u16*)p);
++ break;
++ case 8:
++ for (i=0; i < width/4; i++)
++ fb_writel(*q++, p++);
++
++ if (width & 2) {
++ fb_writew(*(u16*)q, (u16*)p);
++ q = (u32*) ((u16*)q + 1);
++ p = (u32*) ((u16*)p + 1);
++ }
++ if (width & 1)
++ fb_writeb(*(u8*)q, (u8*)p);
++ break;
++ }
++
++ dst += linebytes;
++ src += srclinebytes;
++ }
++}
++
++static void decorfill(struct fb_info *info, int sy, int sx, int height,
++ int width)
++{
++ int bytespp = ((info->var.bits_per_pixel + 7) >> 3);
++ int d = sy * info->fix.line_length + sx * bytespp;
++ int ds = (sy * info->var.xres + sx) * bytespp;
++
++ fbcon_decor_copy((u8 *)(info->screen_base + d), (u8 *)(info->bgdecor.data + ds),
++ height, width, info->fix.line_length, info->var.xres * bytespp,
++ info->var.bits_per_pixel);
++}
++
++void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx,
++ int height, int width)
++{
++ int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
++ struct fbcon_ops *ops = info->fbcon_par;
++ u8 *dst;
++ int transparent, bg_color = attr_bgcol_ec(bgshift, vc, info);
++
++ transparent = (vc->vc_decor.bg_color == bg_color);
++ sy = sy * vc->vc_font.height + vc->vc_decor.ty;
++ sx = sx * vc->vc_font.width + vc->vc_decor.tx;
++ height *= vc->vc_font.height;
++ width *= vc->vc_font.width;
++
++ /* Don't paint the background image if console is blanked */
++ if (transparent && !ops->blank_state) {
++ decorfill(info, sy, sx, height, width);
++ } else {
++ dst = (u8 *)(info->screen_base + sy * info->fix.line_length +
++ sx * ((info->var.bits_per_pixel + 7) >> 3));
++ decorset(dst, height, width, info->fix.line_length, cc2cx(bg_color),
++ info->var.bits_per_pixel);
++ }
++}
++
++void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info,
++ int bottom_only)
++{
++ unsigned int tw = vc->vc_cols*vc->vc_font.width;
++ unsigned int th = vc->vc_rows*vc->vc_font.height;
++
++ if (!bottom_only) {
++ /* top margin */
++ decorfill(info, 0, 0, vc->vc_decor.ty, info->var.xres);
++ /* left margin */
++ decorfill(info, vc->vc_decor.ty, 0, th, vc->vc_decor.tx);
++ /* right margin */
++ decorfill(info, vc->vc_decor.ty, vc->vc_decor.tx + tw, th,
++ info->var.xres - vc->vc_decor.tx - tw);
++ }
++ decorfill(info, vc->vc_decor.ty + th, 0,
++ info->var.yres - vc->vc_decor.ty - th, info->var.xres);
++}
++
++void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y,
++ int sx, int dx, int width)
++{
++ u16 *d = (u16 *) (vc->vc_origin + vc->vc_size_row * y + dx * 2);
++ u16 *s = d + (dx - sx);
++ u16 *start = d;
++ u16 *ls = d;
++ u16 *le = d + width;
++ u16 c;
++ int x = dx;
++ u16 attr = 1;
++
++ do {
++ c = scr_readw(d);
++ if (attr != (c & 0xff00)) {
++ attr = c & 0xff00;
++ if (d > start) {
++ fbcon_decor_putcs(vc, info, start, d - start, y, x);
++ x += d - start;
++ start = d;
++ }
++ }
++ if (s >= ls && s < le && c == scr_readw(s)) {
++ if (d > start) {
++ fbcon_decor_putcs(vc, info, start, d - start, y, x);
++ x += d - start + 1;
++ start = d + 1;
++ } else {
++ x++;
++ start++;
++ }
++ }
++ s++;
++ d++;
++ } while (d < le);
++ if (d > start)
++ fbcon_decor_putcs(vc, info, start, d - start, y, x);
++}
++
++void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank)
++{
++ if (blank) {
++ decorset((u8 *)info->screen_base, info->var.yres, info->var.xres,
++ info->fix.line_length, 0, info->var.bits_per_pixel);
++ } else {
++ update_screen(vc);
++ fbcon_decor_clear_margins(vc, info, 0);
++ }
++}
++
+diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
+index f447734..da50d61 100644
+--- a/drivers/video/console/fbcon.c
++++ b/drivers/video/console/fbcon.c
+@@ -79,6 +79,7 @@
+ #include <asm/irq.h>
+
+ #include "fbcon.h"
++#include "../console/fbcondecor.h"
+
+ #ifdef FBCONDEBUG
+ # define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __func__ , ## args)
+@@ -94,7 +95,7 @@ enum {
+
+ static struct display fb_display[MAX_NR_CONSOLES];
+
+-static signed char con2fb_map[MAX_NR_CONSOLES];
++signed char con2fb_map[MAX_NR_CONSOLES];
+ static signed char con2fb_map_boot[MAX_NR_CONSOLES];
+
+ static int logo_lines;
+@@ -286,7 +287,7 @@ static inline int fbcon_is_inactive(struct vc_data *vc, struct fb_info *info)
+ !vt_force_oops_output(vc);
+ }
+
+-static int get_color(struct vc_data *vc, struct fb_info *info,
++int get_color(struct vc_data *vc, struct fb_info *info,
+ u16 c, int is_fg)
+ {
+ int depth = fb_get_color_depth(&info->var, &info->fix);
+@@ -551,6 +552,9 @@ static int do_fbcon_takeover(int show_logo)
+ info_idx = -1;
+ } else {
+ fbcon_has_console_bind = 1;
++#ifdef CONFIG_FB_CON_DECOR
++ fbcon_decor_init();
++#endif
+ }
+
+ return err;
+@@ -1007,6 +1011,12 @@ static const char *fbcon_startup(void)
+ rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
+ cols /= vc->vc_font.width;
+ rows /= vc->vc_font.height;
++
++ if (fbcon_decor_active(info, vc)) {
++ cols = vc->vc_decor.twidth / vc->vc_font.width;
++ rows = vc->vc_decor.theight / vc->vc_font.height;
++ }
++
+ vc_resize(vc, cols, rows);
+
+ DPRINTK("mode: %s\n", info->fix.id);
+@@ -1036,7 +1046,7 @@ static void fbcon_init(struct vc_data *vc, int init)
+ cap = info->flags;
+
+ if (vc != svc || logo_shown == FBCON_LOGO_DONTSHOW ||
+- (info->fix.type == FB_TYPE_TEXT))
++ (info->fix.type == FB_TYPE_TEXT) || fbcon_decor_active(info, vc))
+ logo = 0;
+
+ if (var_to_display(p, &info->var, info))
+@@ -1260,6 +1270,11 @@ static void fbcon_clear(struct vc_data *vc, int sy, int sx, int height,
+ fbcon_clear_margins(vc, 0);
+ }
+
++ if (fbcon_decor_active(info, vc)) {
++ fbcon_decor_clear(vc, info, sy, sx, height, width);
++ return;
++ }
++
+ /* Split blits that cross physical y_wrap boundary */
+
+ y_break = p->vrows - p->yscroll;
+@@ -1279,10 +1294,15 @@ static void fbcon_putcs(struct vc_data *vc, const unsigned short *s,
+ struct display *p = &fb_display[vc->vc_num];
+ struct fbcon_ops *ops = info->fbcon_par;
+
+- if (!fbcon_is_inactive(vc, info))
+- ops->putcs(vc, info, s, count, real_y(p, ypos), xpos,
+- get_color(vc, info, scr_readw(s), 1),
+- get_color(vc, info, scr_readw(s), 0));
++ if (!fbcon_is_inactive(vc, info)) {
++
++ if (fbcon_decor_active(info, vc))
++ fbcon_decor_putcs(vc, info, s, count, ypos, xpos);
++ else
++ ops->putcs(vc, info, s, count, real_y(p, ypos), xpos,
++ get_color(vc, info, scr_readw(s), 1),
++ get_color(vc, info, scr_readw(s), 0));
++ }
+ }
+
+ static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos)
+@@ -1298,8 +1318,13 @@ static void fbcon_clear_margins(struct vc_data *vc, int bottom_only)
+ struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+ struct fbcon_ops *ops = info->fbcon_par;
+
+- if (!fbcon_is_inactive(vc, info))
+- ops->clear_margins(vc, info, bottom_only);
++ if (!fbcon_is_inactive(vc, info)) {
++ if (fbcon_decor_active(info, vc)) {
++ fbcon_decor_clear_margins(vc, info, bottom_only);
++ } else {
++ ops->clear_margins(vc, info, bottom_only);
++ }
++ }
+ }
+
+ static void fbcon_cursor(struct vc_data *vc, int mode)
+@@ -1819,7 +1844,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
+ count = vc->vc_rows;
+ if (softback_top)
+ fbcon_softback_note(vc, t, count);
+- if (logo_shown >= 0)
++ if (logo_shown >= 0 || fbcon_decor_active(info, vc))
+ goto redraw_up;
+ switch (p->scrollmode) {
+ case SCROLL_MOVE:
+@@ -1912,6 +1937,8 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir,
+ count = vc->vc_rows;
+ if (logo_shown >= 0)
+ goto redraw_down;
++ if (fbcon_decor_active(info, vc))
++ goto redraw_down;
+ switch (p->scrollmode) {
+ case SCROLL_MOVE:
+ fbcon_redraw_blit(vc, info, p, b - 1, b - t - count,
+@@ -2060,6 +2087,13 @@ static void fbcon_bmove_rec(struct vc_data *vc, struct display *p, int sy, int s
+ }
+ return;
+ }
++
++ if (fbcon_decor_active(info, vc) && sy == dy && height == 1) {
++ /* must use slower redraw bmove to keep background pic intact */
++ fbcon_decor_bmove_redraw(vc, info, sy, sx, dx, width);
++ return;
++ }
++
+ ops->bmove(vc, info, real_y(p, sy), sx, real_y(p, dy), dx,
+ height, width);
+ }
+@@ -2130,8 +2164,8 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
+ var.yres = virt_h * virt_fh;
+ x_diff = info->var.xres - var.xres;
+ y_diff = info->var.yres - var.yres;
+- if (x_diff < 0 || x_diff > virt_fw ||
+- y_diff < 0 || y_diff > virt_fh) {
++ if ((x_diff < 0 || x_diff > virt_fw ||
++ y_diff < 0 || y_diff > virt_fh) && !vc->vc_decor.state) {
+ const struct fb_videomode *mode;
+
+ DPRINTK("attempting resize %ix%i\n", var.xres, var.yres);
+@@ -2167,6 +2201,21 @@ static int fbcon_switch(struct vc_data *vc)
+
+ info = registered_fb[con2fb_map[vc->vc_num]];
+ ops = info->fbcon_par;
++ prev_console = ops->currcon;
++ if (prev_console != -1)
++ old_info = registered_fb[con2fb_map[prev_console]];
++
++#ifdef CONFIG_FB_CON_DECOR
++ if (!fbcon_decor_active_vc(vc) && info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
++ struct vc_data *vc_curr = vc_cons[prev_console].d;
++ if (vc_curr && fbcon_decor_active_vc(vc_curr)) {
++ /* Clear the screen to avoid displaying funky colors during
++ * palette updates. */
++ memset((u8*)info->screen_base + info->fix.line_length * info->var.yoffset,
++ 0, info->var.yres * info->fix.line_length);
++ }
++ }
++#endif
+
+ if (softback_top) {
+ if (softback_lines)
+@@ -2185,9 +2234,6 @@ static int fbcon_switch(struct vc_data *vc)
+ logo_shown = FBCON_LOGO_CANSHOW;
+ }
+
+- prev_console = ops->currcon;
+- if (prev_console != -1)
+- old_info = registered_fb[con2fb_map[prev_console]];
+ /*
+ * FIXME: If we have multiple fbdev's loaded, we need to
+ * update all info->currcon. Perhaps, we can place this
+@@ -2231,6 +2277,18 @@ static int fbcon_switch(struct vc_data *vc)
+ fbcon_del_cursor_timer(old_info);
+ }
+
++ if (fbcon_decor_active_vc(vc)) {
++ struct vc_data *vc_curr = vc_cons[prev_console].d;
++
++ if (!vc_curr->vc_decor.theme ||
++ strcmp(vc->vc_decor.theme, vc_curr->vc_decor.theme) ||
++ (fbcon_decor_active_nores(info, vc_curr) &&
++ !fbcon_decor_active(info, vc_curr))) {
++ fbcon_decor_disable(vc, 0);
++ fbcon_decor_call_helper("modechange", vc->vc_num);
++ }
++ }
++
+ if (fbcon_is_inactive(vc, info) ||
+ ops->blank_state != FB_BLANK_UNBLANK)
+ fbcon_del_cursor_timer(info);
+@@ -2339,15 +2397,20 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch)
+ }
+ }
+
+- if (!fbcon_is_inactive(vc, info)) {
++ if (!fbcon_is_inactive(vc, info)) {
+ if (ops->blank_state != blank) {
+ ops->blank_state = blank;
+ fbcon_cursor(vc, blank ? CM_ERASE : CM_DRAW);
+ ops->cursor_flash = (!blank);
+
+- if (!(info->flags & FBINFO_MISC_USEREVENT))
+- if (fb_blank(info, blank))
+- fbcon_generic_blank(vc, info, blank);
++ if (!(info->flags & FBINFO_MISC_USEREVENT)) {
++ if (fb_blank(info, blank)) {
++ if (fbcon_decor_active(info, vc))
++ fbcon_decor_blank(vc, info, blank);
++ else
++ fbcon_generic_blank(vc, info, blank);
++ }
++ }
+ }
+
+ if (!blank)
+@@ -2522,13 +2585,22 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h,
+ }
+
+ if (resize) {
++ /* reset wrap/pan */
+ int cols, rows;
+
+ cols = FBCON_SWAP(ops->rotate, info->var.xres, info->var.yres);
+ rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
++
++ if (fbcon_decor_active(info, vc)) {
++ info->var.xoffset = info->var.yoffset = p->yscroll = 0;
++ cols = vc->vc_decor.twidth;
++ rows = vc->vc_decor.theight;
++ }
+ cols /= w;
+ rows /= h;
++
+ vc_resize(vc, cols, rows);
++
+ if (CON_IS_VISIBLE(vc) && softback_buf)
+ fbcon_update_softback(vc);
+ } else if (CON_IS_VISIBLE(vc)
+@@ -2657,7 +2729,11 @@ static int fbcon_set_palette(struct vc_data *vc, unsigned char *table)
+ int i, j, k, depth;
+ u8 val;
+
+- if (fbcon_is_inactive(vc, info))
++ if (fbcon_is_inactive(vc, info)
++#ifdef CONFIG_FB_CON_DECOR
++ || vc->vc_num != fg_console
++#endif
++ )
+ return -EINVAL;
+
+ if (!CON_IS_VISIBLE(vc))
+@@ -2683,14 +2759,56 @@ static int fbcon_set_palette(struct vc_data *vc, unsigned char *table)
+ } else
+ fb_copy_cmap(fb_default_cmap(1 << depth), &palette_cmap);
+
+- return fb_set_cmap(&palette_cmap, info);
++ if (fbcon_decor_active(info, vc_cons[fg_console].d) &&
++ info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
++
++ u16 *red, *green, *blue;
++ int minlen = min(min(info->var.red.length, info->var.green.length),
++ info->var.blue.length);
++ int h;
++
++ struct fb_cmap cmap = {
++ .start = 0,
++ .len = (1 << minlen),
++ .red = NULL,
++ .green = NULL,
++ .blue = NULL,
++ .transp = NULL
++ };
++
++ red = kmalloc(256 * sizeof(u16) * 3, GFP_KERNEL);
++
++ if (!red)
++ goto out;
++
++ green = red + 256;
++ blue = green + 256;
++ cmap.red = red;
++ cmap.green = green;
++ cmap.blue = blue;
++
++ for (i = 0; i < cmap.len; i++) {
++ red[i] = green[i] = blue[i] = (0xffff * i)/(cmap.len-1);
++ }
++
++ h = fb_set_cmap(&cmap, info);
++ fbcon_decor_fix_pseudo_pal(info, vc_cons[fg_console].d);
++ kfree(red);
++
++ return h;
++
++ } else if (fbcon_decor_active(info, vc_cons[fg_console].d) &&
++ info->var.bits_per_pixel == 8 && info->bgdecor.cmap.red != NULL)
++ fb_set_cmap(&info->bgdecor.cmap, info);
++
++out: return fb_set_cmap(&palette_cmap, info);
+ }
+
+ static u16 *fbcon_screen_pos(struct vc_data *vc, int offset)
+ {
+ unsigned long p;
+ int line;
+-
++
+ if (vc->vc_num != fg_console || !softback_lines)
+ return (u16 *) (vc->vc_origin + offset);
+ line = offset / vc->vc_size_row;
+@@ -2909,7 +3027,14 @@ static void fbcon_modechanged(struct fb_info *info)
+ rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
+ cols /= vc->vc_font.width;
+ rows /= vc->vc_font.height;
+- vc_resize(vc, cols, rows);
++
++ if (!fbcon_decor_active_nores(info, vc)) {
++ vc_resize(vc, cols, rows);
++ } else {
++ fbcon_decor_disable(vc, 0);
++ fbcon_decor_call_helper("modechange", vc->vc_num);
++ }
++
+ updatescrollmode(p, info, vc);
+ scrollback_max = 0;
+ scrollback_current = 0;
+@@ -2954,7 +3079,9 @@ static void fbcon_set_all_vcs(struct fb_info *info)
+ rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
+ cols /= vc->vc_font.width;
+ rows /= vc->vc_font.height;
+- vc_resize(vc, cols, rows);
++ if (!fbcon_decor_active_nores(info, vc)) {
++ vc_resize(vc, cols, rows);
++ }
+ }
+
+ if (fg != -1)
+@@ -3596,6 +3723,7 @@ static void fbcon_exit(void)
+ }
+ }
+
++ fbcon_decor_exit();
+ fbcon_has_exited = 1;
+ }
+
+diff --git a/drivers/video/console/fbcondecor.c b/drivers/video/console/fbcondecor.c
+new file mode 100644
+index 0000000..babc8c5
+--- /dev/null
++++ b/drivers/video/console/fbcondecor.c
+@@ -0,0 +1,555 @@
++/*
++ * linux/drivers/video/console/fbcondecor.c -- Framebuffer console decorations
++ *
++ * Copyright (C) 2004-2009 Michal Januszewski <michalj+fbcondecor@gmail.com>
++ *
++ * Code based upon "Bootsplash" (C) 2001-2003
++ * Volker Poplawski <volker@poplawski.de>,
++ * Stefan Reinauer <stepan@suse.de>,
++ * Steffen Winterfeldt <snwint@suse.de>,
++ * Michael Schroeder <mls@suse.de>,
++ * Ken Wimer <wimer@suse.de>.
++ *
++ * Compat ioctl support by Thorsten Klein <TK@Thorsten-Klein.de>.
++ *
++ * This file is subject to the terms and conditions of the GNU General Public
++ * License. See the file COPYING in the main directory of this archive for
++ * more details.
++ *
++ */
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
++#include <linux/types.h>
++#include <linux/fb.h>
++#include <linux/vt_kern.h>
++#include <linux/vmalloc.h>
++#include <linux/unistd.h>
++#include <linux/syscalls.h>
++#include <linux/init.h>
++#include <linux/proc_fs.h>
++#include <linux/workqueue.h>
++#include <linux/kmod.h>
++#include <linux/miscdevice.h>
++#include <linux/device.h>
++#include <linux/fs.h>
++#include <linux/compat.h>
++#include <linux/console.h>
++
++#include <asm/uaccess.h>
++#include <asm/irq.h>
++
++#include "fbcon.h"
++#include "fbcondecor.h"
++
++extern signed char con2fb_map[];
++static int fbcon_decor_enable(struct vc_data *vc);
++char fbcon_decor_path[KMOD_PATH_LEN] = "/sbin/fbcondecor_helper";
++static int initialized = 0;
++
++int fbcon_decor_call_helper(char* cmd, unsigned short vc)
++{
++ char *envp[] = {
++ "HOME=/",
++ "PATH=/sbin:/bin",
++ NULL
++ };
++
++ char tfb[5];
++ char tcons[5];
++ unsigned char fb = (int) con2fb_map[vc];
++
++ char *argv[] = {
++ fbcon_decor_path,
++ "2",
++ cmd,
++ tcons,
++ tfb,
++ vc_cons[vc].d->vc_decor.theme,
++ NULL
++ };
++
++ snprintf(tfb,5,"%d",fb);
++ snprintf(tcons,5,"%d",vc);
++
++ return call_usermodehelper(fbcon_decor_path, argv, envp, UMH_WAIT_EXEC);
++}
++
++/* Disables fbcondecor on a virtual console; called with console sem held. */
++int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw)
++{
++ struct fb_info* info;
++
++ if (!vc->vc_decor.state)
++ return -EINVAL;
++
++ info = registered_fb[(int) con2fb_map[vc->vc_num]];
++
++ if (info == NULL)
++ return -EINVAL;
++
++ vc->vc_decor.state = 0;
++ vc_resize(vc, info->var.xres / vc->vc_font.width,
++ info->var.yres / vc->vc_font.height);
++
++ if (fg_console == vc->vc_num && redraw) {
++ redraw_screen(vc, 0);
++ update_region(vc, vc->vc_origin +
++ vc->vc_size_row * vc->vc_top,
++ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2);
++ }
++
++ printk(KERN_INFO "fbcondecor: switched decor state to 'off' on console %d\n",
++ vc->vc_num);
++
++ return 0;
++}
++
++/* Enables fbcondecor on a virtual console; called with console sem held. */
++static int fbcon_decor_enable(struct vc_data *vc)
++{
++ struct fb_info* info;
++
++ info = registered_fb[(int) con2fb_map[vc->vc_num]];
++
++ if (vc->vc_decor.twidth == 0 || vc->vc_decor.theight == 0 ||
++ info == NULL || vc->vc_decor.state || (!info->bgdecor.data &&
++ vc->vc_num == fg_console))
++ return -EINVAL;
++
++ vc->vc_decor.state = 1;
++ vc_resize(vc, vc->vc_decor.twidth / vc->vc_font.width,
++ vc->vc_decor.theight / vc->vc_font.height);
++
++ if (fg_console == vc->vc_num) {
++ redraw_screen(vc, 0);
++ update_region(vc, vc->vc_origin +
++ vc->vc_size_row * vc->vc_top,
++ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2);
++ fbcon_decor_clear_margins(vc, info, 0);
++ }
++
++ printk(KERN_INFO "fbcondecor: switched decor state to 'on' on console %d\n",
++ vc->vc_num);
++
++ return 0;
++}
++
++static inline int fbcon_decor_ioctl_dosetstate(struct vc_data *vc, unsigned int state, unsigned char origin)
++{
++ int ret;
++
++// if (origin == FBCON_DECOR_IO_ORIG_USER)
++ console_lock();
++ if (!state)
++ ret = fbcon_decor_disable(vc, 1);
++ else
++ ret = fbcon_decor_enable(vc);
++// if (origin == FBCON_DECOR_IO_ORIG_USER)
++ console_unlock();
++
++ return ret;
++}
++
++static inline void fbcon_decor_ioctl_dogetstate(struct vc_data *vc, unsigned int *state)
++{
++ *state = vc->vc_decor.state;
++}
++
++static int fbcon_decor_ioctl_dosetcfg(struct vc_data *vc, struct vc_decor *cfg, unsigned char origin)
++{
++ struct fb_info *info;
++ int len;
++ char *tmp;
++
++ info = registered_fb[(int) con2fb_map[vc->vc_num]];
++
++ if (info == NULL || !cfg->twidth || !cfg->theight ||
++ cfg->tx + cfg->twidth > info->var.xres ||
++ cfg->ty + cfg->theight > info->var.yres)
++ return -EINVAL;
++
++ len = strlen_user(cfg->theme);
++ if (!len || len > FBCON_DECOR_THEME_LEN)
++ return -EINVAL;
++ tmp = kmalloc(len, GFP_KERNEL);
++ if (!tmp)
++ return -ENOMEM;
++ if (copy_from_user(tmp, (void __user *)cfg->theme, len))
++ return -EFAULT;
++ cfg->theme = tmp;
++ cfg->state = 0;
++
++ /* If this ioctl is a response to a request from kernel, the console sem
++ * is already held; we also don't need to disable decor because either the
++ * new config and background picture will be successfully loaded, and the
++ * decor will stay on, or in case of a failure it'll be turned off in fbcon. */
++// if (origin == FBCON_DECOR_IO_ORIG_USER) {
++ console_lock();
++ if (vc->vc_decor.state)
++ fbcon_decor_disable(vc, 1);
++// }
++
++ if (vc->vc_decor.theme)
++ kfree(vc->vc_decor.theme);
++
++ vc->vc_decor = *cfg;
++
++// if (origin == FBCON_DECOR_IO_ORIG_USER)
++ console_unlock();
++
++ printk(KERN_INFO "fbcondecor: console %d using theme '%s'\n",
++ vc->vc_num, vc->vc_decor.theme);
++ return 0;
++}
++
++static int fbcon_decor_ioctl_dogetcfg(struct vc_data *vc, struct vc_decor *decor)
++{
++ char __user *tmp;
++
++ tmp = decor->theme;
++ *decor = vc->vc_decor;
++ decor->theme = tmp;
++
++ if (vc->vc_decor.theme) {
++ if (copy_to_user(tmp, vc->vc_decor.theme, strlen(vc->vc_decor.theme) + 1))
++ return -EFAULT;
++ } else
++ if (put_user(0, tmp))
++ return -EFAULT;
++
++ return 0;
++}
++
++static int fbcon_decor_ioctl_dosetpic(struct vc_data *vc, struct fb_image *img, unsigned char origin)
++{
++ struct fb_info *info;
++ int len;
++ u8 *tmp;
++
++ if (vc->vc_num != fg_console)
++ return -EINVAL;
++
++ info = registered_fb[(int) con2fb_map[vc->vc_num]];
++
++ if (info == NULL)
++ return -EINVAL;
++
++ if (img->width != info->var.xres || img->height != info->var.yres) {
++ printk(KERN_ERR "fbcondecor: picture dimensions mismatch\n");
++ printk(KERN_ERR "%dx%d vs %dx%d\n", img->width, img->height, info->var.xres, info->var.yres);
++ return -EINVAL;
++ }
++
++ if (img->depth != info->var.bits_per_pixel) {
++ printk(KERN_ERR "fbcondecor: picture depth mismatch\n");
++ return -EINVAL;
++ }
++
++ if (img->depth == 8) {
++ if (!img->cmap.len || !img->cmap.red || !img->cmap.green ||
++ !img->cmap.blue)
++ return -EINVAL;
++
++ tmp = vmalloc(img->cmap.len * 3 * 2);
++ if (!tmp)
++ return -ENOMEM;
++
++ if (copy_from_user(tmp,
++ (void __user*)img->cmap.red, (img->cmap.len << 1)) ||
++ copy_from_user(tmp + (img->cmap.len << 1),
++ (void __user*)img->cmap.green, (img->cmap.len << 1)) ||
++ copy_from_user(tmp + (img->cmap.len << 2),
++ (void __user*)img->cmap.blue, (img->cmap.len << 1))) {
++ vfree(tmp);
++ return -EFAULT;
++ }
++
++ img->cmap.transp = NULL;
++ img->cmap.red = (u16*)tmp;
++ img->cmap.green = img->cmap.red + img->cmap.len;
++ img->cmap.blue = img->cmap.green + img->cmap.len;
++ } else {
++ img->cmap.red = NULL;
++ }
++
++ len = ((img->depth + 7) >> 3) * img->width * img->height;
++
++ /*
++ * Allocate an additional byte so that we never go outside of the
++ * buffer boundaries in the rendering functions in a 24 bpp mode.
++ */
++ tmp = vmalloc(len + 1);
++
++ if (!tmp)
++ goto out;
++
++ if (copy_from_user(tmp, (void __user*)img->data, len))
++ goto out;
++
++ img->data = tmp;
++
++ /* If this ioctl is a response to a request from kernel, the console sem
++ * is already held. */
++// if (origin == FBCON_DECOR_IO_ORIG_USER)
++ console_lock();
++
++ if (info->bgdecor.data)
++ vfree((u8*)info->bgdecor.data);
++ if (info->bgdecor.cmap.red)
++ vfree(info->bgdecor.cmap.red);
++
++ info->bgdecor = *img;
++
++ if (fbcon_decor_active_vc(vc) && fg_console == vc->vc_num) {
++ redraw_screen(vc, 0);
++ update_region(vc, vc->vc_origin +
++ vc->vc_size_row * vc->vc_top,
++ vc->vc_size_row * (vc->vc_bottom - vc->vc_top) / 2);
++ fbcon_decor_clear_margins(vc, info, 0);
++ }
++
++// if (origin == FBCON_DECOR_IO_ORIG_USER)
++ console_unlock();
++
++ return 0;
++
++out: if (img->cmap.red)
++ vfree(img->cmap.red);
++
++ if (tmp)
++ vfree(tmp);
++ return -ENOMEM;
++}
++
++static long fbcon_decor_ioctl(struct file *filp, u_int cmd, u_long arg)
++{
++ struct fbcon_decor_iowrapper __user *wrapper = (void __user*) arg;
++ struct vc_data *vc = NULL;
++ unsigned short vc_num = 0;
++ unsigned char origin = 0;
++ void __user *data = NULL;
++
++ if (!access_ok(VERIFY_READ, wrapper,
++ sizeof(struct fbcon_decor_iowrapper)))
++ return -EFAULT;
++
++ __get_user(vc_num, &wrapper->vc);
++ __get_user(origin, &wrapper->origin);
++ __get_user(data, &wrapper->data);
++
++ if (!vc_cons_allocated(vc_num))
++ return -EINVAL;
++
++ vc = vc_cons[vc_num].d;
++
++ switch (cmd) {
++ case FBIOCONDECOR_SETPIC:
++ {
++ struct fb_image img;
++ if (copy_from_user(&img, (struct fb_image __user *)data, sizeof(struct fb_image)))
++ return -EFAULT;
++
++ return fbcon_decor_ioctl_dosetpic(vc, &img, origin);
++ }
++ case FBIOCONDECOR_SETCFG:
++ {
++ struct vc_decor cfg;
++ if (copy_from_user(&cfg, (struct vc_decor __user *)data, sizeof(struct vc_decor)))
++ return -EFAULT;
++
++ return fbcon_decor_ioctl_dosetcfg(vc, &cfg, origin);
++ }
++ case FBIOCONDECOR_GETCFG:
++ {
++ int rval;
++ struct vc_decor cfg;
++
++ if (copy_from_user(&cfg, (struct vc_decor __user *)data, sizeof(struct vc_decor)))
++ return -EFAULT;
++
++ rval = fbcon_decor_ioctl_dogetcfg(vc, &cfg);
++
++ if (copy_to_user(data, &cfg, sizeof(struct vc_decor)))
++ return -EFAULT;
++ return rval;
++ }
++ case FBIOCONDECOR_SETSTATE:
++ {
++ unsigned int state = 0;
++ if (get_user(state, (unsigned int __user *)data))
++ return -EFAULT;
++ return fbcon_decor_ioctl_dosetstate(vc, state, origin);
++ }
++ case FBIOCONDECOR_GETSTATE:
++ {
++ unsigned int state = 0;
++ fbcon_decor_ioctl_dogetstate(vc, &state);
++ return put_user(state, (unsigned int __user *)data);
++ }
++
++ default:
++ return -ENOIOCTLCMD;
++ }
++}
++
++#ifdef CONFIG_COMPAT
++
++static long fbcon_decor_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) {
++
++ struct fbcon_decor_iowrapper32 __user *wrapper = (void __user *)arg;
++ struct vc_data *vc = NULL;
++ unsigned short vc_num = 0;
++ unsigned char origin = 0;
++ compat_uptr_t data_compat = 0;
++ void __user *data = NULL;
++
++ if (!access_ok(VERIFY_READ, wrapper,
++ sizeof(struct fbcon_decor_iowrapper32)))
++ return -EFAULT;
++
++ __get_user(vc_num, &wrapper->vc);
++ __get_user(origin, &wrapper->origin);
++ __get_user(data_compat, &wrapper->data);
++ data = compat_ptr(data_compat);
++
++ if (!vc_cons_allocated(vc_num))
++ return -EINVAL;
++
++ vc = vc_cons[vc_num].d;
++
++ switch (cmd) {
++ case FBIOCONDECOR_SETPIC32:
++ {
++ struct fb_image32 img_compat;
++ struct fb_image img;
++
++ if (copy_from_user(&img_compat, (struct fb_image32 __user *)data, sizeof(struct fb_image32)))
++ return -EFAULT;
++
++ fb_image_from_compat(img, img_compat);
++
++ return fbcon_decor_ioctl_dosetpic(vc, &img, origin);
++ }
++
++ case FBIOCONDECOR_SETCFG32:
++ {
++ struct vc_decor32 cfg_compat;
++ struct vc_decor cfg;
++
++ if (copy_from_user(&cfg_compat, (struct vc_decor32 __user *)data, sizeof(struct vc_decor32)))
++ return -EFAULT;
++
++ vc_decor_from_compat(cfg, cfg_compat);
++
++ return fbcon_decor_ioctl_dosetcfg(vc, &cfg, origin);
++ }
++
++ case FBIOCONDECOR_GETCFG32:
++ {
++ int rval;
++ struct vc_decor32 cfg_compat;
++ struct vc_decor cfg;
++
++ if (copy_from_user(&cfg_compat, (struct vc_decor32 __user *)data, sizeof(struct vc_decor32)))
++ return -EFAULT;
++ cfg.theme = compat_ptr(cfg_compat.theme);
++
++ rval = fbcon_decor_ioctl_dogetcfg(vc, &cfg);
++
++ vc_decor_to_compat(cfg_compat, cfg);
++
++ if (copy_to_user((struct vc_decor32 __user *)data, &cfg_compat, sizeof(struct vc_decor32)))
++ return -EFAULT;
++ return rval;
++ }
++
++ case FBIOCONDECOR_SETSTATE32:
++ {
++ compat_uint_t state_compat = 0;
++ unsigned int state = 0;
++
++ if (get_user(state_compat, (compat_uint_t __user *)data))
++ return -EFAULT;
++
++ state = (unsigned int)state_compat;
++
++ return fbcon_decor_ioctl_dosetstate(vc, state, origin);
++ }
++
++ case FBIOCONDECOR_GETSTATE32:
++ {
++ compat_uint_t state_compat = 0;
++ unsigned int state = 0;
++
++ fbcon_decor_ioctl_dogetstate(vc, &state);
++ state_compat = (compat_uint_t)state;
++
++ return put_user(state_compat, (compat_uint_t __user *)data);
++ }
++
++ default:
++ return -ENOIOCTLCMD;
++ }
++}
++#else
++ #define fbcon_decor_compat_ioctl NULL
++#endif
++
++static struct file_operations fbcon_decor_ops = {
++ .owner = THIS_MODULE,
++ .unlocked_ioctl = fbcon_decor_ioctl,
++ .compat_ioctl = fbcon_decor_compat_ioctl
++};
++
++static struct miscdevice fbcon_decor_dev = {
++ .minor = MISC_DYNAMIC_MINOR,
++ .name = "fbcondecor",
++ .fops = &fbcon_decor_ops
++};
++
++void fbcon_decor_reset(void)
++{
++ int i;
++
++ for (i = 0; i < num_registered_fb; i++) {
++ registered_fb[i]->bgdecor.data = NULL;
++ registered_fb[i]->bgdecor.cmap.red = NULL;
++ }
++
++ for (i = 0; i < MAX_NR_CONSOLES && vc_cons[i].d; i++) {
++ vc_cons[i].d->vc_decor.state = vc_cons[i].d->vc_decor.twidth =
++ vc_cons[i].d->vc_decor.theight = 0;
++ vc_cons[i].d->vc_decor.theme = NULL;
++ }
++
++ return;
++}
++
++int fbcon_decor_init(void)
++{
++ int i;
++
++ fbcon_decor_reset();
++
++ if (initialized)
++ return 0;
++
++ i = misc_register(&fbcon_decor_dev);
++ if (i) {
++ printk(KERN_ERR "fbcondecor: failed to register device\n");
++ return i;
++ }
++
++ fbcon_decor_call_helper("init", 0);
++ initialized = 1;
++ return 0;
++}
++
++int fbcon_decor_exit(void)
++{
++ fbcon_decor_reset();
++ return 0;
++}
++
++EXPORT_SYMBOL(fbcon_decor_path);
+diff --git a/drivers/video/console/fbcondecor.h b/drivers/video/console/fbcondecor.h
+new file mode 100644
+index 0000000..3b3724b
+--- /dev/null
++++ b/drivers/video/console/fbcondecor.h
+@@ -0,0 +1,78 @@
++/*
++ * linux/drivers/video/console/fbcondecor.h -- Framebuffer Console Decoration headers
++ *
++ * Copyright (C) 2004 Michal Januszewski <michalj+fbcondecor@gmail.com>
++ *
++ */
++
++#ifndef __FBCON_DECOR_H
++#define __FBCON_DECOR_H
++
++#ifndef _LINUX_FB_H
++#include <linux/fb.h>
++#endif
++
++/* This is needed for vc_cons in fbcmap.c */
++#include <linux/vt_kern.h>
++
++struct fb_cursor;
++struct fb_info;
++struct vc_data;
++
++#ifdef CONFIG_FB_CON_DECOR
++/* fbcondecor.c */
++int fbcon_decor_init(void);
++int fbcon_decor_exit(void);
++int fbcon_decor_call_helper(char* cmd, unsigned short cons);
++int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw);
++
++/* cfbcondecor.c */
++void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info, const unsigned short *s, int count, int yy, int xx);
++void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor);
++void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width);
++void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info, int bottom_only);
++void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank);
++void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y, int sx, int dx, int width);
++void fbcon_decor_copy(u8 *dst, u8 *src, int height, int width, int linebytes, int srclinesbytes, int bpp);
++void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc);
++
++/* vt.c */
++void acquire_console_sem(void);
++void release_console_sem(void);
++void do_unblank_screen(int entering_gfx);
++
++/* struct vc_data *y */
++#define fbcon_decor_active_vc(y) (y->vc_decor.state && y->vc_decor.theme)
++
++/* struct fb_info *x, struct vc_data *y */
++#define fbcon_decor_active_nores(x,y) (x->bgdecor.data && fbcon_decor_active_vc(y))
++
++/* struct fb_info *x, struct vc_data *y */
++#define fbcon_decor_active(x,y) (fbcon_decor_active_nores(x,y) && \
++ x->bgdecor.width == x->var.xres && \
++ x->bgdecor.height == x->var.yres && \
++ x->bgdecor.depth == x->var.bits_per_pixel)
++
++
++#else /* CONFIG_FB_CON_DECOR */
++
++static inline void fbcon_decor_putcs(struct vc_data *vc, struct fb_info *info, const unsigned short *s, int count, int yy, int xx) {}
++static inline void fbcon_decor_putc(struct vc_data *vc, struct fb_info *info, int c, int ypos, int xpos) {}
++static inline void fbcon_decor_cursor(struct fb_info *info, struct fb_cursor *cursor) {}
++static inline void fbcon_decor_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width) {}
++static inline void fbcon_decor_clear_margins(struct vc_data *vc, struct fb_info *info, int bottom_only) {}
++static inline void fbcon_decor_blank(struct vc_data *vc, struct fb_info *info, int blank) {}
++static inline void fbcon_decor_bmove_redraw(struct vc_data *vc, struct fb_info *info, int y, int sx, int dx, int width) {}
++static inline void fbcon_decor_fix_pseudo_pal(struct fb_info *info, struct vc_data *vc) {}
++static inline int fbcon_decor_call_helper(char* cmd, unsigned short cons) { return 0; }
++static inline int fbcon_decor_init(void) { return 0; }
++static inline int fbcon_decor_exit(void) { return 0; }
++static inline int fbcon_decor_disable(struct vc_data *vc, unsigned char redraw) { return 0; }
++
++#define fbcon_decor_active_vc(y) (0)
++#define fbcon_decor_active_nores(x,y) (0)
++#define fbcon_decor_active(x,y) (0)
++
++#endif /* CONFIG_FB_CON_DECOR */
++
++#endif /* __FBCON_DECOR_H */
+diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
+index e1f4727..2952e33 100644
+--- a/drivers/video/fbdev/Kconfig
++++ b/drivers/video/fbdev/Kconfig
+@@ -1204,7 +1204,6 @@ config FB_MATROX
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+- select FB_TILEBLITTING
+ select FB_MACMODES if PPC_PMAC
+ ---help---
+ Say Y here if you have a Matrox Millennium, Matrox Millennium II,
+diff --git a/drivers/video/fbdev/core/fbcmap.c b/drivers/video/fbdev/core/fbcmap.c
+index f89245b..05e036c 100644
+--- a/drivers/video/fbdev/core/fbcmap.c
++++ b/drivers/video/fbdev/core/fbcmap.c
+@@ -17,6 +17,8 @@
+ #include <linux/slab.h>
+ #include <linux/uaccess.h>
+
++#include "../../console/fbcondecor.h"
++
+ static u16 red2[] __read_mostly = {
+ 0x0000, 0xaaaa
+ };
+@@ -249,14 +251,17 @@ int fb_set_cmap(struct fb_cmap *cmap, struct fb_info *info)
+ if (transp)
+ htransp = *transp++;
+ if (info->fbops->fb_setcolreg(start++,
+- hred, hgreen, hblue,
++ hred, hgreen, hblue,
+ htransp, info))
+ break;
+ }
+ }
+- if (rc == 0)
++ if (rc == 0) {
+ fb_copy_cmap(cmap, &info->cmap);
+-
++ if (fbcon_decor_active(info, vc_cons[fg_console].d) &&
++ info->fix.visual == FB_VISUAL_DIRECTCOLOR)
++ fbcon_decor_fix_pseudo_pal(info, vc_cons[fg_console].d);
++ }
+ return rc;
+ }
+
+diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
+index b6d5008..d6703f2 100644
+--- a/drivers/video/fbdev/core/fbmem.c
++++ b/drivers/video/fbdev/core/fbmem.c
+@@ -1250,15 +1250,6 @@ struct fb_fix_screeninfo32 {
+ u16 reserved[3];
+ };
+
+-struct fb_cmap32 {
+- u32 start;
+- u32 len;
+- compat_caddr_t red;
+- compat_caddr_t green;
+- compat_caddr_t blue;
+- compat_caddr_t transp;
+-};
+-
+ static int fb_getput_cmap(struct fb_info *info, unsigned int cmd,
+ unsigned long arg)
+ {
+diff --git a/include/linux/console_decor.h b/include/linux/console_decor.h
+new file mode 100644
+index 0000000..04b8d80
+--- /dev/null
++++ b/include/linux/console_decor.h
+@@ -0,0 +1,46 @@
++#ifndef _LINUX_CONSOLE_DECOR_H_
++#define _LINUX_CONSOLE_DECOR_H_ 1
++
++/* A structure used by the framebuffer console decorations (drivers/video/console/fbcondecor.c) */
++struct vc_decor {
++ __u8 bg_color; /* The color that is to be treated as transparent */
++ __u8 state; /* Current decor state: 0 = off, 1 = on */
++ __u16 tx, ty; /* Top left corner coordinates of the text field */
++ __u16 twidth, theight; /* Width and height of the text field */
++ char* theme;
++};
++
++#ifdef __KERNEL__
++#ifdef CONFIG_COMPAT
++#include <linux/compat.h>
++
++struct vc_decor32 {
++ __u8 bg_color; /* The color that is to be treated as transparent */
++ __u8 state; /* Current decor state: 0 = off, 1 = on */
++ __u16 tx, ty; /* Top left corner coordinates of the text field */
++ __u16 twidth, theight; /* Width and height of the text field */
++ compat_uptr_t theme;
++};
++
++#define vc_decor_from_compat(to, from) \
++ (to).bg_color = (from).bg_color; \
++ (to).state = (from).state; \
++ (to).tx = (from).tx; \
++ (to).ty = (from).ty; \
++ (to).twidth = (from).twidth; \
++ (to).theight = (from).theight; \
++ (to).theme = compat_ptr((from).theme)
++
++#define vc_decor_to_compat(to, from) \
++ (to).bg_color = (from).bg_color; \
++ (to).state = (from).state; \
++ (to).tx = (from).tx; \
++ (to).ty = (from).ty; \
++ (to).twidth = (from).twidth; \
++ (to).theight = (from).theight; \
++ (to).theme = ptr_to_compat((from).theme)
++
++#endif /* CONFIG_COMPAT */
++#endif /* __KERNEL__ */
++
++#endif
+diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
+index 7f0c329..98f5d60 100644
+--- a/include/linux/console_struct.h
++++ b/include/linux/console_struct.h
+@@ -19,6 +19,7 @@
+ struct vt_struct;
+
+ #define NPAR 16
++#include <linux/console_decor.h>
+
+ struct vc_data {
+ struct tty_port port; /* Upper level data */
+@@ -107,6 +108,8 @@ struct vc_data {
+ unsigned long vc_uni_pagedir;
+ unsigned long *vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */
+ bool vc_panic_force_write; /* when oops/panic this VC can accept forced output/blanking */
++
++ struct vc_decor vc_decor;
+ /* additional information is in vt_kern.h */
+ };
+
+diff --git a/include/linux/fb.h b/include/linux/fb.h
+index fe6ac95..1e36b03 100644
+--- a/include/linux/fb.h
++++ b/include/linux/fb.h
+@@ -219,6 +219,34 @@ struct fb_deferred_io {
+ };
+ #endif
+
++#ifdef __KERNEL__
++#ifdef CONFIG_COMPAT
++struct fb_image32 {
++ __u32 dx; /* Where to place image */
++ __u32 dy;
++ __u32 width; /* Size of image */
++ __u32 height;
++ __u32 fg_color; /* Only used when a mono bitmap */
++ __u32 bg_color;
++ __u8 depth; /* Depth of the image */
++ const compat_uptr_t data; /* Pointer to image data */
++ struct fb_cmap32 cmap; /* color map info */
++};
++
++#define fb_image_from_compat(to, from) \
++ (to).dx = (from).dx; \
++ (to).dy = (from).dy; \
++ (to).width = (from).width; \
++ (to).height = (from).height; \
++ (to).fg_color = (from).fg_color; \
++ (to).bg_color = (from).bg_color; \
++ (to).depth = (from).depth; \
++ (to).data = compat_ptr((from).data); \
++ fb_cmap_from_compat((to).cmap, (from).cmap)
++
++#endif /* CONFIG_COMPAT */
++#endif /* __KERNEL__ */
++
+ /*
+ * Frame buffer operations
+ *
+@@ -489,6 +517,9 @@ struct fb_info {
+ #define FBINFO_STATE_SUSPENDED 1
+ u32 state; /* Hardware state i.e suspend */
+ void *fbcon_par; /* fbcon use-only private area */
++
++ struct fb_image bgdecor;
++
+ /* From here on everything is device dependent */
+ void *par;
+ /* we need the PCI or similar aperture base/size not
+diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h
+index fb795c3..dc77a03 100644
+--- a/include/uapi/linux/fb.h
++++ b/include/uapi/linux/fb.h
+@@ -8,6 +8,25 @@
+
+ #define FB_MAX 32 /* sufficient for now */
+
++struct fbcon_decor_iowrapper
++{
++ unsigned short vc; /* Virtual console */
++ unsigned char origin; /* Point of origin of the request */
++ void *data;
++};
++
++#ifdef __KERNEL__
++#ifdef CONFIG_COMPAT
++#include <linux/compat.h>
++struct fbcon_decor_iowrapper32
++{
++ unsigned short vc; /* Virtual console */
++ unsigned char origin; /* Point of origin of the request */
++ compat_uptr_t data;
++};
++#endif /* CONFIG_COMPAT */
++#endif /* __KERNEL__ */
++
+ /* ioctls
+ 0x46 is 'F' */
+ #define FBIOGET_VSCREENINFO 0x4600
+@@ -35,6 +54,25 @@
+ #define FBIOGET_DISPINFO 0x4618
+ #define FBIO_WAITFORVSYNC _IOW('F', 0x20, __u32)
+
++#define FBIOCONDECOR_SETCFG _IOWR('F', 0x19, struct fbcon_decor_iowrapper)
++#define FBIOCONDECOR_GETCFG _IOR('F', 0x1A, struct fbcon_decor_iowrapper)
++#define FBIOCONDECOR_SETSTATE _IOWR('F', 0x1B, struct fbcon_decor_iowrapper)
++#define FBIOCONDECOR_GETSTATE _IOR('F', 0x1C, struct fbcon_decor_iowrapper)
++#define FBIOCONDECOR_SETPIC _IOWR('F', 0x1D, struct fbcon_decor_iowrapper)
++#ifdef __KERNEL__
++#ifdef CONFIG_COMPAT
++#define FBIOCONDECOR_SETCFG32 _IOWR('F', 0x19, struct fbcon_decor_iowrapper32)
++#define FBIOCONDECOR_GETCFG32 _IOR('F', 0x1A, struct fbcon_decor_iowrapper32)
++#define FBIOCONDECOR_SETSTATE32 _IOWR('F', 0x1B, struct fbcon_decor_iowrapper32)
++#define FBIOCONDECOR_GETSTATE32 _IOR('F', 0x1C, struct fbcon_decor_iowrapper32)
++#define FBIOCONDECOR_SETPIC32 _IOWR('F', 0x1D, struct fbcon_decor_iowrapper32)
++#endif /* CONFIG_COMPAT */
++#endif /* __KERNEL__ */
++
++#define FBCON_DECOR_THEME_LEN 128 /* Maximum length of a theme name */
++#define FBCON_DECOR_IO_ORIG_KERNEL 0 /* Kernel ioctl origin */
++#define FBCON_DECOR_IO_ORIG_USER 1 /* User ioctl origin */
++
+ #define FB_TYPE_PACKED_PIXELS 0 /* Packed Pixels */
+ #define FB_TYPE_PLANES 1 /* Non interleaved planes */
+ #define FB_TYPE_INTERLEAVED_PLANES 2 /* Interleaved planes */
+@@ -277,6 +315,29 @@ struct fb_var_screeninfo {
+ __u32 reserved[4]; /* Reserved for future compatibility */
+ };
+
++#ifdef __KERNEL__
++#ifdef CONFIG_COMPAT
++struct fb_cmap32 {
++ __u32 start;
++ __u32 len; /* Number of entries */
++ compat_uptr_t red; /* Red values */
++ compat_uptr_t green;
++ compat_uptr_t blue;
++ compat_uptr_t transp; /* transparency, can be NULL */
++};
++
++#define fb_cmap_from_compat(to, from) \
++ (to).start = (from).start; \
++ (to).len = (from).len; \
++ (to).red = compat_ptr((from).red); \
++ (to).green = compat_ptr((from).green); \
++ (to).blue = compat_ptr((from).blue); \
++ (to).transp = compat_ptr((from).transp)
++
++#endif /* CONFIG_COMPAT */
++#endif /* __KERNEL__ */
++
++
+ struct fb_cmap {
+ __u32 start; /* First entry */
+ __u32 len; /* Number of entries */
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 74f5b58..6386ab0 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -146,6 +146,10 @@ static const int cap_last_cap = CAP_LAST_CAP;
+ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
+ #endif
+
++#ifdef CONFIG_FB_CON_DECOR
++extern char fbcon_decor_path[];
++#endif
++
+ #ifdef CONFIG_INOTIFY_USER
+ #include <linux/inotify.h>
+ #endif
+@@ -255,6 +259,15 @@ static struct ctl_table sysctl_base_table[] = {
+ .mode = 0555,
+ .child = dev_table,
+ },
++#ifdef CONFIG_FB_CON_DECOR
++ {
++ .procname = "fbcondecor",
++ .data = &fbcon_decor_path,
++ .maxlen = KMOD_PATH_LEN,
++ .mode = 0644,
++ .proc_handler = &proc_dostring,
++ },
++#endif
+ { }
+ };
+
diff --git a/5000_enable-additional-cpu-optimizations-for-gcc.patch b/5000_enable-additional-cpu-optimizations-for-gcc.patch
new file mode 100644
index 0000000..f7ab6f0
--- /dev/null
+++ b/5000_enable-additional-cpu-optimizations-for-gcc.patch
@@ -0,0 +1,327 @@
+This patch has been tested on and known to work with kernel versions from 3.2
+up to the latest git version (pulled on 12/14/2013).
+
+This patch will expand the number of microarchitectures to cover newer
+processors, including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
+14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
+Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 2nd Gen Core
+i3/i5/i7 (Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), and Intel 4th
+Gen Core i3/i5/i7 (Haswell). It also offers the compiler the 'native' flag.
+
+Small but real speed increases are measurable with a make-based benchmark comparing
+a generic kernel to one built for one of the respective microarchitectures.
+
+See the following experimental evidence supporting this statement:
+https://github.com/graysky2/kernel_gcc_patch
+
+REQUIREMENTS
+linux version >=3.15
+gcc version <4.9
+
+---
+diff -uprN a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
+--- a/arch/x86/include/asm/module.h 2013-11-03 18:41:51.000000000 -0500
++++ b/arch/x86/include/asm/module.h 2013-12-15 06:21:24.351122516 -0500
+@@ -15,6 +15,16 @@
+ #define MODULE_PROC_FAMILY "586MMX "
+ #elif defined CONFIG_MCORE2
+ #define MODULE_PROC_FAMILY "CORE2 "
++#elif defined CONFIG_MNATIVE
++#define MODULE_PROC_FAMILY "NATIVE "
++#elif defined CONFIG_MCOREI7
++#define MODULE_PROC_FAMILY "COREI7 "
++#elif defined CONFIG_MCOREI7AVX
++#define MODULE_PROC_FAMILY "COREI7AVX "
++#elif defined CONFIG_MCOREAVXI
++#define MODULE_PROC_FAMILY "COREAVXI "
++#elif defined CONFIG_MCOREAVX2
++#define MODULE_PROC_FAMILY "COREAVX2 "
+ #elif defined CONFIG_MATOM
+ #define MODULE_PROC_FAMILY "ATOM "
+ #elif defined CONFIG_M686
+@@ -33,6 +43,18 @@
+ #define MODULE_PROC_FAMILY "K7 "
+ #elif defined CONFIG_MK8
+ #define MODULE_PROC_FAMILY "K8 "
++#elif defined CONFIG_MK10
++#define MODULE_PROC_FAMILY "K10 "
++#elif defined CONFIG_MBARCELONA
++#define MODULE_PROC_FAMILY "BARCELONA "
++#elif defined CONFIG_MBOBCAT
++#define MODULE_PROC_FAMILY "BOBCAT "
++#elif defined CONFIG_MBULLDOZER
++#define MODULE_PROC_FAMILY "BULLDOZER "
++#elif defined CONFIG_MPILEDRIVER
++#define MODULE_PROC_FAMILY "PILEDRIVER "
++#elif defined CONFIG_MJAGUAR
++#define MODULE_PROC_FAMILY "JAGUAR "
+ #elif defined CONFIG_MELAN
+ #define MODULE_PROC_FAMILY "ELAN "
+ #elif defined CONFIG_MCRUSOE
+diff -uprN a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
+--- a/arch/x86/Kconfig.cpu 2013-11-03 18:41:51.000000000 -0500
++++ b/arch/x86/Kconfig.cpu 2013-12-15 06:21:24.351122516 -0500
+@@ -139,7 +139,7 @@ config MPENTIUM4
+
+
+ config MK6
+- bool "K6/K6-II/K6-III"
++ bool "AMD K6/K6-II/K6-III"
+ depends on X86_32
+ ---help---
+ Select this for an AMD K6-family processor. Enables use of
+@@ -147,7 +147,7 @@ config MK6
+ flags to GCC.
+
+ config MK7
+- bool "Athlon/Duron/K7"
++ bool "AMD Athlon/Duron/K7"
+ depends on X86_32
+ ---help---
+ Select this for an AMD Athlon K7-family processor. Enables use of
+@@ -155,12 +155,55 @@ config MK7
+ flags to GCC.
+
+ config MK8
+- bool "Opteron/Athlon64/Hammer/K8"
++ bool "AMD Opteron/Athlon64/Hammer/K8"
+ ---help---
+ Select this for an AMD Opteron or Athlon64 Hammer-family processor.
+ Enables use of some extended instructions, and passes appropriate
+ optimization flags to GCC.
+
++config MK10
++ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
++ ---help---
++ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
++ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
++ Enables use of some extended instructions, and passes appropriate
++ optimization flags to GCC.
++
++config MBARCELONA
++ bool "AMD Barcelona"
++ ---help---
++ Select this for AMD Barcelona and newer processors.
++
++ Enables -march=barcelona
++
++config MBOBCAT
++ bool "AMD Bobcat"
++ ---help---
++ Select this for AMD Bobcat processors.
++
++ Enables -march=btver1
++
++config MBULLDOZER
++ bool "AMD Bulldozer"
++ ---help---
++ Select this for AMD Bulldozer processors.
++
++ Enables -march=bdver1
++
++config MPILEDRIVER
++ bool "AMD Piledriver"
++ ---help---
++ Select this for AMD Piledriver processors.
++
++ Enables -march=bdver2
++
++config MJAGUAR
++ bool "AMD Jaguar"
++ ---help---
++ Select this for AMD Jaguar processors.
++
++ Enables -march=btver2
++
+ config MCRUSOE
+ bool "Crusoe"
+ depends on X86_32
+@@ -251,8 +294,17 @@ config MPSC
+ using the cpu family field
+ in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
+
++config MATOM
++ bool "Intel Atom"
++ ---help---
++
++ Select this for the Intel Atom platform. Intel Atom CPUs have an
++ in-order pipelining architecture and thus can benefit from
++ accordingly optimized code. Use a recent GCC with specific Atom
++ support in order to fully benefit from selecting this option.
++
+ config MCORE2
+- bool "Core 2/newer Xeon"
++ bool "Intel Core 2"
+ ---help---
+
+ Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
+@@ -260,14 +312,40 @@ config MCORE2
+ family in /proc/cpuinfo. Newer ones have 6 and older ones 15
+ (not a typo)
+
+-config MATOM
+- bool "Intel Atom"
++ Enables -march=core2
++
++config MCOREI7
++ bool "Intel Core i7"
+ ---help---
+
+- Select this for the Intel Atom platform. Intel Atom CPUs have an
+- in-order pipelining architecture and thus can benefit from
+- accordingly optimized code. Use a recent GCC with specific Atom
+- support in order to fully benefit from selecting this option.
++ Select this for the Intel Nehalem platform. Intel Nehalem processors
++ include Core i3, i5, i7, Xeon: 34xx, 35xx, 55xx, 56xx, 75xx processors.
++
++ Enables -march=corei7
++
++config MCOREI7AVX
++ bool "Intel Core 2nd Gen AVX"
++ ---help---
++
++ Select this for 2nd Gen Core processors including Sandy Bridge.
++
++ Enables -march=corei7-avx
++
++config MCOREAVXI
++ bool "Intel Core 3rd Gen AVX"
++ ---help---
++
++ Select this for 3rd Gen Core processors including Ivy Bridge.
++
++ Enables -march=core-avx-i
++
++config MCOREAVX2
++ bool "Intel Core AVX2"
++ ---help---
++
++ Select this for AVX2 enabled processors including Haswell.
++
++ Enables -march=core-avx2
+
+ config GENERIC_CPU
+ bool "Generic-x86-64"
+@@ -276,6 +354,19 @@ config GENERIC_CPU
+ Generic x86-64 CPU.
+ Run equally well on all x86-64 CPUs.
+
++config MNATIVE
++ bool "Native optimizations autodetected by GCC"
++ ---help---
++
++ GCC 4.2 and above support -march=native, which automatically detects
++ the optimum settings to use based on your processor. -march=native
++ also detects and applies additional settings beyond -march specific
++ to your CPU (e.g. -msse4). Unless you have a specific reason not to
++ (e.g. distcc cross-compiling), you should probably be using
++ -march=native rather than anything listed below.
++
++ Enables -march=native
++
+ endchoice
+
+ config X86_GENERIC
+@@ -300,7 +391,7 @@ config X86_INTERNODE_CACHE_SHIFT
+ config X86_L1_CACHE_SHIFT
+ int
+ default "7" if MPENTIUM4 || MPSC
+- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
++ default "6" if MK7 || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MVIAC7 || X86_GENERIC || MNATIVE || GENERIC_CPU
+ default "4" if MELAN || M486 || MGEODEGX1
+ default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
+
+@@ -331,11 +422,11 @@ config X86_ALIGNMENT_16
+
+ config X86_INTEL_USERCOPY
+ def_bool y
+- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
++ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || MNATIVE || X86_GENERIC || MK8 || MK7 || MK10 || MBARCELONA || MEFFICEON || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2
+
+ config X86_USE_PPRO_CHECKSUM
+ def_bool y
+- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MNATIVE
+
+ config X86_USE_3DNOW
+ def_bool y
+@@ -363,17 +454,17 @@ config X86_P6_NOP
+
+ config X86_TSC
+ def_bool y
+- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MATOM) || X86_64 || MNATIVE
+
+ config X86_CMPXCHG64
+ def_bool y
+- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
++ depends on X86_PAE || X86_64 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
+
+ # this should be set for all -march=.. options where the compiler
+ # generates cmov.
+ config X86_CMOV
+ def_bool y
+- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
++ depends on (MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
+
+ config X86_MINIMUM_CPU_FAMILY
+ int
+diff -uprN a/arch/x86/Makefile b/arch/x86/Makefile
+--- a/arch/x86/Makefile 2013-11-03 18:41:51.000000000 -0500
++++ b/arch/x86/Makefile 2013-12-15 06:21:24.354455723 -0500
+@@ -61,11 +61,26 @@ else
+ KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
+
+ # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
++ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
+ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
++ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
++ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
++ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
++ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
++ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
++ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
+ cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+
+ cflags-$(CONFIG_MCORE2) += \
+- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
++ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
++ cflags-$(CONFIG_MCOREI7) += \
++ $(call cc-option,-march=corei7,$(call cc-option,-mtune=corei7))
++ cflags-$(CONFIG_MCOREI7AVX) += \
++ $(call cc-option,-march=corei7-avx,$(call cc-option,-mtune=corei7-avx))
++ cflags-$(CONFIG_MCOREAVXI) += \
++ $(call cc-option,-march=core-avx-i,$(call cc-option,-mtune=core-avx-i))
++ cflags-$(CONFIG_MCOREAVX2) += \
++ $(call cc-option,-march=core-avx2,$(call cc-option,-mtune=core-avx2))
+ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
+ $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
+ cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
+diff -uprN a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
+--- a/arch/x86/Makefile_32.cpu 2013-11-03 18:41:51.000000000 -0500
++++ b/arch/x86/Makefile_32.cpu 2013-12-15 06:21:24.354455723 -0500
+@@ -23,7 +23,14 @@ cflags-$(CONFIG_MK6) += -march=k6
+ # Please note, that patches that add -march=athlon-xp and friends are pointless.
+ # They make zero difference whatsosever to performance at this time.
+ cflags-$(CONFIG_MK7) += -march=athlon
++cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
+ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
++cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
++cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon)
++cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
++cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
++cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
++cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
+ cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+ cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+ cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
+@@ -32,6 +39,10 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
+ cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
+ cflags-$(CONFIG_MVIAC7) += -march=i686
+ cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
++cflags-$(CONFIG_MCOREI7) += -march=i686 $(call tune,corei7)
++cflags-$(CONFIG_MCOREI7AVX) += -march=i686 $(call tune,corei7-avx)
++cflags-$(CONFIG_MCOREAVXI) += -march=i686 $(call tune,core-avx-i)
++cflags-$(CONFIG_MCOREAVX2) += -march=i686 $(call tune,core-avx2)
+ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
+ $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
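
The Makefile hunks in the patch above route every -march value through the kernel's
cc-option helper, which falls back to its second argument (or to nothing) when the
installed compiler rejects the first, so an unsupported flag degrades gracefully
instead of breaking the build. The fragment below is a minimal standalone sketch of
that probe for experimenting outside the kernel tree; the simplified cc-option
definition and the example flag/fallback pairs are illustrative assumptions, not the
kernel's real helper (defined in scripts/Kbuild.include).

# cc-option-test.mk (hypothetical file name): run with "make -f cc-option-test.mk".
# Succeed only if $(CC) accepts the flag in $(1) on an empty C translation unit,
# otherwise fall back to $(2) - a simplified stand-in for the kernel helper.
CC ?= gcc
cc-option = $(shell if $(CC) $(1) -S -x c /dev/null -o /dev/null >/dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi)

# Example probes in the same pattern as the patch (pairs chosen for illustration).
$(info a CONFIG_MCOREI7AVX build would get: $(call cc-option,-march=corei7-avx,-mtune=generic))
$(info a CONFIG_MK10 build would get: $(call cc-option,-march=amdfam10,-mtune=generic))

# No-op default target so make exits cleanly after printing the probes.
all: ; @:
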
diff --git a/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch b/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
new file mode 100644
index 0000000..418201d
--- /dev/null
+++ b/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
@@ -0,0 +1,402 @@
+WARNING - this version of the patch works with version 4.9+ of gcc and with
+kernel version 3.15.x+ and should NOT be applied when compiling on older
+versions due to name changes of the flags with the 4.9 release of gcc.
+Use the older version of this patch hosted on the same github for older
+versions of gcc. For example:
+
+corei7 --> nehalem
+corei7-avx --> sandybridge
+core-avx-i --> ivybridge
+core-avx2 --> haswell
+
+For more, see: https://gcc.gnu.org/gcc-4.9/changes.html
+
+It also changes 'atom' to 'bonnell' in accordance with the gcc v4.9 changes.
+Note that upstream is using the deprecated 'march=atom' flag when I believe it
+should use the newer 'march=bonnell' flag for atom processors.
+
+I have made that change to this patch set as well. See the following kernel
+bug report to see if I'm right: https://bugzilla.kernel.org/show_bug.cgi?id=77461
+
+This patch will expand the number of microarchitectures to cover newer
+processors, including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
+14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
+Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 1.5 Gen Core
+i3/i5/i7 (Westmere), Intel 2nd Gen Core i3/i5/i7 (Sandybridge), Intel 3rd Gen
+Core i3/i5/i7 (Ivybridge), Intel 4th Gen Core i3/i5/i7 (Haswell), Intel 5th
+Gen Core i3/i5/i7 (Broadwell), and the low power Silvermont series of Atom
+processors (Silvermont). It also offers the compiler the 'native' flag.
+
+Small but real speed increases are measurable with a make-based benchmark comparing
+a generic kernel to one built for one of the respective microarchitectures.
+
+See the following experimental evidence supporting this statement:
+https://github.com/graysky2/kernel_gcc_patch
+
+REQUIREMENTS
+linux version >=3.15
+gcc version >=4.9
+
+--- a/arch/x86/include/asm/module.h 2014-06-16 16:44:27.000000000 -0400
++++ b/arch/x86/include/asm/module.h 2015-03-07 03:27:32.556672424 -0500
+@@ -15,6 +15,22 @@
+ #define MODULE_PROC_FAMILY "586MMX "
+ #elif defined CONFIG_MCORE2
+ #define MODULE_PROC_FAMILY "CORE2 "
++#elif defined CONFIG_MNATIVE
++#define MODULE_PROC_FAMILY "NATIVE "
++#elif defined CONFIG_MNEHALEM
++#define MODULE_PROC_FAMILY "NEHALEM "
++#elif defined CONFIG_MWESTMERE
++#define MODULE_PROC_FAMILY "WESTMERE "
++#elif defined CONFIG_MSILVERMONT
++#define MODULE_PROC_FAMILY "SILVERMONT "
++#elif defined CONFIG_MSANDYBRIDGE
++#define MODULE_PROC_FAMILY "SANDYBRIDGE "
++#elif defined CONFIG_MIVYBRIDGE
++#define MODULE_PROC_FAMILY "IVYBRIDGE "
++#elif defined CONFIG_MHASWELL
++#define MODULE_PROC_FAMILY "HASWELL "
++#elif defined CONFIG_MBROADWELL
++#define MODULE_PROC_FAMILY "BROADWELL "
+ #elif defined CONFIG_MATOM
+ #define MODULE_PROC_FAMILY "ATOM "
+ #elif defined CONFIG_M686
+@@ -33,6 +49,20 @@
+ #define MODULE_PROC_FAMILY "K7 "
+ #elif defined CONFIG_MK8
+ #define MODULE_PROC_FAMILY "K8 "
++#elif defined CONFIG_MK8SSE3
++#define MODULE_PROC_FAMILY "K8SSE3 "
++#elif defined CONFIG_MK10
++#define MODULE_PROC_FAMILY "K10 "
++#elif defined CONFIG_MBARCELONA
++#define MODULE_PROC_FAMILY "BARCELONA "
++#elif defined CONFIG_MBOBCAT
++#define MODULE_PROC_FAMILY "BOBCAT "
++#elif defined CONFIG_MBULLDOZER
++#define MODULE_PROC_FAMILY "BULLDOZER "
++#elif defined CONFIG_MPILEDRIVER
++#define MODULE_PROC_FAMILY "PILEDRIVER "
++#elif defined CONFIG_MJAGUAR
++#define MODULE_PROC_FAMILY "JAGUAR "
+ #elif defined CONFIG_MELAN
+ #define MODULE_PROC_FAMILY "ELAN "
+ #elif defined CONFIG_MCRUSOE
+--- a/arch/x86/Kconfig.cpu 2014-06-16 16:44:27.000000000 -0400
++++ b/arch/x86/Kconfig.cpu 2015-03-07 03:32:14.337713226 -0500
+@@ -137,9 +137,8 @@ config MPENTIUM4
+ -Paxville
+ -Dempsey
+
+-
+ config MK6
+- bool "K6/K6-II/K6-III"
++ bool "AMD K6/K6-II/K6-III"
+ depends on X86_32
+ ---help---
+ Select this for an AMD K6-family processor. Enables use of
+@@ -147,7 +146,7 @@ config MK6
+ flags to GCC.
+
+ config MK7
+- bool "Athlon/Duron/K7"
++ bool "AMD Athlon/Duron/K7"
+ depends on X86_32
+ ---help---
+ Select this for an AMD Athlon K7-family processor. Enables use of
+@@ -155,12 +154,62 @@ config MK7
+ flags to GCC.
+
+ config MK8
+- bool "Opteron/Athlon64/Hammer/K8"
++ bool "AMD Opteron/Athlon64/Hammer/K8"
+ ---help---
+ Select this for an AMD Opteron or Athlon64 Hammer-family processor.
+ Enables use of some extended instructions, and passes appropriate
+ optimization flags to GCC.
+
++config MK8SSE3
++ bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
++ ---help---
++ Select this for improved AMD Opteron or Athlon64 Hammer-family processors.
++ Enables use of some extended instructions, and passes appropriate
++ optimization flags to GCC.
++
++config MK10
++ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
++ ---help---
++ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
++ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
++ Enables use of some extended instructions, and passes appropriate
++ optimization flags to GCC.
++
++config MBARCELONA
++ bool "AMD Barcelona"
++ ---help---
++ Select this for AMD Barcelona and newer processors.
++
++ Enables -march=barcelona
++
++config MBOBCAT
++ bool "AMD Bobcat"
++ ---help---
++ Select this for AMD Bobcat processors.
++
++ Enables -march=btver1
++
++config MBULLDOZER
++ bool "AMD Bulldozer"
++ ---help---
++ Select this for AMD Bulldozer processors.
++
++ Enables -march=bdver1
++
++config MPILEDRIVER
++ bool "AMD Piledriver"
++ ---help---
++ Select this for AMD Piledriver processors.
++
++ Enables -march=bdver2
++
++config MJAGUAR
++ bool "AMD Jaguar"
++ ---help---
++ Select this for AMD Jaguar processors.
++
++ Enables -march=btver2
++
+ config MCRUSOE
+ bool "Crusoe"
+ depends on X86_32
+@@ -251,8 +300,17 @@ config MPSC
+ using the cpu family field
+ in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
+
++config MATOM
++ bool "Intel Atom"
++ ---help---
++
++ Select this for the Intel Atom platform. Intel Atom CPUs have an
++ in-order pipelining architecture and thus can benefit from
++ accordingly optimized code. Use a recent GCC with specific Atom
++ support in order to fully benefit from selecting this option.
++
+ config MCORE2
+- bool "Core 2/newer Xeon"
++ bool "Intel Core 2"
+ ---help---
+
+ Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
+@@ -260,14 +318,63 @@ config MCORE2
+ family in /proc/cpuinfo. Newer ones have 6 and older ones 15
+ (not a typo)
+
+-config MATOM
+- bool "Intel Atom"
++ Enables -march=core2
++
++config MNEHALEM
++ bool "Intel Nehalem"
+ ---help---
+
+- Select this for the Intel Atom platform. Intel Atom CPUs have an
+- in-order pipelining architecture and thus can benefit from
+- accordingly optimized code. Use a recent GCC with specific Atom
+- support in order to fully benefit from selecting this option.
++ Select this for 1st Gen Core processors in the Nehalem family.
++
++ Enables -march=nehalem
++
++config MWESTMERE
++ bool "Intel Westmere"
++ ---help---
++
++ Select this for the Intel Westmere formerly Nehalem-C family.
++
++ Enables -march=westmere
++
++config MSILVERMONT
++ bool "Intel Silvermont"
++ ---help---
++
++ Select this for the Intel Silvermont platform.
++
++ Enables -march=silvermont
++
++config MSANDYBRIDGE
++ bool "Intel Sandy Bridge"
++ ---help---
++
++ Select this for 2nd Gen Core processors in the Sandy Bridge family.
++
++ Enables -march=sandybridge
++
++config MIVYBRIDGE
++ bool "Intel Ivy Bridge"
++ ---help---
++
++ Select this for 3rd Gen Core processors in the Ivy Bridge family.
++
++ Enables -march=ivybridge
++
++config MHASWELL
++ bool "Intel Haswell"
++ ---help---
++
++ Select this for 4th Gen Core processors in the Haswell family.
++
++ Enables -march=haswell
++
++config MBROADWELL
++ bool "Intel Broadwell"
++ ---help---
++
++ Select this for 5th Gen Core processors in the Broadwell family.
++
++ Enables -march=broadwell
+
+ config GENERIC_CPU
+ bool "Generic-x86-64"
+@@ -276,6 +383,19 @@ config GENERIC_CPU
+ Generic x86-64 CPU.
+ Run equally well on all x86-64 CPUs.
+
++config MNATIVE
++ bool "Native optimizations autodetected by GCC"
++ ---help---
++
++ GCC 4.2 and above support -march=native, which automatically detects
++ the optimum settings to use based on your processor. -march=native
++ also detects and applies additional settings beyond -march specific
++ to your CPU (e.g. -msse4). Unless you have a specific reason not to
++ (e.g. distcc cross-compiling), you should probably be using
++ -march=native rather than anything listed below.
++
++ Enables -march=native
++
+ endchoice
+
+ config X86_GENERIC
+@@ -300,7 +420,7 @@ config X86_INTERNODE_CACHE_SHIFT
+ config X86_L1_CACHE_SHIFT
+ int
+ default "7" if MPENTIUM4 || MPSC
+- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
++ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
+ default "4" if MELAN || M486 || MGEODEGX1
+ default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
+
+@@ -331,11 +451,11 @@ config X86_ALIGNMENT_16
+
+ config X86_INTEL_USERCOPY
+ def_bool y
+- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
++ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE
+
+ config X86_USE_PPRO_CHECKSUM
+ def_bool y
+- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MATOM || MNATIVE
+
+ config X86_USE_3DNOW
+ def_bool y
+@@ -359,17 +479,17 @@ config X86_P6_NOP
+
+ config X86_TSC
+ def_bool y
+- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE || MATOM) || X86_64
+
+ config X86_CMPXCHG64
+ def_bool y
+- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
++ depends on X86_PAE || X86_64 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
+
+ # this should be set for all -march=.. options where the compiler
+ # generates cmov.
+ config X86_CMOV
+ def_bool y
+- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
++ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
+
+ config X86_MINIMUM_CPU_FAMILY
+ int
+--- a/arch/x86/Makefile 2014-06-16 16:44:27.000000000 -0400
++++ b/arch/x86/Makefile 2015-03-07 03:33:27.650843211 -0500
+@@ -92,13 +92,35 @@ else
+ KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
+
+ # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
++ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
+ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
++ cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-mtune=k8)
++ cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
++ cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
++ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
++ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
++ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
++ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
+ cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+
+ cflags-$(CONFIG_MCORE2) += \
+- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
+- cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
+- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
++ $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
++ cflags-$(CONFIG_MNEHALEM) += \
++ $(call cc-option,-march=nehalem,$(call cc-option,-mtune=nehalem))
++ cflags-$(CONFIG_MWESTMERE) += \
++ $(call cc-option,-march=westmere,$(call cc-option,-mtune=westmere))
++ cflags-$(CONFIG_MSILVERMONT) += \
++ $(call cc-option,-march=silvermont,$(call cc-option,-mtune=silvermont))
++ cflags-$(CONFIG_MSANDYBRIDGE) += \
++ $(call cc-option,-march=sandybridge,$(call cc-option,-mtune=sandybridge))
++ cflags-$(CONFIG_MIVYBRIDGE) += \
++ $(call cc-option,-march=ivybridge,$(call cc-option,-mtune=ivybridge))
++ cflags-$(CONFIG_MHASWELL) += \
++ $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell))
++ cflags-$(CONFIG_MBROADWELL) += \
++ $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell))
++ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \
++ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
+ cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
+ KBUILD_CFLAGS += $(cflags-y)
+
+--- a/arch/x86/Makefile_32.cpu 2014-06-16 16:44:27.000000000 -0400
++++ b/arch/x86/Makefile_32.cpu 2015-03-07 03:34:15.203586024 -0500
+@@ -23,7 +23,15 @@ cflags-$(CONFIG_MK6) += -march=k6
+ # Please note, that patches that add -march=athlon-xp and friends are pointless.
+ # They make zero difference whatsosever to performance at this time.
+ cflags-$(CONFIG_MK7) += -march=athlon
++cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
+ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
++cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-march=athlon)
++cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
++cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon)
++cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
++cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
++cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
++cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
+ cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+ cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+ cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
+@@ -32,8 +40,15 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
+ cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
+ cflags-$(CONFIG_MVIAC7) += -march=i686
+ cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
+-cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
+- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
++cflags-$(CONFIG_MNEHALEM) += -march=i686 $(call tune,nehalem)
++cflags-$(CONFIG_MWESTMERE) += -march=i686 $(call tune,westmere)
++cflags-$(CONFIG_MSILVERMONT) += -march=i686 $(call tune,silvermont)
++cflags-$(CONFIG_MSANDYBRIDGE) += -march=i686 $(call tune,sandybridge)
++cflags-$(CONFIG_MIVYBRIDGE) += -march=i686 $(call tune,ivybridge)
++cflags-$(CONFIG_MHASWELL) += -march=i686 $(call tune,haswell)
++cflags-$(CONFIG_MBROADWELL) += -march=i686 $(call tune,broadwell)
++cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \
++ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
+
+ # AMD Elan support
+ cflags-$(CONFIG_MELAN) += -march=i486
+
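
Because gcc 4.9 renamed the -march values this version of the patch relies on, it can
help to ask the installed compiler which concrete micro-architecture -march=native
resolves to before choosing between MNATIVE and one of the named options above. A small
throwaway makefile sketch of that check, assuming gcc >= 4.9 and GNU grep on the PATH
(the file name and output wording are illustrative, not part of the patch):

# print-native-march.mk (hypothetical): run with "make -f print-native-march.mk".
CC ?= gcc
# -Q --help=target makes gcc print the option values it actually settled on,
# so the -march= line shows the resolved name, e.g. "nehalem" or "haswell".
native-march := $(shell $(CC) -march=native -Q --help=target 2>/dev/null | grep -m1 -- '-march=')
$(info gcc resolves -march=native to:$(native-march))

# No-op default target so make exits cleanly after printing the result.
all: ; @:
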
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-06-02 19:39 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-06-02 19:39 UTC (permalink / raw
To: gentoo-commits
commit: dc34aa235aa823a2bb5b439479041daca6a93749
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Thu Jun 2 19:39:11 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Thu Jun 2 19:39:11 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=dc34aa23
Linux patch 4.6.1
0000_README | 4 +
1000_linux-4.6.1.patch | 4584 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 4588 insertions(+)
diff --git a/0000_README b/0000_README
index 8e70e78..220d627 100644
--- a/0000_README
+++ b/0000_README
@@ -43,6 +43,10 @@ EXPERIMENTAL
Individual Patch Descriptions:
--------------------------------------------------------------------------
+Patch: 1000_linux-4.6.1.patch
+From: http://www.kernel.org
+Desc: Linux 4.6.1
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1000_linux-4.6.1.patch b/1000_linux-4.6.1.patch
new file mode 100644
index 0000000..7282235
--- /dev/null
+++ b/1000_linux-4.6.1.patch
@@ -0,0 +1,4584 @@
+diff --git a/Documentation/serial/tty.txt b/Documentation/serial/tty.txt
+index 798cba82c762..b48780977a68 100644
+--- a/Documentation/serial/tty.txt
++++ b/Documentation/serial/tty.txt
+@@ -210,9 +210,6 @@ TTY_IO_ERROR If set, causes all subsequent userspace read/write
+
+ TTY_OTHER_CLOSED Device is a pty and the other side has closed.
+
+-TTY_OTHER_DONE Device is a pty and the other side has closed and
+- all pending input processing has been completed.
+-
+ TTY_NO_WRITE_SPLIT Prevent driver from splitting up writes into
+ smaller chunks.
+
+diff --git a/Makefile b/Makefile
+index 0f9cb36d45c2..2fcc41ea99a3 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 6
+-SUBLEVEL = 0
++SUBLEVEL = 1
+ EXTRAVERSION =
+ NAME = Charred Weasel
+
+@@ -697,9 +697,10 @@ KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
+ KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
+ else
+
+-# This warning generated too much noise in a regular build.
+-# Use make W=1 to enable this warning (see scripts/Makefile.build)
++# These warnings generated too much noise in a regular build.
++# Use make W=1 to enable them (see scripts/Makefile.build)
+ KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
++KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+ endif
+
+ ifdef CONFIG_FRAME_POINTER
+diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
+index d6d4191e68f2..dea1452a8419 100644
+--- a/arch/arm/kvm/mmu.c
++++ b/arch/arm/kvm/mmu.c
+@@ -893,11 +893,14 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+ VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
+
+ old_pmd = *pmd;
+- kvm_set_pmd(pmd, *new_pmd);
+- if (pmd_present(old_pmd))
++ if (pmd_present(old_pmd)) {
++ pmd_clear(pmd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+- else
++ } else {
+ get_page(virt_to_page(pmd));
++ }
++
++ kvm_set_pmd(pmd, *new_pmd);
+ return 0;
+ }
+
+@@ -946,12 +949,14 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+
+ /* Create 2nd stage page table mapping - Level 3 */
+ old_pte = *pte;
+- kvm_set_pte(pte, *new_pte);
+- if (pte_present(old_pte))
++ if (pte_present(old_pte)) {
++ kvm_set_pte(pte, __pte(0));
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+- else
++ } else {
+ get_page(virt_to_page(pte));
++ }
+
++ kvm_set_pte(pte, *new_pte);
+ return 0;
+ }
+
+diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
+index 5c25b831273d..9786f770088d 100644
+--- a/arch/arm64/include/asm/pgtable-hwdef.h
++++ b/arch/arm64/include/asm/pgtable-hwdef.h
+@@ -133,7 +133,6 @@
+ * Section
+ */
+ #define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
+-#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 58)
+ #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
+ #define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
+ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
+diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
+index 989fef16d461..44430ce5819e 100644
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -280,6 +280,7 @@ static inline pgprot_t mk_sect_prot(pgprot_t prot)
+ #define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
++#define pmd_present(pmd) pte_present(pmd_pte(pmd))
+ #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
+ #define pmd_young(pmd) pte_young(pmd_pte(pmd))
+ #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+@@ -288,7 +289,7 @@ static inline pgprot_t mk_sect_prot(pgprot_t prot)
+ #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
+ #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+ #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+-#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))
++#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID))
+
+ #define __HAVE_ARCH_PMD_WRITE
+ #define pmd_write(pmd) pte_write(pmd_pte(pmd))
+@@ -327,7 +328,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot);
+
+ #define pmd_none(pmd) (!pmd_val(pmd))
+-#define pmd_present(pmd) (pmd_val(pmd))
+
+ #define pmd_bad(pmd) (!(pmd_val(pmd) & 2))
+
+@@ -526,6 +526,21 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+ }
+
+ #ifdef CONFIG_ARM64_HW_AFDBM
++#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
++extern int ptep_set_access_flags(struct vm_area_struct *vma,
++ unsigned long address, pte_t *ptep,
++ pte_t entry, int dirty);
++
++#ifdef CONFIG_TRANSPARENT_HUGEPAGE
++#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
++static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
++ unsigned long address, pmd_t *pmdp,
++ pmd_t entry, int dirty)
++{
++ return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
++}
++#endif
++
+ /*
+ * Atomic pte/pmd modifications.
+ */
+@@ -578,9 +593,9 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ }
+
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+-static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+- unsigned long address, pmd_t *pmdp)
++#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
++static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
++ unsigned long address, pmd_t *pmdp)
+ {
+ return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
+ }
+diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
+index 84c8684431c7..f0c3fb7ec8cf 100644
+--- a/arch/arm64/kernel/cpuinfo.c
++++ b/arch/arm64/kernel/cpuinfo.c
+@@ -87,7 +87,8 @@ static const char *const compat_hwcap_str[] = {
+ "idivt",
+ "vfpd32",
+ "lpae",
+- "evtstrm"
++ "evtstrm",
++ NULL
+ };
+
+ static const char *const compat_hwcap2_str[] = {
+diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
+index 4d1ac81870d2..e9e0e6db73f6 100644
+--- a/arch/arm64/kvm/inject_fault.c
++++ b/arch/arm64/kvm/inject_fault.c
+@@ -162,7 +162,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
+ esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT);
+
+ if (!is_iabt)
+- esr |= ESR_ELx_EC_DABT_LOW;
++ esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
+
+ vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT;
+ }
+diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
+index 95df28bc875f..3ae4a28c4aed 100644
+--- a/arch/arm64/mm/fault.c
++++ b/arch/arm64/mm/fault.c
+@@ -81,6 +81,56 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
+ printk("\n");
+ }
+
++#ifdef CONFIG_ARM64_HW_AFDBM
++/*
++ * This function sets the access flags (dirty, accessed), as well as write
++ * permission, and only to a more permissive setting.
++ *
++ * It needs to cope with hardware update of the accessed/dirty state by other
++ * agents in the system and can safely skip the __sync_icache_dcache() call as,
++ * like set_pte_at(), the PTE is never changed from no-exec to exec here.
++ *
++ * Returns whether or not the PTE actually changed.
++ */
++int ptep_set_access_flags(struct vm_area_struct *vma,
++ unsigned long address, pte_t *ptep,
++ pte_t entry, int dirty)
++{
++ pteval_t old_pteval;
++ unsigned int tmp;
++
++ if (pte_same(*ptep, entry))
++ return 0;
++
++ /* only preserve the access flags and write permission */
++ pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;
++
++ /*
++ * PTE_RDONLY is cleared by default in the asm below, so set it in
++ * back if necessary (read-only or clean PTE).
++ */
++ if (!pte_write(entry) || !dirty)
++ pte_val(entry) |= PTE_RDONLY;
++
++ /*
++ * Setting the flags must be done atomically to avoid racing with the
++ * hardware update of the access/dirty state.
++ */
++ asm volatile("// ptep_set_access_flags\n"
++ " prfm pstl1strm, %2\n"
++ "1: ldxr %0, %2\n"
++ " and %0, %0, %3 // clear PTE_RDONLY\n"
++ " orr %0, %0, %4 // set flags\n"
++ " stxr %w1, %0, %2\n"
++ " cbnz %w1, 1b\n"
++ : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
++ : "L" (~PTE_RDONLY), "r" (pte_val(entry)));
++
++ flush_tlb_fix_spurious_fault(vma, address);
++ return 1;
++}
++#endif
++
+ /*
+ * The kernel tried to access some page that wasn't present.
+ */
+diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
+index f6b12790716c..942b8f6bf35b 100644
+--- a/arch/mips/include/asm/kvm_host.h
++++ b/arch/mips/include/asm/kvm_host.h
+@@ -747,7 +747,7 @@ extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
+
+ uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu);
+ void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count);
+-void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare);
++void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack);
+ void kvm_mips_init_count(struct kvm_vcpu *vcpu);
+ int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
+ int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
+diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
+index b37954cc880d..b8b7860ec1a8 100644
+--- a/arch/mips/kvm/emulate.c
++++ b/arch/mips/kvm/emulate.c
+@@ -302,12 +302,31 @@ static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu)
+ */
+ static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
+ {
+- ktime_t expires;
++ struct mips_coproc *cop0 = vcpu->arch.cop0;
++ ktime_t expires, threshold;
++ uint32_t count, compare;
+ int running;
+
+- /* Is the hrtimer pending? */
++ /* Calculate the biased and scaled guest CP0_Count */
++ count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
++ compare = kvm_read_c0_guest_compare(cop0);
++
++ /*
++ * Find whether CP0_Count has reached the closest timer interrupt. If
++ * not, we shouldn't inject it.
++ */
++ if ((int32_t)(count - compare) < 0)
++ return count;
++
++ /*
++ * The CP0_Count we're going to return has already reached the closest
++ * timer interrupt. Quickly check if it really is a new interrupt by
++ * looking at whether the interval until the hrtimer expiry time is
++ * less than 1/4 of the timer period.
++ */
+ expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer);
+- if (ktime_compare(now, expires) >= 0) {
++ threshold = ktime_add_ns(now, vcpu->arch.count_period / 4);
++ if (ktime_before(expires, threshold)) {
+ /*
+ * Cancel it while we handle it so there's no chance of
+ * interference with the timeout handler.
+@@ -329,8 +348,7 @@ static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
+ }
+ }
+
+- /* Return the biased and scaled guest CP0_Count */
+- return vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
++ return count;
+ }
+
+ /**
+@@ -420,32 +438,6 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
+ }
+
+ /**
+- * kvm_mips_update_hrtimer() - Update next expiry time of hrtimer.
+- * @vcpu: Virtual CPU.
+- *
+- * Recalculates and updates the expiry time of the hrtimer. This can be used
+- * after timer parameters have been altered which do not depend on the time that
+- * the change occurs (in those cases kvm_mips_freeze_hrtimer() and
+- * kvm_mips_resume_hrtimer() are used directly).
+- *
+- * It is guaranteed that no timer interrupts will be lost in the process.
+- *
+- * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
+- */
+-static void kvm_mips_update_hrtimer(struct kvm_vcpu *vcpu)
+-{
+- ktime_t now;
+- uint32_t count;
+-
+- /*
+- * freeze_hrtimer takes care of a timer interrupts <= count, and
+- * resume_hrtimer the hrtimer takes care of a timer interrupts > count.
+- */
+- now = kvm_mips_freeze_hrtimer(vcpu, &count);
+- kvm_mips_resume_hrtimer(vcpu, now, count);
+-}
+-
+-/**
+ * kvm_mips_write_count() - Modify the count and update timer.
+ * @vcpu: Virtual CPU.
+ * @count: Guest CP0_Count value to set.
+@@ -540,23 +532,42 @@ int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz)
+ * kvm_mips_write_compare() - Modify compare and update timer.
+ * @vcpu: Virtual CPU.
+ * @compare: New CP0_Compare value.
++ * @ack: Whether to acknowledge timer interrupt.
+ *
+ * Update CP0_Compare to a new value and update the timeout.
++ * If @ack, atomically acknowledge any pending timer interrupt, otherwise ensure
++ * any pending timer interrupt is preserved.
+ */
+-void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare)
++void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack)
+ {
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
++ int dc;
++ u32 old_compare = kvm_read_c0_guest_compare(cop0);
++ ktime_t now;
++ uint32_t count;
+
+ /* if unchanged, must just be an ack */
+- if (kvm_read_c0_guest_compare(cop0) == compare)
++ if (old_compare == compare) {
++ if (!ack)
++ return;
++ kvm_mips_callbacks->dequeue_timer_int(vcpu);
++ kvm_write_c0_guest_compare(cop0, compare);
+ return;
++ }
++
++ /* freeze_hrtimer() takes care of timer interrupts <= count */
++ dc = kvm_mips_count_disabled(vcpu);
++ if (!dc)
++ now = kvm_mips_freeze_hrtimer(vcpu, &count);
++
++ if (ack)
++ kvm_mips_callbacks->dequeue_timer_int(vcpu);
+
+- /* Update compare */
+ kvm_write_c0_guest_compare(cop0, compare);
+
+- /* Update timeout if count enabled */
+- if (!kvm_mips_count_disabled(vcpu))
+- kvm_mips_update_hrtimer(vcpu);
++ /* resume_hrtimer() takes care of timer interrupts > count */
++ if (!dc)
++ kvm_mips_resume_hrtimer(vcpu, now, count);
+ }
+
+ /**
+@@ -1095,9 +1106,9 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
+
+ /* If we are writing to COMPARE */
+ /* Clear pending timer interrupt, if any */
+- kvm_mips_callbacks->dequeue_timer_int(vcpu);
+ kvm_mips_write_compare(vcpu,
+- vcpu->arch.gprs[rt]);
++ vcpu->arch.gprs[rt],
++ true);
+ } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
+ unsigned int old_val, val, change;
+
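The new check added to kvm_mips_read_count_running() above decides whether the guest CP0_Count has reached CP0_Compare by looking at the sign of their 32-bit difference, which stays correct across the counter wrap. A small stand-alone sketch of that comparison; the values are arbitrary examples.

#include <stdint.h>
#include <stdio.h>

/* True once count has passed compare, even across the 32-bit wrap-around. */
static int count_reached_compare(uint32_t count, uint32_t compare)
{
        return (int32_t)(count - compare) >= 0;
}

int main(void)
{
        /* compare just below the wrap, count just after it */
        printf("%d\n", count_reached_compare(0x00000005, 0xFFFFFFF0)); /* 1 */
        printf("%d\n", count_reached_compare(0xFFFFFFF0, 0x00000005)); /* 0 */
        return 0;
}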
+diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
+index c4038d2a724c..caa5ea1038a0 100644
+--- a/arch/mips/kvm/trap_emul.c
++++ b/arch/mips/kvm/trap_emul.c
+@@ -546,7 +546,7 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
+ kvm_mips_write_count(vcpu, v);
+ break;
+ case KVM_REG_MIPS_CP0_COMPARE:
+- kvm_mips_write_compare(vcpu, v);
++ kvm_mips_write_compare(vcpu, v, false);
+ break;
+ case KVM_REG_MIPS_CP0_CAUSE:
+ /*
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index 84fb4fcfaa41..93243554cae9 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -27,6 +27,7 @@
+ #include <linux/export.h>
+ #include <linux/fs.h>
+ #include <linux/anon_inodes.h>
++#include <linux/cpu.h>
+ #include <linux/cpumask.h>
+ #include <linux/spinlock.h>
+ #include <linux/page-flags.h>
+diff --git a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha-mb/sha1_x8_avx2.S
+index 8e1b47792b31..c9dae1cd2919 100644
+--- a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S
++++ b/arch/x86/crypto/sha-mb/sha1_x8_avx2.S
+@@ -296,7 +296,11 @@ W14 = TMP_
+ #
+ ENTRY(sha1_x8_avx2)
+
+- push RSP_SAVE
++ # save callee-saved clobbered registers to comply with C function ABI
++ push %r12
++ push %r13
++ push %r14
++ push %r15
+
+ #save rsp
+ mov %rsp, RSP_SAVE
+@@ -446,7 +450,12 @@ lloop:
+ ## Postamble
+
+ mov RSP_SAVE, %rsp
+- pop RSP_SAVE
++
++ # restore callee-saved clobbered registers
++ pop %r15
++ pop %r14
++ pop %r13
++ pop %r12
+
+ ret
+ ENDPROC(sha1_x8_avx2)
+diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
+index 7012d18bb293..f921a1ed43f7 100644
+--- a/arch/x86/events/intel/uncore.c
++++ b/arch/x86/events/intel/uncore.c
+@@ -888,7 +888,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
+ return -ENODEV;
+
+ pkg = topology_phys_to_logical_pkg(phys_id);
+- if (WARN_ON_ONCE(pkg < 0))
++ if (pkg < 0)
+ return -EINVAL;
+
+ if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index 3636ec06c887..aeab47932933 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -63,9 +63,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ (((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) || \
+ (((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) || \
+ (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) || \
+- (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \
+- (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \
+- (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
++ (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \
++ (((bit)>>5)==15 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \
++ (((bit)>>5)==16 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
+
+ #define DISABLED_MASK_BIT_SET(bit) \
+ ( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0 )) || \
+@@ -82,9 +82,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ (((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) || \
+ (((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) || \
+ (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) || \
+- (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \
+- (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \
+- (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
++ (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \
++ (((bit)>>5)==15 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \
++ (((bit)>>5)==16 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
+
+ #define cpu_has(c, bit) \
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
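The corrected lines above restore the intended indexing: each REQUIRED_MASKn/DISABLED_MASKn covers one 32-bit word of feature bits, so word n is selected by (bit >> 5) and the bit inside it by (bit & 31). A tiny stand-alone sketch of that arithmetic; the feature number used is made up for the example.

#include <stdio.h>

int main(void)
{
        unsigned int bit = 16 * 32 + 9;         /* e.g. "word 16, bit 9" */

        printf("word %u, mask %#lx\n", bit >> 5, 1UL << (bit & 31));
        return 0;
}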
+diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
+index 39343be7d4f4..911e9358ceb1 100644
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -29,11 +29,11 @@
+ #endif /* CONFIG_X86_64 */
+
+ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+-# define DISABLE_PKU (1<<(X86_FEATURE_PKU))
+-# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE))
+-#else
+ # define DISABLE_PKU 0
+ # define DISABLE_OSPKE 0
++#else
++# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
++# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
+ #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
+
+ /*
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 8394b3d1f94f..f45a4b9d28c8 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -310,6 +310,10 @@ static bool pku_disabled;
+
+ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
+ {
++ /* check the boot processor, plus compile options for PKU: */
++ if (!cpu_feature_enabled(X86_FEATURE_PKU))
++ return;
++ /* checks the actual processor's cpuid bits: */
+ if (!cpu_has(c, X86_FEATURE_PKU))
+ return;
+ if (pku_disabled)
+diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
+index 3f8c732117ec..c146f3c262c3 100644
+--- a/arch/x86/kvm/mtrr.c
++++ b/arch/x86/kvm/mtrr.c
+@@ -44,8 +44,6 @@ static bool msr_mtrr_valid(unsigned msr)
+ case MSR_MTRRdefType:
+ case MSR_IA32_CR_PAT:
+ return true;
+- case 0x2f8:
+- return true;
+ }
+ return false;
+ }
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 133679d520af..faf52bac1416 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -5050,8 +5050,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+
+ cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+- vmx_set_cr0(vcpu, cr0); /* enter rmode */
+ vmx->vcpu.arch.cr0 = cr0;
++ vmx_set_cr0(vcpu, cr0); /* enter rmode */
+ vmx_set_cr4(vcpu, 0);
+ vmx_set_efer(vcpu, 0);
+ vmx_fpu_activate(vcpu);
+diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
+index beac4dfdade6..349b8ce92bf2 100644
+--- a/arch/x86/pci/xen.c
++++ b/arch/x86/pci/xen.c
+@@ -491,8 +491,11 @@ int __init pci_xen_initial_domain(void)
+ #endif
+ __acpi_register_gsi = acpi_register_gsi_xen;
+ __acpi_unregister_gsi = NULL;
+- /* Pre-allocate legacy irqs */
+- for (irq = 0; irq < nr_legacy_irqs(); irq++) {
++ /*
++ * Pre-allocate the legacy IRQs. Use NR_LEGACY_IRQS here
++ * because we don't have a PIC and thus nr_legacy_irqs() is zero.
++ */
++ for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
+ int trigger, polarity;
+
+ if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
+diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
+index cd2c3d6d40e0..993fd31394c8 100644
+--- a/drivers/acpi/device_pm.c
++++ b/drivers/acpi/device_pm.c
+@@ -319,6 +319,7 @@ int acpi_device_fix_up_power(struct acpi_device *device)
+
+ return ret;
+ }
++EXPORT_SYMBOL_GPL(acpi_device_fix_up_power);
+
+ int acpi_device_update_power(struct acpi_device *device, int *state_p)
+ {
+diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
+index 814d5f83b75e..f03677588b9d 100644
+--- a/drivers/acpi/osl.c
++++ b/drivers/acpi/osl.c
+@@ -135,7 +135,7 @@ static struct osi_linux {
+ unsigned int enable:1;
+ unsigned int dmi:1;
+ unsigned int cmdline:1;
+- unsigned int default_disabling:1;
++ u8 default_disabling;
+ } osi_linux = {0, 0, 0, 0};
+
+ static u32 acpi_osi_handler(acpi_string interface, u32 supported)
+@@ -1751,10 +1751,13 @@ void __init acpi_osi_setup(char *str)
+ if (*str == '!') {
+ str++;
+ if (*str == '\0') {
+- osi_linux.default_disabling = 1;
++ /* Do not override acpi_osi=!* */
++ if (!osi_linux.default_disabling)
++ osi_linux.default_disabling =
++ ACPI_DISABLE_ALL_VENDOR_STRINGS;
+ return;
+ } else if (*str == '*') {
+- acpi_update_interfaces(ACPI_DISABLE_ALL_STRINGS);
++ osi_linux.default_disabling = ACPI_DISABLE_ALL_STRINGS;
+ for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
+ osi = &osi_setup_entries[i];
+ osi->enable = false;
+@@ -1827,10 +1830,13 @@ static void __init acpi_osi_setup_late(void)
+ acpi_status status;
+
+ if (osi_linux.default_disabling) {
+- status = acpi_update_interfaces(ACPI_DISABLE_ALL_VENDOR_STRINGS);
++ status = acpi_update_interfaces(osi_linux.default_disabling);
+
+ if (ACPI_SUCCESS(status))
+- printk(KERN_INFO PREFIX "Disabled all _OSI OS vendors\n");
++ printk(KERN_INFO PREFIX "Disabled all _OSI OS vendors%s\n",
++ osi_linux.default_disabling ==
++ ACPI_DISABLE_ALL_STRINGS ?
++ " and feature groups" : "");
+ }
+
+ for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
+diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
+index 80783dcb7f57..aba31210c802 100644
+--- a/drivers/bluetooth/hci_vhci.c
++++ b/drivers/bluetooth/hci_vhci.c
+@@ -50,6 +50,7 @@ struct vhci_data {
+ wait_queue_head_t read_wait;
+ struct sk_buff_head readq;
+
++ struct mutex open_mutex;
+ struct delayed_work open_timeout;
+ };
+
+@@ -87,12 +88,15 @@ static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
+ return 0;
+ }
+
+-static int vhci_create_device(struct vhci_data *data, __u8 opcode)
++static int __vhci_create_device(struct vhci_data *data, __u8 opcode)
+ {
+ struct hci_dev *hdev;
+ struct sk_buff *skb;
+ __u8 dev_type;
+
++ if (data->hdev)
++ return -EBADFD;
++
+ /* bits 0-1 are dev_type (BR/EDR or AMP) */
+ dev_type = opcode & 0x03;
+
+@@ -151,6 +155,17 @@ static int vhci_create_device(struct vhci_data *data, __u8 opcode)
+ return 0;
+ }
+
++static int vhci_create_device(struct vhci_data *data, __u8 opcode)
++{
++ int err;
++
++ mutex_lock(&data->open_mutex);
++ err = __vhci_create_device(data, opcode);
++ mutex_unlock(&data->open_mutex);
++
++ return err;
++}
++
+ static inline ssize_t vhci_get_user(struct vhci_data *data,
+ struct iov_iter *from)
+ {
+@@ -189,11 +204,6 @@ static inline ssize_t vhci_get_user(struct vhci_data *data,
+ break;
+
+ case HCI_VENDOR_PKT:
+- if (data->hdev) {
+- kfree_skb(skb);
+- return -EBADFD;
+- }
+-
+ cancel_delayed_work_sync(&data->open_timeout);
+
+ opcode = *((__u8 *) skb->data);
+@@ -320,6 +330,7 @@ static int vhci_open(struct inode *inode, struct file *file)
+ skb_queue_head_init(&data->readq);
+ init_waitqueue_head(&data->read_wait);
+
++ mutex_init(&data->open_mutex);
+ INIT_DELAYED_WORK(&data->open_timeout, vhci_open_timeout);
+
+ file->private_data = data;
+@@ -333,15 +344,18 @@ static int vhci_open(struct inode *inode, struct file *file)
+ static int vhci_release(struct inode *inode, struct file *file)
+ {
+ struct vhci_data *data = file->private_data;
+- struct hci_dev *hdev = data->hdev;
++ struct hci_dev *hdev;
+
+ cancel_delayed_work_sync(&data->open_timeout);
+
++ hdev = data->hdev;
++
+ if (hdev) {
+ hci_unregister_dev(hdev);
+ hci_free_dev(hdev);
+ }
+
++ skb_queue_purge(&data->readq);
+ file->private_data = NULL;
+ kfree(data);
+
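The hci_vhci change above serialises device creation behind data->open_mutex so that two writers racing on the control device cannot both create an hdev. A user-space analogue of that one-shot, mutex-guarded creation pattern; the struct, the malloc() stand-in and the error codes are only illustrative.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx {
        pthread_mutex_t open_mutex;
        void *hdev;                     /* NULL until created */
};

static int __create_device(struct ctx *c)
{
        if (c->hdev)
                return -EBADFD;         /* already created */
        c->hdev = malloc(1);            /* stands in for the real allocation */
        return c->hdev ? 0 : -ENOMEM;
}

static int create_device(struct ctx *c)
{
        int err;

        pthread_mutex_lock(&c->open_mutex);
        err = __create_device(c);
        pthread_mutex_unlock(&c->open_mutex);
        return err;
}

int main(void)
{
        struct ctx c = { .open_mutex = PTHREAD_MUTEX_INITIALIZER, .hdev = NULL };

        printf("first:  %d\n", create_device(&c));      /* 0 */
        printf("second: %d\n", create_device(&c));      /* -EBADFD */
        free(c.hdev);
        return 0;
}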
+diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
+index c74ed3fd496d..90338c38e38a 100644
+--- a/drivers/clk/bcm/clk-bcm2835.c
++++ b/drivers/clk/bcm/clk-bcm2835.c
+@@ -1079,10 +1079,12 @@ static void bcm2835_pll_divider_off(struct clk_hw *hw)
+ struct bcm2835_cprman *cprman = divider->cprman;
+ const struct bcm2835_pll_divider_data *data = divider->data;
+
++ spin_lock(&cprman->regs_lock);
+ cprman_write(cprman, data->cm_reg,
+ (cprman_read(cprman, data->cm_reg) &
+ ~data->load_mask) | data->hold_mask);
+ cprman_write(cprman, data->a2w_reg, A2W_PLL_CHANNEL_DISABLE);
++ spin_unlock(&cprman->regs_lock);
+ }
+
+ static int bcm2835_pll_divider_on(struct clk_hw *hw)
+@@ -1091,12 +1093,14 @@ static int bcm2835_pll_divider_on(struct clk_hw *hw)
+ struct bcm2835_cprman *cprman = divider->cprman;
+ const struct bcm2835_pll_divider_data *data = divider->data;
+
++ spin_lock(&cprman->regs_lock);
+ cprman_write(cprman, data->a2w_reg,
+ cprman_read(cprman, data->a2w_reg) &
+ ~A2W_PLL_CHANNEL_DISABLE);
+
+ cprman_write(cprman, data->cm_reg,
+ cprman_read(cprman, data->cm_reg) & ~data->hold_mask);
++ spin_unlock(&cprman->regs_lock);
+
+ return 0;
+ }
+diff --git a/drivers/clk/qcom/gcc-msm8916.c b/drivers/clk/qcom/gcc-msm8916.c
+index 9c29080a84d8..5c4e193164d4 100644
+--- a/drivers/clk/qcom/gcc-msm8916.c
++++ b/drivers/clk/qcom/gcc-msm8916.c
+@@ -2346,6 +2346,7 @@ static struct clk_branch gcc_crypto_ahb_clk = {
+ "pcnoc_bfdcd_clk_src",
+ },
+ .num_parents = 1,
++ .flags = CLK_SET_RATE_PARENT,
+ .ops = &clk_branch2_ops,
+ },
+ },
+@@ -2381,6 +2382,7 @@ static struct clk_branch gcc_crypto_clk = {
+ "crypto_clk_src",
+ },
+ .num_parents = 1,
++ .flags = CLK_SET_RATE_PARENT,
+ .ops = &clk_branch2_ops,
+ },
+ },
+diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
+index 6fd63a600614..5ef4be22eb80 100644
+--- a/drivers/crypto/caam/jr.c
++++ b/drivers/crypto/caam/jr.c
+@@ -248,7 +248,7 @@ static void caam_jr_dequeue(unsigned long devarg)
+ struct device *caam_jr_alloc(void)
+ {
+ struct caam_drv_private_jr *jrpriv, *min_jrpriv = NULL;
+- struct device *dev = NULL;
++ struct device *dev = ERR_PTR(-ENODEV);
+ int min_tfm_cnt = INT_MAX;
+ int tfm_cnt;
+
+diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
+index 7be3fbcd8d78..3830d7c4e138 100644
+--- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
++++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
+@@ -35,6 +35,7 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
+ unsigned int todo;
+ struct sg_mapping_iter mi, mo;
+ unsigned int oi, oo; /* offset for in and out */
++ unsigned long flags;
+
+ if (areq->nbytes == 0)
+ return 0;
+@@ -49,7 +50,7 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
+ return -EINVAL;
+ }
+
+- spin_lock_bh(&ss->slock);
++ spin_lock_irqsave(&ss->slock, flags);
+
+ for (i = 0; i < op->keylen; i += 4)
+ writel(*(op->key + i / 4), ss->base + SS_KEY0 + i);
+@@ -117,7 +118,7 @@ release_ss:
+ sg_miter_stop(&mi);
+ sg_miter_stop(&mo);
+ writel(0, ss->base + SS_CTL);
+- spin_unlock_bh(&ss->slock);
++ spin_unlock_irqrestore(&ss->slock, flags);
+ return err;
+ }
+
+@@ -149,6 +150,7 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
+ unsigned int ob = 0; /* offset in buf */
+ unsigned int obo = 0; /* offset in bufo*/
+ unsigned int obl = 0; /* length of data in bufo */
++ unsigned long flags;
+
+ if (areq->nbytes == 0)
+ return 0;
+@@ -181,7 +183,7 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
+ if (no_chunk == 1)
+ return sun4i_ss_opti_poll(areq);
+
+- spin_lock_bh(&ss->slock);
++ spin_lock_irqsave(&ss->slock, flags);
+
+ for (i = 0; i < op->keylen; i += 4)
+ writel(*(op->key + i / 4), ss->base + SS_KEY0 + i);
+@@ -307,7 +309,7 @@ release_ss:
+ sg_miter_stop(&mi);
+ sg_miter_stop(&mo);
+ writel(0, ss->base + SS_CTL);
+- spin_unlock_bh(&ss->slock);
++ spin_unlock_irqrestore(&ss->slock, flags);
+
+ return err;
+ }
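The sun4i-ss change above switches the hardware lock from spin_lock_bh() to spin_lock_irqsave(): the _bh variant only disables softirqs, while _irqsave also masks hard interrupts on the local CPU, so the register sequence cannot be preempted by a handler that might take the same lock. A kernel-style sketch of the pattern; the lock name and the device comment are hypothetical.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

static void program_engine(void)
{
        unsigned long flags;

        /* safe to enter from process, softirq or hard-irq context */
        spin_lock_irqsave(&example_lock, flags);
        /* ... write the key and control registers ... */
        spin_unlock_irqrestore(&example_lock, flags);
}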
+diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
+index aae05547b924..b7ee8d30147d 100644
+--- a/drivers/crypto/talitos.c
++++ b/drivers/crypto/talitos.c
+@@ -835,6 +835,16 @@ struct talitos_ahash_req_ctx {
+ struct scatterlist *psrc;
+ };
+
++struct talitos_export_state {
++ u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)];
++ u8 buf[HASH_MAX_BLOCK_SIZE];
++ unsigned int swinit;
++ unsigned int first;
++ unsigned int last;
++ unsigned int to_hash_later;
++ unsigned int nbuf;
++};
++
+ static int aead_setkey(struct crypto_aead *authenc,
+ const u8 *key, unsigned int keylen)
+ {
+@@ -1981,6 +1991,46 @@ static int ahash_digest(struct ahash_request *areq)
+ return ahash_process_req(areq, areq->nbytes);
+ }
+
++static int ahash_export(struct ahash_request *areq, void *out)
++{
++ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++ struct talitos_export_state *export = out;
++
++ memcpy(export->hw_context, req_ctx->hw_context,
++ req_ctx->hw_context_size);
++ memcpy(export->buf, req_ctx->buf, req_ctx->nbuf);
++ export->swinit = req_ctx->swinit;
++ export->first = req_ctx->first;
++ export->last = req_ctx->last;
++ export->to_hash_later = req_ctx->to_hash_later;
++ export->nbuf = req_ctx->nbuf;
++
++ return 0;
++}
++
++static int ahash_import(struct ahash_request *areq, const void *in)
++{
++ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++ const struct talitos_export_state *export = in;
++
++ memset(req_ctx, 0, sizeof(*req_ctx));
++ req_ctx->hw_context_size =
++ (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
++ ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
++ : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
++ memcpy(req_ctx->hw_context, export->hw_context,
++ req_ctx->hw_context_size);
++ memcpy(req_ctx->buf, export->buf, export->nbuf);
++ req_ctx->swinit = export->swinit;
++ req_ctx->first = export->first;
++ req_ctx->last = export->last;
++ req_ctx->to_hash_later = export->to_hash_later;
++ req_ctx->nbuf = export->nbuf;
++
++ return 0;
++}
++
+ struct keyhash_result {
+ struct completion completion;
+ int err;
+@@ -2458,6 +2508,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = MD5_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "md5",
+ .cra_driver_name = "md5-talitos",
+@@ -2473,6 +2524,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = SHA1_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1-talitos",
+@@ -2488,6 +2540,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = SHA224_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-talitos",
+@@ -2503,6 +2556,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = SHA256_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-talitos",
+@@ -2518,6 +2572,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = SHA384_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "sha384",
+ .cra_driver_name = "sha384-talitos",
+@@ -2533,6 +2588,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = SHA512_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "sha512",
+ .cra_driver_name = "sha512-talitos",
+@@ -2548,6 +2604,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = MD5_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "hmac(md5)",
+ .cra_driver_name = "hmac-md5-talitos",
+@@ -2563,6 +2620,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = SHA1_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "hmac(sha1)",
+ .cra_driver_name = "hmac-sha1-talitos",
+@@ -2578,6 +2636,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = SHA224_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "hmac(sha224)",
+ .cra_driver_name = "hmac-sha224-talitos",
+@@ -2593,6 +2652,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = SHA256_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "hmac(sha256)",
+ .cra_driver_name = "hmac-sha256-talitos",
+@@ -2608,6 +2668,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = SHA384_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "hmac(sha384)",
+ .cra_driver_name = "hmac-sha384-talitos",
+@@ -2623,6 +2684,7 @@ static struct talitos_alg_template driver_algs[] = {
+ { .type = CRYPTO_ALG_TYPE_AHASH,
+ .alg.hash = {
+ .halg.digestsize = SHA512_DIGEST_SIZE,
++ .halg.statesize = sizeof(struct talitos_export_state),
+ .halg.base = {
+ .cra_name = "hmac(sha512)",
+ .cra_driver_name = "hmac-sha512-talitos",
+@@ -2814,6 +2876,8 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
+ t_alg->algt.alg.hash.finup = ahash_finup;
+ t_alg->algt.alg.hash.digest = ahash_digest;
+ t_alg->algt.alg.hash.setkey = ahash_setkey;
++ t_alg->algt.alg.hash.import = ahash_import;
++ t_alg->algt.alg.hash.export = ahash_export;
+
+ if (!(priv->features & TALITOS_FTR_HMAC_OK) &&
+ !strncmp(alg->cra_name, "hmac", 4)) {
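Setting halg.statesize and wiring up .export/.import above is what lets users of the ahash API snapshot a partially computed hash and resume it later, possibly on a different request. A kernel-style sketch of that usage; the function name is invented and the init/update/final steps and error handling are trimmed for brevity.

#include <crypto/hash.h>
#include <linux/slab.h>

static int hash_in_two_steps(struct crypto_ahash *tfm,
                             struct ahash_request *req1,
                             struct ahash_request *req2)
{
        void *state = kmalloc(crypto_ahash_statesize(tfm), GFP_KERNEL);
        int err;

        if (!state)
                return -ENOMEM;

        /* ... crypto_ahash_init() + crypto_ahash_update() on req1 ... */
        err = crypto_ahash_export(req1, state); /* backed by ahash_export() */
        if (!err)
                err = crypto_ahash_import(req2, state); /* ahash_import() */
        /* ... crypto_ahash_final() on req2 ... */

        kfree(state);
        return err;
}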
+diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
+index b6bf20496021..845ce90c2885 100644
+--- a/drivers/infiniband/ulp/srp/ib_srp.c
++++ b/drivers/infiniband/ulp/srp/ib_srp.c
+@@ -448,16 +448,16 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
+
+ /**
+ * srp_destroy_qp() - destroy an RDMA queue pair
+- * @ch: SRP RDMA channel.
++ * @qp: RDMA queue pair.
+ *
+ * Drain the qp before destroying it. This avoids that the receive
+ * completion handler can access the queue pair while it is
+ * being destroyed.
+ */
+-static void srp_destroy_qp(struct srp_rdma_ch *ch)
++static void srp_destroy_qp(struct ib_qp *qp)
+ {
+- ib_drain_rq(ch->qp);
+- ib_destroy_qp(ch->qp);
++ ib_drain_rq(qp);
++ ib_destroy_qp(qp);
+ }
+
+ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
+@@ -530,7 +530,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
+ }
+
+ if (ch->qp)
+- srp_destroy_qp(ch);
++ srp_destroy_qp(ch->qp);
+ if (ch->recv_cq)
+ ib_free_cq(ch->recv_cq);
+ if (ch->send_cq)
+@@ -554,7 +554,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
+ return 0;
+
+ err_qp:
+- srp_destroy_qp(ch);
++ srp_destroy_qp(qp);
+
+ err_send_cq:
+ ib_free_cq(send_cq);
+@@ -597,7 +597,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
+ ib_destroy_fmr_pool(ch->fmr_pool);
+ }
+
+- srp_destroy_qp(ch);
++ srp_destroy_qp(ch->qp);
+ ib_free_cq(ch->send_cq);
+ ib_free_cq(ch->recv_cq);
+
+@@ -1509,7 +1509,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
+
+ if (dev->use_fast_reg) {
+ state.sg = idb_sg;
+- sg_set_buf(idb_sg, req->indirect_desc, idb_len);
++ sg_init_one(idb_sg, req->indirect_desc, idb_len);
+ idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
+ #ifdef CONFIG_NEED_SG_DMA_LENGTH
+ idb_sg->dma_length = idb_sg->length; /* hack^2 */
+diff --git a/drivers/input/misc/pwm-beeper.c b/drivers/input/misc/pwm-beeper.c
+index f2261ab54701..18663d4edae5 100644
+--- a/drivers/input/misc/pwm-beeper.c
++++ b/drivers/input/misc/pwm-beeper.c
+@@ -20,21 +20,40 @@
+ #include <linux/platform_device.h>
+ #include <linux/pwm.h>
+ #include <linux/slab.h>
++#include <linux/workqueue.h>
+
+ struct pwm_beeper {
+ struct input_dev *input;
+ struct pwm_device *pwm;
++ struct work_struct work;
+ unsigned long period;
+ };
+
+ #define HZ_TO_NANOSECONDS(x) (1000000000UL/(x))
+
++static void __pwm_beeper_set(struct pwm_beeper *beeper)
++{
++ unsigned long period = beeper->period;
++
++ if (period) {
++ pwm_config(beeper->pwm, period / 2, period);
++ pwm_enable(beeper->pwm);
++ } else
++ pwm_disable(beeper->pwm);
++}
++
++static void pwm_beeper_work(struct work_struct *work)
++{
++ struct pwm_beeper *beeper =
++ container_of(work, struct pwm_beeper, work);
++
++ __pwm_beeper_set(beeper);
++}
++
+ static int pwm_beeper_event(struct input_dev *input,
+ unsigned int type, unsigned int code, int value)
+ {
+- int ret = 0;
+ struct pwm_beeper *beeper = input_get_drvdata(input);
+- unsigned long period;
+
+ if (type != EV_SND || value < 0)
+ return -EINVAL;
+@@ -49,22 +68,31 @@ static int pwm_beeper_event(struct input_dev *input,
+ return -EINVAL;
+ }
+
+- if (value == 0) {
+- pwm_disable(beeper->pwm);
+- } else {
+- period = HZ_TO_NANOSECONDS(value);
+- ret = pwm_config(beeper->pwm, period / 2, period);
+- if (ret)
+- return ret;
+- ret = pwm_enable(beeper->pwm);
+- if (ret)
+- return ret;
+- beeper->period = period;
+- }
++ if (value == 0)
++ beeper->period = 0;
++ else
++ beeper->period = HZ_TO_NANOSECONDS(value);
++
++ schedule_work(&beeper->work);
+
+ return 0;
+ }
+
++static void pwm_beeper_stop(struct pwm_beeper *beeper)
++{
++ cancel_work_sync(&beeper->work);
++
++ if (beeper->period)
++ pwm_disable(beeper->pwm);
++}
++
++static void pwm_beeper_close(struct input_dev *input)
++{
++ struct pwm_beeper *beeper = input_get_drvdata(input);
++
++ pwm_beeper_stop(beeper);
++}
++
+ static int pwm_beeper_probe(struct platform_device *pdev)
+ {
+ unsigned long pwm_id = (unsigned long)dev_get_platdata(&pdev->dev);
+@@ -87,6 +115,8 @@ static int pwm_beeper_probe(struct platform_device *pdev)
+ goto err_free;
+ }
+
++ INIT_WORK(&beeper->work, pwm_beeper_work);
++
+ beeper->input = input_allocate_device();
+ if (!beeper->input) {
+ dev_err(&pdev->dev, "Failed to allocate input device\n");
+@@ -106,6 +136,7 @@ static int pwm_beeper_probe(struct platform_device *pdev)
+ beeper->input->sndbit[0] = BIT(SND_TONE) | BIT(SND_BELL);
+
+ beeper->input->event = pwm_beeper_event;
++ beeper->input->close = pwm_beeper_close;
+
+ input_set_drvdata(beeper->input, beeper);
+
+@@ -135,7 +166,6 @@ static int pwm_beeper_remove(struct platform_device *pdev)
+
+ input_unregister_device(beeper->input);
+
+- pwm_disable(beeper->pwm);
+ pwm_free(beeper->pwm);
+
+ kfree(beeper);
+@@ -147,8 +177,7 @@ static int __maybe_unused pwm_beeper_suspend(struct device *dev)
+ {
+ struct pwm_beeper *beeper = dev_get_drvdata(dev);
+
+- if (beeper->period)
+- pwm_disable(beeper->pwm);
++ pwm_beeper_stop(beeper);
+
+ return 0;
+ }
+@@ -157,10 +186,8 @@ static int __maybe_unused pwm_beeper_resume(struct device *dev)
+ {
+ struct pwm_beeper *beeper = dev_get_drvdata(dev);
+
+- if (beeper->period) {
+- pwm_config(beeper->pwm, beeper->period / 2, beeper->period);
+- pwm_enable(beeper->pwm);
+- }
++ if (beeper->period)
++ __pwm_beeper_set(beeper);
+
+ return 0;
+ }
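The pwm-beeper rework above moves the pwm_config()/pwm_enable()/pwm_disable() calls out of the input event handler, which may run in atomic context, into a work item that runs in process context. A kernel-style sketch of that deferral pattern; the struct and function names are invented for the example, and the real driver's versions are in the hunks above.

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct beeper_like {
        struct work_struct work;        /* INIT_WORK(&b->work, beeper_work) at probe */
        unsigned long period;           /* 0 means "switch the output off" */
};

static void beeper_work(struct work_struct *work)
{
        struct beeper_like *b = container_of(work, struct beeper_like, work);

        /* runs in process context: the sleeping PWM calls are allowed here */
        (void)b->period;
}

/* called from the (possibly atomic) input event path */
static void beeper_request(struct beeper_like *b, unsigned long period)
{
        b->period = period;
        schedule_work(&b->work);        /* defer the sleeping calls */
}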
+diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
+index 5b7d3c2129d8..390e0ece26dc 100644
+--- a/drivers/irqchip/irq-gic-v3.c
++++ b/drivers/irqchip/irq-gic-v3.c
+@@ -364,6 +364,13 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
+ if (static_key_true(&supports_deactivate))
+ gic_write_dir(irqnr);
+ #ifdef CONFIG_SMP
++ /*
++ * Unlike GICv2, we don't need an smp_rmb() here.
++ * The control dependency from gic_read_iar to
++ * the ISB in gic_write_eoir is enough to ensure
++ * that any shared data read by handle_IPI will
++ * be read after the ACK.
++ */
+ handle_IPI(irqnr, regs);
+ #else
+ WARN_ONCE(true, "Unexpected SGI received!\n");
+@@ -383,6 +390,15 @@ static void __init gic_dist_init(void)
+ writel_relaxed(0, base + GICD_CTLR);
+ gic_dist_wait_for_rwp();
+
++ /*
++ * Configure SPIs as non-secure Group-1. This will only matter
++ * if the GIC only has a single security state. This will not
++ * do the right thing if the kernel is running in secure mode,
++ * but that's not the intended use case anyway.
++ */
++ for (i = 32; i < gic_data.irq_nr; i += 32)
++ writel_relaxed(~0, base + GICD_IGROUPR + i / 8);
++
+ gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp);
+
+ /* Enable distributor with ARE, Group1 */
+@@ -500,6 +516,9 @@ static void gic_cpu_init(void)
+
+ rbase = gic_data_rdist_sgi_base();
+
++ /* Configure SGIs/PPIs as non-secure Group-1 */
++ writel_relaxed(~0, rbase + GICR_IGROUPR0);
++
+ gic_cpu_config(rbase, gic_redist_wait_for_rwp);
+
+ /* Give LPIs a spin */
+diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
+index 282344b95ec2..5c4da5808b15 100644
+--- a/drivers/irqchip/irq-gic.c
++++ b/drivers/irqchip/irq-gic.c
+@@ -344,6 +344,14 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+ if (static_key_true(&supports_deactivate))
+ writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
+ #ifdef CONFIG_SMP
++ /*
++ * Ensure any shared data written by the CPU sending
++ * the IPI is read after we've read the ACK register
++ * on the GIC.
++ *
++ * Pairs with the write barrier in gic_raise_softirq
++ */
++ smp_rmb();
+ handle_IPI(irqnr, regs);
+ #endif
+ continue;
+diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c
+index 004926955263..b0155b05cddb 100644
+--- a/drivers/mcb/mcb-parse.c
++++ b/drivers/mcb/mcb-parse.c
+@@ -57,7 +57,7 @@ static int chameleon_parse_gdd(struct mcb_bus *bus,
+ mdev->id = GDD_DEV(reg1);
+ mdev->rev = GDD_REV(reg1);
+ mdev->var = GDD_VAR(reg1);
+- mdev->bar = GDD_BAR(reg1);
++ mdev->bar = GDD_BAR(reg2);
+ mdev->group = GDD_GRP(reg2);
+ mdev->inst = GDD_INS(reg2);
+
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index 14d3b37944df..85b16aadd459 100644
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -307,7 +307,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
+ */
+ void mddev_suspend(struct mddev *mddev)
+ {
+- WARN_ON_ONCE(current == mddev->thread->tsk);
++ WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
+ if (mddev->suspended++)
+ return;
+ synchronize_rcu();
+diff --git a/drivers/mfd/omap-usb-tll.c b/drivers/mfd/omap-usb-tll.c
+index b7b3e8ee64f2..c30290f33430 100644
+--- a/drivers/mfd/omap-usb-tll.c
++++ b/drivers/mfd/omap-usb-tll.c
+@@ -269,6 +269,8 @@ static int usbtll_omap_probe(struct platform_device *pdev)
+
+ if (IS_ERR(tll->ch_clk[i]))
+ dev_dbg(dev, "can't get clock : %s\n", clkname);
++ else
++ clk_prepare(tll->ch_clk[i]);
+ }
+
+ pm_runtime_put_sync(dev);
+@@ -301,9 +303,12 @@ static int usbtll_omap_remove(struct platform_device *pdev)
+ tll_dev = NULL;
+ spin_unlock(&tll_lock);
+
+- for (i = 0; i < tll->nch; i++)
+- if (!IS_ERR(tll->ch_clk[i]))
++ for (i = 0; i < tll->nch; i++) {
++ if (!IS_ERR(tll->ch_clk[i])) {
++ clk_unprepare(tll->ch_clk[i]);
+ clk_put(tll->ch_clk[i]);
++ }
++ }
+
+ pm_runtime_disable(&pdev->dev);
+ return 0;
+@@ -420,7 +425,7 @@ int omap_tll_enable(struct usbhs_omap_platform_data *pdata)
+ if (IS_ERR(tll->ch_clk[i]))
+ continue;
+
+- r = clk_prepare_enable(tll->ch_clk[i]);
++ r = clk_enable(tll->ch_clk[i]);
+ if (r) {
+ dev_err(tll_dev,
+ "Error enabling ch %d clock: %d\n", i, r);
+@@ -448,7 +453,7 @@ int omap_tll_disable(struct usbhs_omap_platform_data *pdata)
+ for (i = 0; i < tll->nch; i++) {
+ if (omap_usb_mode_needs_tll(pdata->port_mode[i])) {
+ if (!IS_ERR(tll->ch_clk[i]))
+- clk_disable_unprepare(tll->ch_clk[i]);
++ clk_disable(tll->ch_clk[i]);
+ }
+ }
+
+diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
+index 194360a5f782..a039a5df6f21 100644
+--- a/drivers/misc/mei/amthif.c
++++ b/drivers/misc/mei/amthif.c
+@@ -380,8 +380,10 @@ int mei_amthif_irq_read_msg(struct mei_cl *cl,
+
+ dev = cl->dev;
+
+- if (dev->iamthif_state != MEI_IAMTHIF_READING)
++ if (dev->iamthif_state != MEI_IAMTHIF_READING) {
++ mei_irq_discard_msg(dev, mei_hdr);
+ return 0;
++ }
+
+ ret = mei_cl_irq_read_msg(cl, mei_hdr, cmpl_list);
+ if (ret)
+diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
+index 5d5996e39a67..038b6dd24fb4 100644
+--- a/drivers/misc/mei/bus.c
++++ b/drivers/misc/mei/bus.c
+@@ -220,17 +220,23 @@ EXPORT_SYMBOL_GPL(mei_cldev_recv);
+ static void mei_cl_bus_event_work(struct work_struct *work)
+ {
+ struct mei_cl_device *cldev;
++ struct mei_device *bus;
+
+ cldev = container_of(work, struct mei_cl_device, event_work);
+
++ bus = cldev->bus;
++
+ if (cldev->event_cb)
+ cldev->event_cb(cldev, cldev->events, cldev->event_context);
+
+ cldev->events = 0;
+
+ /* Prepare for the next read */
+- if (cldev->events_mask & BIT(MEI_CL_EVENT_RX))
++ if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
++ mutex_lock(&bus->device_lock);
+ mei_cl_read_start(cldev->cl, 0, NULL);
++ mutex_unlock(&bus->device_lock);
++ }
+ }
+
+ /**
+@@ -304,6 +310,7 @@ int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
+ unsigned long events_mask,
+ mei_cldev_event_cb_t event_cb, void *context)
+ {
++ struct mei_device *bus = cldev->bus;
+ int ret;
+
+ if (cldev->event_cb)
+@@ -316,15 +323,17 @@ int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
+ INIT_WORK(&cldev->event_work, mei_cl_bus_event_work);
+
+ if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
++ mutex_lock(&bus->device_lock);
+ ret = mei_cl_read_start(cldev->cl, 0, NULL);
++ mutex_unlock(&bus->device_lock);
+ if (ret && ret != -EBUSY)
+ return ret;
+ }
+
+ if (cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF)) {
+- mutex_lock(&cldev->cl->dev->device_lock);
++ mutex_lock(&bus->device_lock);
+ ret = mei_cl_notify_request(cldev->cl, NULL, event_cb ? 1 : 0);
+- mutex_unlock(&cldev->cl->dev->device_lock);
++ mutex_unlock(&bus->device_lock);
+ if (ret)
+ return ret;
+ }
+diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
+index bab17e4197b6..09f5280fa021 100644
+--- a/drivers/misc/mei/client.c
++++ b/drivers/misc/mei/client.c
+@@ -1767,6 +1767,10 @@ void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb)
+ wake_up(&cl->wait);
+
+ break;
++ case MEI_FOP_DISCONNECT_RSP:
++ mei_io_cb_free(cb);
++ mei_cl_set_disconnected(cl);
++ break;
+ default:
+ BUG_ON(0);
+ }
+diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
+index 5e305d2605f3..8fe1ef8215c1 100644
+--- a/drivers/misc/mei/hbm.c
++++ b/drivers/misc/mei/hbm.c
+@@ -882,8 +882,7 @@ static int mei_hbm_fw_disconnect_req(struct mei_device *dev,
+ cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT_RSP, NULL);
+ if (!cb)
+ return -ENOMEM;
+- cl_dbg(dev, cl, "add disconnect response as first\n");
+- list_add(&cb->list, &dev->ctrl_wr_list.list);
++ list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ }
+ return 0;
+ }
+diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
+index 1e5cb1f704f8..3831a7ba2531 100644
+--- a/drivers/misc/mei/interrupt.c
++++ b/drivers/misc/mei/interrupt.c
+@@ -76,7 +76,6 @@ static inline int mei_cl_hbm_equal(struct mei_cl *cl,
+ * @dev: mei device
+ * @hdr: message header
+ */
+-static inline
+ void mei_irq_discard_msg(struct mei_device *dev, struct mei_msg_hdr *hdr)
+ {
+ /*
+@@ -194,10 +193,7 @@ static int mei_cl_irq_disconnect_rsp(struct mei_cl *cl, struct mei_cl_cb *cb,
+ return -EMSGSIZE;
+
+ ret = mei_hbm_cl_disconnect_rsp(dev, cl);
+- mei_cl_set_disconnected(cl);
+- mei_io_cb_free(cb);
+- mei_me_cl_put(cl->me_cl);
+- cl->me_cl = NULL;
++ list_move_tail(&cb->list, &cmpl_list->list);
+
+ return ret;
+ }
+diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
+index db78e6d99456..859bdac22352 100644
+--- a/drivers/misc/mei/mei_dev.h
++++ b/drivers/misc/mei/mei_dev.h
+@@ -704,6 +704,8 @@ bool mei_hbuf_acquire(struct mei_device *dev);
+
+ bool mei_write_is_idle(struct mei_device *dev);
+
++void mei_irq_discard_msg(struct mei_device *dev, struct mei_msg_hdr *hdr);
++
+ #if IS_ENABLED(CONFIG_DEBUG_FS)
+ int mei_dbgfs_register(struct mei_device *dev, const char *name);
+ void mei_dbgfs_deregister(struct mei_device *dev);
+diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
+index 88e45234d527..af57d2caac75 100644
+--- a/drivers/misc/mic/vop/vop_vringh.c
++++ b/drivers/misc/mic/vop/vop_vringh.c
+@@ -950,6 +950,11 @@ static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+ ret = -EINVAL;
+ goto free_ret;
+ }
++ /* Ensure desc has not changed between the two reads */
++ if (memcmp(&dd, dd_config, sizeof(dd))) {
++ ret = -EINVAL;
++ goto free_ret;
++ }
+ mutex_lock(&vdev->vdev_mutex);
+ mutex_lock(&vi->vop_mutex);
+ ret = vop_virtio_add_device(vdev, dd_config);
+diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
+index 8a0147dfed27..b0a27413cb13 100644
+--- a/drivers/mmc/card/block.c
++++ b/drivers/mmc/card/block.c
+@@ -2494,11 +2494,12 @@ static const struct mmc_fixup blk_fixups[] =
+ MMC_QUIRK_BLK_NO_CMD23),
+
+ /*
+- * Some Micron MMC cards needs longer data read timeout than
+- * indicated in CSD.
++ * Some MMC cards need longer data read timeout than indicated in CSD.
+ */
+ MMC_FIXUP(CID_NAME_ANY, CID_MANFID_MICRON, 0x200, add_quirk_mmc,
+ MMC_QUIRK_LONG_READ_TIME),
++ MMC_FIXUP("008GE0", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc,
++ MMC_QUIRK_LONG_READ_TIME),
+
+ /*
+ * On these Samsung MoviNAND parts, performing secure erase or
+diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
+index 41b1e761965f..9eba56c68ca1 100644
+--- a/drivers/mmc/core/core.c
++++ b/drivers/mmc/core/core.c
+@@ -868,11 +868,11 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card)
+ /*
+ * Some cards require longer data read timeout than indicated in CSD.
+ * Address this by setting the read timeout to a "reasonably high"
+- * value. For the cards tested, 300ms has proven enough. If necessary,
++ * value. For the cards tested, 600ms has proven enough. If necessary,
+ * this value can be increased if other problematic cards require this.
+ */
+ if (mmc_card_long_read_time(card) && data->flags & MMC_DATA_READ) {
+- data->timeout_ns = 300000000;
++ data->timeout_ns = 600000000;
+ data->timeout_clks = 0;
+ }
+
+diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
+index 4dbe3df8024b..80169643d59f 100644
+--- a/drivers/mmc/core/mmc.c
++++ b/drivers/mmc/core/mmc.c
+@@ -333,6 +333,9 @@ static void mmc_manage_gp_partitions(struct mmc_card *card, u8 *ext_csd)
+ }
+ }
+
++/* Minimum partition switch timeout in milliseconds */
++#define MMC_MIN_PART_SWITCH_TIME 300
++
+ /*
+ * Decode extended CSD.
+ */
+@@ -397,6 +400,10 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
+
+ /* EXT_CSD value is in units of 10ms, but we store in ms */
+ card->ext_csd.part_time = 10 * ext_csd[EXT_CSD_PART_SWITCH_TIME];
++ /* Some eMMC set the value too low so set a minimum */
++ if (card->ext_csd.part_time &&
++ card->ext_csd.part_time < MMC_MIN_PART_SWITCH_TIME)
++ card->ext_csd.part_time = MMC_MIN_PART_SWITCH_TIME;
+
+ /* Sleep / awake timeout in 100ns units */
+ if (sa_shift > 0 && sa_shift <= 0x17)
+diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
+index bed6a494f52c..c0e206d72153 100644
+--- a/drivers/mmc/host/sdhci-acpi.c
++++ b/drivers/mmc/host/sdhci-acpi.c
+@@ -277,7 +277,7 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = {
+ .chip = &sdhci_acpi_chip_int,
+ .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
+ MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR |
+- MMC_CAP_BUS_WIDTH_TEST | MMC_CAP_WAIT_WHILE_BUSY,
++ MMC_CAP_WAIT_WHILE_BUSY,
+ .caps2 = MMC_CAP2_HC_ERASE_SZ,
+ .flags = SDHCI_ACPI_RUNTIME_PM,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+@@ -292,7 +292,7 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = {
+ SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+ .quirks2 = SDHCI_QUIRK2_HOST_OFF_CARD_ON,
+ .caps = MMC_CAP_NONREMOVABLE | MMC_CAP_POWER_OFF_CARD |
+- MMC_CAP_BUS_WIDTH_TEST | MMC_CAP_WAIT_WHILE_BUSY,
++ MMC_CAP_WAIT_WHILE_BUSY,
+ .flags = SDHCI_ACPI_RUNTIME_PM,
+ .pm_caps = MMC_PM_KEEP_POWER,
+ .probe_slot = sdhci_acpi_sdio_probe_slot,
+@@ -304,7 +304,7 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sd = {
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+ .quirks2 = SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON |
+ SDHCI_QUIRK2_STOP_WITH_TC,
+- .caps = MMC_CAP_BUS_WIDTH_TEST | MMC_CAP_WAIT_WHILE_BUSY,
++ .caps = MMC_CAP_WAIT_WHILE_BUSY,
+ .probe_slot = sdhci_acpi_sd_probe_slot,
+ };
+
+@@ -381,7 +381,7 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
+ {
+ struct device *dev = &pdev->dev;
+ acpi_handle handle = ACPI_HANDLE(dev);
+- struct acpi_device *device;
++ struct acpi_device *device, *child;
+ struct sdhci_acpi_host *c;
+ struct sdhci_host *host;
+ struct resource *iomem;
+@@ -393,6 +393,11 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
+ if (acpi_bus_get_device(handle, &device))
+ return -ENODEV;
+
++ /* Power on the SDHCI controller and its children */
++ acpi_device_fix_up_power(device);
++ list_for_each_entry(child, &device->children, node)
++ acpi_device_fix_up_power(child);
++
+ if (acpi_bus_get_status(device) || !device->status.present)
+ return -ENODEV;
+
+diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
+index 79e19017343e..b42dd6a1b639 100644
+--- a/drivers/mmc/host/sdhci-pci-core.c
++++ b/drivers/mmc/host/sdhci-pci-core.c
+@@ -361,7 +361,6 @@ static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
+ {
+ slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
+ MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR |
+- MMC_CAP_BUS_WIDTH_TEST |
+ MMC_CAP_WAIT_WHILE_BUSY;
+ slot->host->mmc->caps2 |= MMC_CAP2_HC_ERASE_SZ;
+ slot->hw_reset = sdhci_pci_int_hw_reset;
+@@ -377,15 +376,13 @@ static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
+ static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
+ {
+ slot->host->mmc->caps |= MMC_CAP_POWER_OFF_CARD | MMC_CAP_NONREMOVABLE |
+- MMC_CAP_BUS_WIDTH_TEST |
+ MMC_CAP_WAIT_WHILE_BUSY;
+ return 0;
+ }
+
+ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot)
+ {
+- slot->host->mmc->caps |= MMC_CAP_BUS_WIDTH_TEST |
+- MMC_CAP_WAIT_WHILE_BUSY;
++ slot->host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
+ slot->cd_con_id = NULL;
+ slot->cd_idx = 0;
+ slot->cd_override_level = true;
+diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
+index 5b9834cf2820..96fddb016bf1 100644
+--- a/drivers/mtd/ubi/eba.c
++++ b/drivers/mtd/ubi/eba.c
+@@ -426,8 +426,25 @@ retry:
+ pnum, vol_id, lnum);
+ err = -EBADMSG;
+ } else {
+- err = -EINVAL;
+- ubi_ro_mode(ubi);
++ /*
++ * Ending up here in the non-Fastmap case
++ * is a clear bug as the VID header had to
++ * be present at scan time to have it referenced.
++ * With fastmap the story is more complicated.
++ * Fastmap has the mapping info without the need
++ * of a full scan. So the LEB could have been
++ * unmapped, Fastmap cannot know this and keeps
++ * the LEB referenced.
++ * This is valid and works as the layer above UBI
++ * has to do bookkeeping about used/referenced
++ * LEBs in any case.
++ */
++ if (ubi->fast_attach) {
++ err = -EBADMSG;
++ } else {
++ err = -EINVAL;
++ ubi_ro_mode(ubi);
++ }
+ }
+ }
+ goto out_free;
+diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
+index 263b439e21a8..990898b9dc72 100644
+--- a/drivers/mtd/ubi/fastmap.c
++++ b/drivers/mtd/ubi/fastmap.c
+@@ -1058,6 +1058,7 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai,
+ ubi_msg(ubi, "fastmap WL pool size: %d",
+ ubi->fm_wl_pool.max_size);
+ ubi->fm_disabled = 0;
++ ubi->fast_attach = 1;
+
+ ubi_free_vid_hdr(ubi, vh);
+ kfree(ech);
+diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
+index dadc6a9d5755..61d4e99755a4 100644
+--- a/drivers/mtd/ubi/ubi.h
++++ b/drivers/mtd/ubi/ubi.h
+@@ -466,6 +466,7 @@ struct ubi_debug_info {
+ * @fm_eba_sem: allows ubi_update_fastmap() to block EBA table changes
+ * @fm_work: fastmap work queue
+ * @fm_work_scheduled: non-zero if fastmap work was scheduled
++ * @fast_attach: non-zero if UBI was attached by fastmap
+ *
+ * @used: RB-tree of used physical eraseblocks
+ * @erroneous: RB-tree of erroneous used physical eraseblocks
+@@ -574,6 +575,7 @@ struct ubi_device {
+ size_t fm_size;
+ struct work_struct fm_work;
+ int fm_work_scheduled;
++ int fast_attach;
+
+ /* Wear-leveling sub-system's stuff */
+ struct rb_root used;
+diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
+index 141c2a42d7ed..910c12e2638e 100644
+--- a/drivers/net/can/dev.c
++++ b/drivers/net/can/dev.c
+@@ -696,11 +696,17 @@ int can_change_mtu(struct net_device *dev, int new_mtu)
+ /* allow change of MTU according to the CANFD ability of the device */
+ switch (new_mtu) {
+ case CAN_MTU:
++ /* 'CANFD-only' controllers can not switch to CAN_MTU */
++ if (priv->ctrlmode_static & CAN_CTRLMODE_FD)
++ return -EINVAL;
++
+ priv->ctrlmode &= ~CAN_CTRLMODE_FD;
+ break;
+
+ case CANFD_MTU:
+- if (!(priv->ctrlmode_supported & CAN_CTRLMODE_FD))
++ /* check for potential CANFD ability */
++ if (!(priv->ctrlmode_supported & CAN_CTRLMODE_FD) &&
++ !(priv->ctrlmode_static & CAN_CTRLMODE_FD))
+ return -EINVAL;
+
+ priv->ctrlmode |= CAN_CTRLMODE_FD;
+@@ -782,6 +788,35 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = {
+ = { .len = sizeof(struct can_bittiming_const) },
+ };
+
++static int can_validate(struct nlattr *tb[], struct nlattr *data[])
++{
++ bool is_can_fd = false;
++
++ /* Make sure that valid CAN FD configurations always consist of
++ * - nominal/arbitration bittiming
++ * - data bittiming
++ * - control mode with CAN_CTRLMODE_FD set
++ */
++
++ if (data[IFLA_CAN_CTRLMODE]) {
++ struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]);
++
++ is_can_fd = cm->flags & cm->mask & CAN_CTRLMODE_FD;
++ }
++
++ if (is_can_fd) {
++ if (!data[IFLA_CAN_BITTIMING] || !data[IFLA_CAN_DATA_BITTIMING])
++ return -EOPNOTSUPP;
++ }
++
++ if (data[IFLA_CAN_DATA_BITTIMING]) {
++ if (!is_can_fd || !data[IFLA_CAN_BITTIMING])
++ return -EOPNOTSUPP;
++ }
++
++ return 0;
++}
++
+ static int can_changelink(struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+ {
+@@ -813,19 +848,31 @@ static int can_changelink(struct net_device *dev,
+
+ if (data[IFLA_CAN_CTRLMODE]) {
+ struct can_ctrlmode *cm;
++ u32 ctrlstatic;
++ u32 maskedflags;
+
+ /* Do not allow changing controller mode while running */
+ if (dev->flags & IFF_UP)
+ return -EBUSY;
+ cm = nla_data(data[IFLA_CAN_CTRLMODE]);
++ ctrlstatic = priv->ctrlmode_static;
++ maskedflags = cm->flags & cm->mask;
++
++ /* check whether provided bits are allowed to be passed */
++ if (cm->mask & ~(priv->ctrlmode_supported | ctrlstatic))
++ return -EOPNOTSUPP;
++
++ /* do not check for static fd-non-iso if 'fd' is disabled */
++ if (!(maskedflags & CAN_CTRLMODE_FD))
++ ctrlstatic &= ~CAN_CTRLMODE_FD_NON_ISO;
+
+- /* check whether changed bits are allowed to be modified */
+- if (cm->mask & ~priv->ctrlmode_supported)
++ /* make sure static options are provided by configuration */
++ if ((maskedflags & ctrlstatic) != ctrlstatic)
+ return -EOPNOTSUPP;
+
+ /* clear bits to be modified and copy the flag values */
+ priv->ctrlmode &= ~cm->mask;
+- priv->ctrlmode |= (cm->flags & cm->mask);
++ priv->ctrlmode |= maskedflags;
+
+ /* CAN_CTRLMODE_FD can only be set when driver supports FD */
+ if (priv->ctrlmode & CAN_CTRLMODE_FD)
+@@ -966,6 +1013,7 @@ static struct rtnl_link_ops can_link_ops __read_mostly = {
+ .maxtype = IFLA_CAN_MAX,
+ .policy = can_policy,
+ .setup = can_setup,
++ .validate = can_validate,
+ .newlink = can_newlink,
+ .changelink = can_changelink,
+ .get_size = can_get_size,
+diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
+index 39cf911f7a1e..195f15edb32e 100644
+--- a/drivers/net/can/m_can/m_can.c
++++ b/drivers/net/can/m_can/m_can.c
+@@ -955,7 +955,7 @@ static struct net_device *alloc_m_can_dev(void)
+ priv->can.do_get_berr_counter = m_can_get_berr_counter;
+
+ /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.0.1 */
+- priv->can.ctrlmode = CAN_CTRLMODE_FD_NON_ISO;
++ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
+
+ /* CAN_CTRLMODE_FD_NON_ISO can not be changed with M_CAN IP v3.0.1 */
+ priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK |
+diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
+index 0c5c22b84da8..7de5ab589e4e 100644
+--- a/drivers/net/usb/asix_common.c
++++ b/drivers/net/usb/asix_common.c
+@@ -66,7 +66,7 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb,
+ * buffer.
+ */
+ if (rx->remaining && (rx->remaining + sizeof(u32) <= skb->len)) {
+- offset = ((rx->remaining + 1) & 0xfffe) + sizeof(u32);
++ offset = ((rx->remaining + 1) & 0xfffe);
+ rx->header = get_unaligned_le32(skb->data + offset);
+ offset = 0;
+
+diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
+index 1f44a151d206..d5a099b022e4 100644
+--- a/drivers/nfc/st21nfca/i2c.c
++++ b/drivers/nfc/st21nfca/i2c.c
+@@ -524,8 +524,10 @@ static int st21nfca_hci_i2c_acpi_request_resources(struct i2c_client *client)
+ /* Get EN GPIO from ACPI */
+ gpiod_ena = devm_gpiod_get_index(dev, ST21NFCA_GPIO_NAME_EN, 1,
+ GPIOD_OUT_LOW);
+- if (!IS_ERR(gpiod_ena))
+- phy->gpio_ena = desc_to_gpio(gpiod_ena);
++ if (!IS_ERR(gpiod_ena)) {
++ nfc_err(dev, "Unable to get ENABLE GPIO\n");
++ return -ENODEV;
++ }
+
+ phy->gpio_ena = desc_to_gpio(gpiod_ena);
+
+diff --git a/drivers/platform/x86/dell-rbtn.c b/drivers/platform/x86/dell-rbtn.c
+index b51a2008d782..dcd9f40a4b18 100644
+--- a/drivers/platform/x86/dell-rbtn.c
++++ b/drivers/platform/x86/dell-rbtn.c
+@@ -28,6 +28,7 @@ struct rbtn_data {
+ enum rbtn_type type;
+ struct rfkill *rfkill;
+ struct input_dev *input_dev;
++ bool suspended;
+ };
+
+
+@@ -235,9 +236,55 @@ static const struct acpi_device_id rbtn_ids[] = {
+ { "", 0 },
+ };
+
++#ifdef CONFIG_PM_SLEEP
++static void ACPI_SYSTEM_XFACE rbtn_clear_suspended_flag(void *context)
++{
++ struct rbtn_data *rbtn_data = context;
++
++ rbtn_data->suspended = false;
++}
++
++static int rbtn_suspend(struct device *dev)
++{
++ struct acpi_device *device = to_acpi_device(dev);
++ struct rbtn_data *rbtn_data = acpi_driver_data(device);
++
++ rbtn_data->suspended = true;
++
++ return 0;
++}
++
++static int rbtn_resume(struct device *dev)
++{
++ struct acpi_device *device = to_acpi_device(dev);
++ struct rbtn_data *rbtn_data = acpi_driver_data(device);
++ acpi_status status;
++
++ /*
++ * Upon resume, some BIOSes send an ACPI notification thet triggers
++ * an unwanted input event. In order to ignore it, we use a flag
++ * that we set at suspend and clear once we have received the extra
++ * ACPI notification. Since ACPI notifications are delivered
++ * asynchronously to drivers, we clear the flag from the workqueue
++ * used to deliver the notifications. This should be enough
++ * to have the flag cleared only after we received the extra
++ * notification, if any.
++ */
++ status = acpi_os_execute(OSL_NOTIFY_HANDLER,
++ rbtn_clear_suspended_flag, rbtn_data);
++ if (ACPI_FAILURE(status))
++ rbtn_clear_suspended_flag(rbtn_data);
++
++ return 0;
++}
++#endif
++
++static SIMPLE_DEV_PM_OPS(rbtn_pm_ops, rbtn_suspend, rbtn_resume);
++
+ static struct acpi_driver rbtn_driver = {
+ .name = "dell-rbtn",
+ .ids = rbtn_ids,
++ .drv.pm = &rbtn_pm_ops,
+ .ops = {
+ .add = rbtn_add,
+ .remove = rbtn_remove,
+@@ -399,6 +446,15 @@ static void rbtn_notify(struct acpi_device *device, u32 event)
+ {
+ struct rbtn_data *rbtn_data = device->driver_data;
+
++ /*
++ * Some BIOSes send a notification at resume.
++ * Ignore it to prevent unwanted input events.
++ */
++ if (rbtn_data->suspended) {
++ dev_dbg(&device->dev, "ACPI notification ignored\n");
++ return;
++ }
++
+ if (event != 0x80) {
+ dev_info(&device->dev, "Received unknown event (0x%x)\n",
+ event);
+diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
+index 97074c91e328..6b5811de6519 100644
+--- a/drivers/scsi/scsi_scan.c
++++ b/drivers/scsi/scsi_scan.c
+@@ -316,6 +316,7 @@ static void scsi_target_destroy(struct scsi_target *starget)
+ struct Scsi_Host *shost = dev_to_shost(dev->parent);
+ unsigned long flags;
+
++ BUG_ON(starget->state == STARGET_DEL);
+ starget->state = STARGET_DEL;
+ transport_destroy_device(dev);
+ spin_lock_irqsave(shost->host_lock, flags);
+diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
+index 2b642b145be1..c7e4b5e0bae3 100644
+--- a/drivers/scsi/scsi_sysfs.c
++++ b/drivers/scsi/scsi_sysfs.c
+@@ -1366,18 +1366,18 @@ static void __scsi_remove_target(struct scsi_target *starget)
+ void scsi_remove_target(struct device *dev)
+ {
+ struct Scsi_Host *shost = dev_to_shost(dev->parent);
+- struct scsi_target *starget, *last_target = NULL;
++ struct scsi_target *starget;
+ unsigned long flags;
+
+ restart:
+ spin_lock_irqsave(shost->host_lock, flags);
+ list_for_each_entry(starget, &shost->__targets, siblings) {
+ if (starget->state == STARGET_DEL ||
+- starget == last_target)
++ starget->state == STARGET_REMOVE)
+ continue;
+ if (starget->dev.parent == dev || &starget->dev == dev) {
+ kref_get(&starget->reap_ref);
+- last_target = starget;
++ starget->state = STARGET_REMOVE;
+ spin_unlock_irqrestore(shost->host_lock, flags);
+ __scsi_remove_target(starget);
+ scsi_target_reap(starget);
+diff --git a/drivers/staging/comedi/drivers/das1800.c b/drivers/staging/comedi/drivers/das1800.c
+index 940781183fac..3be10963f98b 100644
+--- a/drivers/staging/comedi/drivers/das1800.c
++++ b/drivers/staging/comedi/drivers/das1800.c
+@@ -567,14 +567,17 @@ static int das1800_cancel(struct comedi_device *dev, struct comedi_subdevice *s)
+ struct comedi_isadma_desc *desc;
+ int i;
+
+- outb(0x0, dev->iobase + DAS1800_STATUS); /* disable conversions */
+- outb(0x0, dev->iobase + DAS1800_CONTROL_B); /* disable interrupts and dma */
+- outb(0x0, dev->iobase + DAS1800_CONTROL_A); /* disable and clear fifo and stop triggering */
+-
+- for (i = 0; i < 2; i++) {
+- desc = &dma->desc[i];
+- if (desc->chan)
+- comedi_isadma_disable(desc->chan);
++ /* disable and stop conversions */
++ outb(0x0, dev->iobase + DAS1800_STATUS);
++ outb(0x0, dev->iobase + DAS1800_CONTROL_B);
++ outb(0x0, dev->iobase + DAS1800_CONTROL_A);
++
++ if (dma) {
++ for (i = 0; i < 2; i++) {
++ desc = &dma->desc[i];
++ if (desc->chan)
++ comedi_isadma_disable(desc->chan);
++ }
+ }
+
+ return 0;
+@@ -934,13 +937,14 @@ static void das1800_ai_setup_dma(struct comedi_device *dev,
+ {
+ struct das1800_private *devpriv = dev->private;
+ struct comedi_isadma *dma = devpriv->dma;
+- struct comedi_isadma_desc *desc = &dma->desc[0];
++ struct comedi_isadma_desc *desc;
+ unsigned int bytes;
+
+ if ((devpriv->irq_dma_bits & DMA_ENABLED) == 0)
+ return;
+
+ dma->cur_dma = 0;
++ desc = &dma->desc[0];
+
+ /* determine a dma transfer size to fill buffer in 0.3 sec */
+ bytes = das1800_ai_transfer_size(dev, s, desc->maxsize, 300000000);
+diff --git a/drivers/thunderbolt/eeprom.c b/drivers/thunderbolt/eeprom.c
+index 0dde34e3a7c5..545c60c826a1 100644
+--- a/drivers/thunderbolt/eeprom.c
++++ b/drivers/thunderbolt/eeprom.c
+@@ -444,6 +444,7 @@ int tb_drom_read(struct tb_switch *sw)
+ return tb_drom_parse_entries(sw);
+ err:
+ kfree(sw->drom);
++ sw->drom = NULL;
+ return -EIO;
+
+ }
+diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
+index c01620780f5b..365dfd8bc42b 100644
+--- a/drivers/tty/n_gsm.c
++++ b/drivers/tty/n_gsm.c
+@@ -2045,7 +2045,9 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm)
+ }
+ }
+ spin_unlock(&gsm_mux_lock);
+- WARN_ON(i == MAX_MUX);
++ /* open failed before registering => nothing to do */
++ if (i == MAX_MUX)
++ return;
+
+ /* In theory disconnecting DLCI 0 is sufficient but for some
+ modems this is apparently not the case. */
+diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
+index bcaba17688f6..a7fa016f31eb 100644
+--- a/drivers/tty/n_hdlc.c
++++ b/drivers/tty/n_hdlc.c
+@@ -599,7 +599,7 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file,
+ add_wait_queue(&tty->read_wait, &wait);
+
+ for (;;) {
+- if (test_bit(TTY_OTHER_DONE, &tty->flags)) {
++ if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) {
+ ret = -EIO;
+ break;
+ }
+@@ -827,7 +827,7 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp,
+ /* set bits for operations that won't block */
+ if (n_hdlc->rx_buf_list.head)
+ mask |= POLLIN | POLLRDNORM; /* readable */
+- if (test_bit(TTY_OTHER_DONE, &tty->flags))
++ if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
+ mask |= POLLHUP;
+ if (tty_hung_up_p(filp))
+ mask |= POLLHUP;
+diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
+index fb76a7d80e7e..bdf0e6e89991 100644
+--- a/drivers/tty/n_tty.c
++++ b/drivers/tty/n_tty.c
+@@ -1917,18 +1917,6 @@ static inline int input_available_p(struct tty_struct *tty, int poll)
+ return ldata->commit_head - ldata->read_tail >= amt;
+ }
+
+-static inline int check_other_done(struct tty_struct *tty)
+-{
+- int done = test_bit(TTY_OTHER_DONE, &tty->flags);
+- if (done) {
+- /* paired with cmpxchg() in check_other_closed(); ensures
+- * read buffer head index is not stale
+- */
+- smp_mb__after_atomic();
+- }
+- return done;
+-}
+-
+ /**
+ * copy_from_read_buf - copy read data directly
+ * @tty: terminal device
+@@ -2124,7 +2112,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
+ struct n_tty_data *ldata = tty->disc_data;
+ unsigned char __user *b = buf;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+- int c, done;
++ int c;
+ int minimum, time;
+ ssize_t retval = 0;
+ long timeout;
+@@ -2183,32 +2171,35 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
+ break;
+ }
+
+- done = check_other_done(tty);
+-
+ if (!input_available_p(tty, 0)) {
+- if (done) {
+- retval = -EIO;
+- break;
+- }
+- if (tty_hung_up_p(file))
+- break;
+- if (!timeout)
+- break;
+- if (file->f_flags & O_NONBLOCK) {
+- retval = -EAGAIN;
+- break;
+- }
+- if (signal_pending(current)) {
+- retval = -ERESTARTSYS;
+- break;
+- }
+ up_read(&tty->termios_rwsem);
++ tty_buffer_flush_work(tty->port);
++ down_read(&tty->termios_rwsem);
++ if (!input_available_p(tty, 0)) {
++ if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) {
++ retval = -EIO;
++ break;
++ }
++ if (tty_hung_up_p(file))
++ break;
++ if (!timeout)
++ break;
++ if (file->f_flags & O_NONBLOCK) {
++ retval = -EAGAIN;
++ break;
++ }
++ if (signal_pending(current)) {
++ retval = -ERESTARTSYS;
++ break;
++ }
++ up_read(&tty->termios_rwsem);
+
+- timeout = wait_woken(&wait, TASK_INTERRUPTIBLE,
+- timeout);
++ timeout = wait_woken(&wait, TASK_INTERRUPTIBLE,
++ timeout);
+
+- down_read(&tty->termios_rwsem);
+- continue;
++ down_read(&tty->termios_rwsem);
++ continue;
++ }
+ }
+
+ if (ldata->icanon && !L_EXTPROC(tty)) {
+@@ -2386,12 +2377,17 @@ static unsigned int n_tty_poll(struct tty_struct *tty, struct file *file,
+
+ poll_wait(file, &tty->read_wait, wait);
+ poll_wait(file, &tty->write_wait, wait);
+- if (check_other_done(tty))
+- mask |= POLLHUP;
+ if (input_available_p(tty, 1))
+ mask |= POLLIN | POLLRDNORM;
++ else {
++ tty_buffer_flush_work(tty->port);
++ if (input_available_p(tty, 1))
++ mask |= POLLIN | POLLRDNORM;
++ }
+ if (tty->packet && tty->link->ctrl_status)
+ mask |= POLLPRI | POLLIN | POLLRDNORM;
++ if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
++ mask |= POLLHUP;
+ if (tty_hung_up_p(file))
+ mask |= POLLHUP;
+ if (tty->ops->write && !tty_is_writelocked(tty) &&
+diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
+index cf0dc51a2690..e198996c5b83 100644
+--- a/drivers/tty/pty.c
++++ b/drivers/tty/pty.c
+@@ -59,7 +59,7 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
+ if (!tty->link)
+ return;
+ set_bit(TTY_OTHER_CLOSED, &tty->link->flags);
+- tty_flip_buffer_push(tty->link->port);
++ wake_up_interruptible(&tty->link->read_wait);
+ wake_up_interruptible(&tty->link->write_wait);
+ if (tty->driver->subtype == PTY_TYPE_MASTER) {
+ set_bit(TTY_OTHER_CLOSED, &tty->flags);
+@@ -247,9 +247,7 @@ static int pty_open(struct tty_struct *tty, struct file *filp)
+ goto out;
+
+ clear_bit(TTY_IO_ERROR, &tty->flags);
+- /* TTY_OTHER_CLOSED must be cleared before TTY_OTHER_DONE */
+ clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);
+- clear_bit(TTY_OTHER_DONE, &tty->link->flags);
+ set_bit(TTY_THROTTLED, &tty->flags);
+ return 0;
+
+diff --git a/drivers/tty/serial/8250/8250_mid.c b/drivers/tty/serial/8250/8250_mid.c
+index 88531a36b69c..ed489880e62b 100644
+--- a/drivers/tty/serial/8250/8250_mid.c
++++ b/drivers/tty/serial/8250/8250_mid.c
+@@ -14,6 +14,7 @@
+ #include <linux/pci.h>
+
+ #include <linux/dma/hsu.h>
++#include <linux/8250_pci.h>
+
+ #include "8250.h"
+
+@@ -24,6 +25,7 @@
+ #define PCI_DEVICE_ID_INTEL_DNV_UART 0x19d8
+
+ /* Intel MID Specific registers */
++#define INTEL_MID_UART_DNV_FISR 0x08
+ #define INTEL_MID_UART_PS 0x30
+ #define INTEL_MID_UART_MUL 0x34
+ #define INTEL_MID_UART_DIV 0x38
+@@ -31,6 +33,7 @@
+ struct mid8250;
+
+ struct mid8250_board {
++ unsigned int flags;
+ unsigned long freq;
+ unsigned int base_baud;
+ int (*setup)(struct mid8250 *, struct uart_port *p);
+@@ -88,16 +91,16 @@ static int tng_setup(struct mid8250 *mid, struct uart_port *p)
+ static int dnv_handle_irq(struct uart_port *p)
+ {
+ struct mid8250 *mid = p->private_data;
+- int ret;
+-
+- ret = hsu_dma_irq(&mid->dma_chip, 0);
+- ret |= hsu_dma_irq(&mid->dma_chip, 1);
+-
+- /* For now, letting the HW generate separate interrupt for the UART */
+- if (ret)
+- return ret;
+-
+- return serial8250_handle_irq(p, serial_port_in(p, UART_IIR));
++ unsigned int fisr = serial_port_in(p, INTEL_MID_UART_DNV_FISR);
++ int ret = IRQ_NONE;
++
++ if (fisr & BIT(2))
++ ret |= hsu_dma_irq(&mid->dma_chip, 1);
++ if (fisr & BIT(1))
++ ret |= hsu_dma_irq(&mid->dma_chip, 0);
++ if (fisr & BIT(0))
++ ret |= serial8250_handle_irq(p, serial_port_in(p, UART_IIR));
++ return ret;
+ }
+
+ #define DNV_DMA_CHAN_OFFSET 0x80
+@@ -106,12 +109,13 @@ static int dnv_setup(struct mid8250 *mid, struct uart_port *p)
+ {
+ struct hsu_dma_chip *chip = &mid->dma_chip;
+ struct pci_dev *pdev = to_pci_dev(p->dev);
++ unsigned int bar = FL_GET_BASE(mid->board->flags);
+ int ret;
+
+ chip->dev = &pdev->dev;
+ chip->irq = pdev->irq;
+ chip->regs = p->membase;
+- chip->length = pci_resource_len(pdev, 0);
++ chip->length = pci_resource_len(pdev, bar);
+ chip->offset = DNV_DMA_CHAN_OFFSET;
+
+ /* Falling back to PIO mode if DMA probing fails */
+@@ -217,6 +221,7 @@ static int mid8250_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ {
+ struct uart_8250_port uart;
+ struct mid8250 *mid;
++ unsigned int bar;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+@@ -230,6 +235,7 @@ static int mid8250_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ return -ENOMEM;
+
+ mid->board = (struct mid8250_board *)id->driver_data;
++ bar = FL_GET_BASE(mid->board->flags);
+
+ memset(&uart, 0, sizeof(struct uart_8250_port));
+
+@@ -242,8 +248,8 @@ static int mid8250_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ uart.port.flags = UPF_SHARE_IRQ | UPF_FIXED_PORT | UPF_FIXED_TYPE;
+ uart.port.set_termios = mid8250_set_termios;
+
+- uart.port.mapbase = pci_resource_start(pdev, 0);
+- uart.port.membase = pcim_iomap(pdev, 0, 0);
++ uart.port.mapbase = pci_resource_start(pdev, bar);
++ uart.port.membase = pcim_iomap(pdev, bar, 0);
+ if (!uart.port.membase)
+ return -ENOMEM;
+
+@@ -282,18 +288,21 @@ static void mid8250_remove(struct pci_dev *pdev)
+ }
+
+ static const struct mid8250_board pnw_board = {
++ .flags = FL_BASE0,
+ .freq = 50000000,
+ .base_baud = 115200,
+ .setup = pnw_setup,
+ };
+
+ static const struct mid8250_board tng_board = {
++ .flags = FL_BASE0,
+ .freq = 38400000,
+ .base_baud = 1843200,
+ .setup = tng_setup,
+ };
+
+ static const struct mid8250_board dnv_board = {
++ .flags = FL_BASE1,
+ .freq = 133333333,
+ .base_baud = 115200,
+ .setup = dnv_setup,
+diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
+index 98862aa5bb58..4eedd1da32e6 100644
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -1377,6 +1377,9 @@ byt_set_termios(struct uart_port *p, struct ktermios *termios,
+ unsigned long m, n;
+ u32 reg;
+
++ /* Gracefully handle the B0 case: fall back to B9600 */
++ fuart = fuart ? fuart : 9600 * 16;
++
+ /* Get Fuart closer to Fref */
+ fuart *= rounddown_pow_of_two(fref / fuart);
+
+diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
+index d9439e6ab719..954941dd8124 100644
+--- a/drivers/tty/serial/atmel_serial.c
++++ b/drivers/tty/serial/atmel_serial.c
+@@ -274,6 +274,13 @@ static bool atmel_use_dma_rx(struct uart_port *port)
+ return atmel_port->use_dma_rx;
+ }
+
++static bool atmel_use_fifo(struct uart_port *port)
++{
++ struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
++
++ return atmel_port->fifo_size;
++}
++
+ static unsigned int atmel_get_lines_status(struct uart_port *port)
+ {
+ struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
+@@ -2090,7 +2097,12 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios,
+ mode |= ATMEL_US_USMODE_RS485;
+ } else if (termios->c_cflag & CRTSCTS) {
+ /* RS232 with hardware handshake (RTS/CTS) */
+- mode |= ATMEL_US_USMODE_HWHS;
++ if (atmel_use_dma_rx(port) && !atmel_use_fifo(port)) {
++ dev_info(port->dev, "not enabling hardware flow control because DMA is used");
++ termios->c_cflag &= ~CRTSCTS;
++ } else {
++ mode |= ATMEL_US_USMODE_HWHS;
++ }
+ } else {
+ 		/* RS232 without hardware handshake */
+ mode |= ATMEL_US_USMODE_NORMAL;
+diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
+index ac7f8df54406..99bb23161dd6 100644
+--- a/drivers/tty/serial/samsung.c
++++ b/drivers/tty/serial/samsung.c
+@@ -1271,6 +1271,8 @@ static void s3c24xx_serial_set_termios(struct uart_port *port,
+ /* check to see if we need to change clock source */
+
+ if (ourport->baudclk != clk) {
++ clk_prepare_enable(clk);
++
+ s3c24xx_serial_setsource(port, clk_sel);
+
+ if (!IS_ERR(ourport->baudclk)) {
+@@ -1278,8 +1280,6 @@ static void s3c24xx_serial_set_termios(struct uart_port *port,
+ ourport->baudclk = ERR_PTR(-EINVAL);
+ }
+
+- clk_prepare_enable(clk);
+-
+ ourport->baudclk = clk;
+ ourport->baudclk_rate = clk ? clk_get_rate(clk) : 0;
+ }
+diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
+index a946e49a2626..aa80dc94ddc2 100644
+--- a/drivers/tty/tty_buffer.c
++++ b/drivers/tty/tty_buffer.c
+@@ -37,29 +37,6 @@
+
+ #define TTY_BUFFER_PAGE (((PAGE_SIZE - sizeof(struct tty_buffer)) / 2) & ~0xFF)
+
+-/*
+- * If all tty flip buffers have been processed by flush_to_ldisc() or
+- * dropped by tty_buffer_flush(), check if the linked pty has been closed.
+- * If so, wake the reader/poll to process
+- */
+-static inline void check_other_closed(struct tty_struct *tty)
+-{
+- unsigned long flags, old;
+-
+- /* transition from TTY_OTHER_CLOSED => TTY_OTHER_DONE must be atomic */
+- for (flags = ACCESS_ONCE(tty->flags);
+- test_bit(TTY_OTHER_CLOSED, &flags);
+- ) {
+- old = flags;
+- __set_bit(TTY_OTHER_DONE, &flags);
+- flags = cmpxchg(&tty->flags, old, flags);
+- if (old == flags) {
+- wake_up_interruptible(&tty->read_wait);
+- break;
+- }
+- }
+-}
+-
+ /**
+ * tty_buffer_lock_exclusive - gain exclusive access to buffer
+ * tty_buffer_unlock_exclusive - release exclusive access
+@@ -254,8 +231,6 @@ void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld)
+ if (ld && ld->ops->flush_buffer)
+ ld->ops->flush_buffer(tty);
+
+- check_other_closed(tty);
+-
+ atomic_dec(&buf->priority);
+ mutex_unlock(&buf->lock);
+ }
+@@ -522,10 +497,8 @@ static void flush_to_ldisc(struct work_struct *work)
+ */
+ count = smp_load_acquire(&head->commit) - head->read;
+ if (!count) {
+- if (next == NULL) {
+- check_other_closed(tty);
++ if (next == NULL)
+ break;
+- }
+ buf->head = next;
+ tty_buffer_free(port, head);
+ continue;
+@@ -614,3 +587,8 @@ bool tty_buffer_cancel_work(struct tty_port *port)
+ {
+ return cancel_work_sync(&port->buf.work);
+ }
++
++void tty_buffer_flush_work(struct tty_port *port)
++{
++ flush_work(&port->buf.work);
++}
+diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
+index 3e3c7575e92d..bd523adb9643 100644
+--- a/drivers/tty/vt/vt.c
++++ b/drivers/tty/vt/vt.c
+@@ -3583,9 +3583,10 @@ static int do_register_con_driver(const struct consw *csw, int first, int last)
+ goto err;
+
+ desc = csw->con_startup();
+-
+- if (!desc)
++ if (!desc) {
++ retval = -ENODEV;
+ goto err;
++ }
+
+ retval = -EINVAL;
+
+diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
+index 2057d91d8336..dadd1e8dfe09 100644
+--- a/drivers/usb/core/driver.c
++++ b/drivers/usb/core/driver.c
+@@ -284,7 +284,7 @@ static int usb_probe_interface(struct device *dev)
+ struct usb_device *udev = interface_to_usbdev(intf);
+ const struct usb_device_id *id;
+ int error = -ENODEV;
+- int lpm_disable_error;
++ int lpm_disable_error = -ENODEV;
+
+ dev_dbg(dev, "%s\n", __func__);
+
+@@ -336,12 +336,14 @@ static int usb_probe_interface(struct device *dev)
+ * setting during probe, that should also be fine. usb_set_interface()
+ * will attempt to disable LPM, and fail if it can't disable it.
+ */
+- lpm_disable_error = usb_unlocked_disable_lpm(udev);
+- if (lpm_disable_error && driver->disable_hub_initiated_lpm) {
+- dev_err(&intf->dev, "%s Failed to disable LPM for driver %s\n.",
+- __func__, driver->name);
+- error = lpm_disable_error;
+- goto err;
++ if (driver->disable_hub_initiated_lpm) {
++ lpm_disable_error = usb_unlocked_disable_lpm(udev);
++ if (lpm_disable_error) {
++ dev_err(&intf->dev, "%s Failed to disable LPM for driver %s\n.",
++ __func__, driver->name);
++ error = lpm_disable_error;
++ goto err;
++ }
+ }
+
+ /* Carry out a deferred switch to altsetting 0 */
+@@ -391,7 +393,8 @@ static int usb_unbind_interface(struct device *dev)
+ struct usb_interface *intf = to_usb_interface(dev);
+ struct usb_host_endpoint *ep, **eps = NULL;
+ struct usb_device *udev;
+- int i, j, error, r, lpm_disable_error;
++ int i, j, error, r;
++ int lpm_disable_error = -ENODEV;
+
+ intf->condition = USB_INTERFACE_UNBINDING;
+
+@@ -399,12 +402,13 @@ static int usb_unbind_interface(struct device *dev)
+ udev = interface_to_usbdev(intf);
+ error = usb_autoresume_device(udev);
+
+- /* Hub-initiated LPM policy may change, so attempt to disable LPM until
++ /* If hub-initiated LPM policy may change, attempt to disable LPM until
+ * the driver is unbound. If LPM isn't disabled, that's fine because it
+ * wouldn't be enabled unless all the bound interfaces supported
+ * hub-initiated LPM.
+ */
+- lpm_disable_error = usb_unlocked_disable_lpm(udev);
++ if (driver->disable_hub_initiated_lpm)
++ lpm_disable_error = usb_unlocked_disable_lpm(udev);
+
+ /*
+ * Terminate all URBs for this interface unless the driver
+@@ -505,7 +509,7 @@ int usb_driver_claim_interface(struct usb_driver *driver,
+ struct device *dev;
+ struct usb_device *udev;
+ int retval = 0;
+- int lpm_disable_error;
++ int lpm_disable_error = -ENODEV;
+
+ if (!iface)
+ return -ENODEV;
+@@ -526,12 +530,14 @@ int usb_driver_claim_interface(struct usb_driver *driver,
+
+ iface->condition = USB_INTERFACE_BOUND;
+
+- /* Disable LPM until this driver is bound. */
+- lpm_disable_error = usb_unlocked_disable_lpm(udev);
+- if (lpm_disable_error && driver->disable_hub_initiated_lpm) {
+- dev_err(&iface->dev, "%s Failed to disable LPM for driver %s\n.",
+- __func__, driver->name);
+- return -ENOMEM;
++ /* See the comment about disabling LPM in usb_probe_interface(). */
++ if (driver->disable_hub_initiated_lpm) {
++ lpm_disable_error = usb_unlocked_disable_lpm(udev);
++ if (lpm_disable_error) {
++ dev_err(&iface->dev, "%s Failed to disable LPM for driver %s\n.",
++ __func__, driver->name);
++ return -ENOMEM;
++ }
+ }
+
+ /* Claimed interfaces are initially inactive (suspended) and
+diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
+index 2ca2cef7f681..980fc5774151 100644
+--- a/drivers/usb/core/hcd.c
++++ b/drivers/usb/core/hcd.c
+@@ -994,7 +994,7 @@ static void usb_bus_init (struct usb_bus *bus)
+ bus->bandwidth_allocated = 0;
+ bus->bandwidth_int_reqs = 0;
+ bus->bandwidth_isoc_reqs = 0;
+- mutex_init(&bus->usb_address0_mutex);
++ mutex_init(&bus->devnum_next_mutex);
+ }
+
+ /*-------------------------------------------------------------------------*/
+@@ -2521,6 +2521,14 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver,
+ return NULL;
+ }
+ if (primary_hcd == NULL) {
++ hcd->address0_mutex = kmalloc(sizeof(*hcd->address0_mutex),
++ GFP_KERNEL);
++ if (!hcd->address0_mutex) {
++ kfree(hcd);
++ dev_dbg(dev, "hcd address0 mutex alloc failed\n");
++ return NULL;
++ }
++ mutex_init(hcd->address0_mutex);
+ hcd->bandwidth_mutex = kmalloc(sizeof(*hcd->bandwidth_mutex),
+ GFP_KERNEL);
+ if (!hcd->bandwidth_mutex) {
+@@ -2532,6 +2540,7 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver,
+ dev_set_drvdata(dev, hcd);
+ } else {
+ mutex_lock(&usb_port_peer_mutex);
++ hcd->address0_mutex = primary_hcd->address0_mutex;
+ hcd->bandwidth_mutex = primary_hcd->bandwidth_mutex;
+ hcd->primary_hcd = primary_hcd;
+ primary_hcd->primary_hcd = primary_hcd;
+@@ -2598,8 +2607,10 @@ static void hcd_release(struct kref *kref)
+ struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref);
+
+ mutex_lock(&usb_port_peer_mutex);
+- if (usb_hcd_is_primary_hcd(hcd))
++ if (usb_hcd_is_primary_hcd(hcd)) {
++ kfree(hcd->address0_mutex);
+ kfree(hcd->bandwidth_mutex);
++ }
+ if (hcd->shared_hcd) {
+ struct usb_hcd *peer = hcd->shared_hcd;
+
+diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
+index 38cc4bae0a82..1ab42bfbecaf 100644
+--- a/drivers/usb/core/hub.c
++++ b/drivers/usb/core/hub.c
+@@ -2080,7 +2080,7 @@ static void choose_devnum(struct usb_device *udev)
+ struct usb_bus *bus = udev->bus;
+
+ 	/* be safe when more hub events are processed in parallel */
+- mutex_lock(&bus->usb_address0_mutex);
++ mutex_lock(&bus->devnum_next_mutex);
+ if (udev->wusb) {
+ devnum = udev->portnum + 1;
+ BUG_ON(test_bit(devnum, bus->devmap.devicemap));
+@@ -2098,7 +2098,7 @@ static void choose_devnum(struct usb_device *udev)
+ set_bit(devnum, bus->devmap.devicemap);
+ udev->devnum = devnum;
+ }
+- mutex_unlock(&bus->usb_address0_mutex);
++ mutex_unlock(&bus->devnum_next_mutex);
+ }
+
+ static void release_devnum(struct usb_device *udev)
+@@ -4364,7 +4364,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
+ if (oldspeed == USB_SPEED_LOW)
+ delay = HUB_LONG_RESET_TIME;
+
+- mutex_lock(&hdev->bus->usb_address0_mutex);
++ mutex_lock(hcd->address0_mutex);
+
+ /* Reset the device; full speed may morph to high speed */
+ /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */
+@@ -4650,7 +4650,7 @@ fail:
+ hub_port_disable(hub, port1, 0);
+ update_devnum(udev, devnum); /* for disconnect processing */
+ }
+- mutex_unlock(&hdev->bus->usb_address0_mutex);
++ mutex_unlock(hcd->address0_mutex);
+ return retval;
+ }
+
+diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
+index 15b648cbc75c..73515d54e1cc 100644
+--- a/drivers/usb/gadget/function/f_fs.c
++++ b/drivers/usb/gadget/function/f_fs.c
+@@ -651,7 +651,7 @@ static void ffs_user_copy_worker(struct work_struct *work)
+ if (io_data->read && ret > 0) {
+ use_mm(io_data->mm);
+ ret = copy_to_iter(io_data->buf, ret, &io_data->data);
+- if (iov_iter_count(&io_data->data))
++ if (ret != io_data->req->actual && iov_iter_count(&io_data->data))
+ ret = -EFAULT;
+ unuse_mm(io_data->mm);
+ }
+diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
+index acf210f16328..5c6d4d7ca605 100644
+--- a/drivers/usb/gadget/function/f_mass_storage.c
++++ b/drivers/usb/gadget/function/f_mass_storage.c
+@@ -2977,25 +2977,6 @@ void fsg_common_set_inquiry_string(struct fsg_common *common, const char *vn,
+ }
+ EXPORT_SYMBOL_GPL(fsg_common_set_inquiry_string);
+
+-int fsg_common_run_thread(struct fsg_common *common)
+-{
+- common->state = FSG_STATE_IDLE;
+- /* Tell the thread to start working */
+- common->thread_task =
+- kthread_create(fsg_main_thread, common, "file-storage");
+- if (IS_ERR(common->thread_task)) {
+- common->state = FSG_STATE_TERMINATED;
+- return PTR_ERR(common->thread_task);
+- }
+-
+- DBG(common, "I/O thread pid: %d\n", task_pid_nr(common->thread_task));
+-
+- wake_up_process(common->thread_task);
+-
+- return 0;
+-}
+-EXPORT_SYMBOL_GPL(fsg_common_run_thread);
+-
+ static void fsg_common_release(struct kref *ref)
+ {
+ struct fsg_common *common = container_of(ref, struct fsg_common, ref);
+@@ -3005,6 +2986,7 @@ static void fsg_common_release(struct kref *ref)
+ if (common->state != FSG_STATE_TERMINATED) {
+ raise_exception(common, FSG_STATE_EXIT);
+ wait_for_completion(&common->thread_notifier);
++ common->thread_task = NULL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(common->luns); ++i) {
+@@ -3050,9 +3032,21 @@ static int fsg_bind(struct usb_configuration *c, struct usb_function *f)
+ if (ret)
+ return ret;
+ fsg_common_set_inquiry_string(fsg->common, NULL, NULL);
+- ret = fsg_common_run_thread(fsg->common);
+- if (ret)
++ }
++
++ if (!common->thread_task) {
++ common->state = FSG_STATE_IDLE;
++ common->thread_task =
++ kthread_create(fsg_main_thread, common, "file-storage");
++ if (IS_ERR(common->thread_task)) {
++ int ret = PTR_ERR(common->thread_task);
++ common->thread_task = NULL;
++ common->state = FSG_STATE_TERMINATED;
+ return ret;
++ }
++ DBG(common, "I/O thread pid: %d\n",
++ task_pid_nr(common->thread_task));
++ wake_up_process(common->thread_task);
+ }
+
+ fsg->gadget = gadget;
+diff --git a/drivers/usb/gadget/function/f_mass_storage.h b/drivers/usb/gadget/function/f_mass_storage.h
+index 445df6775609..b6a9918eaefb 100644
+--- a/drivers/usb/gadget/function/f_mass_storage.h
++++ b/drivers/usb/gadget/function/f_mass_storage.h
+@@ -153,8 +153,6 @@ int fsg_common_create_luns(struct fsg_common *common, struct fsg_config *cfg);
+ void fsg_common_set_inquiry_string(struct fsg_common *common, const char *vn,
+ const char *pn);
+
+-int fsg_common_run_thread(struct fsg_common *common);
+-
+ void fsg_config_from_params(struct fsg_config *cfg,
+ const struct fsg_module_parameters *params,
+ unsigned int fsg_num_buffers);
+diff --git a/drivers/usb/gadget/legacy/acm_ms.c b/drivers/usb/gadget/legacy/acm_ms.c
+index c16089efc322..c39de65a448b 100644
+--- a/drivers/usb/gadget/legacy/acm_ms.c
++++ b/drivers/usb/gadget/legacy/acm_ms.c
+@@ -133,10 +133,6 @@ static int acm_ms_do_config(struct usb_configuration *c)
+ if (status < 0)
+ goto put_msg;
+
+- status = fsg_common_run_thread(opts->common);
+- if (status)
+- goto remove_acm;
+-
+ status = usb_add_function(c, f_msg);
+ if (status)
+ goto remove_acm;
+diff --git a/drivers/usb/gadget/legacy/mass_storage.c b/drivers/usb/gadget/legacy/mass_storage.c
+index e61af53c7d2b..125974f32f50 100644
+--- a/drivers/usb/gadget/legacy/mass_storage.c
++++ b/drivers/usb/gadget/legacy/mass_storage.c
+@@ -132,10 +132,6 @@ static int msg_do_config(struct usb_configuration *c)
+ if (IS_ERR(f_msg))
+ return PTR_ERR(f_msg);
+
+- ret = fsg_common_run_thread(opts->common);
+- if (ret)
+- goto put_func;
+-
+ ret = usb_add_function(c, f_msg);
+ if (ret)
+ goto put_func;
+diff --git a/drivers/usb/gadget/legacy/multi.c b/drivers/usb/gadget/legacy/multi.c
+index 229d704a620b..a70a406580ea 100644
+--- a/drivers/usb/gadget/legacy/multi.c
++++ b/drivers/usb/gadget/legacy/multi.c
+@@ -137,7 +137,6 @@ static struct usb_function *f_msg_rndis;
+
+ static int rndis_do_config(struct usb_configuration *c)
+ {
+- struct fsg_opts *fsg_opts;
+ int ret;
+
+ if (gadget_is_otg(c->cdev->gadget)) {
+@@ -169,11 +168,6 @@ static int rndis_do_config(struct usb_configuration *c)
+ goto err_fsg;
+ }
+
+- fsg_opts = fsg_opts_from_func_inst(fi_msg);
+- ret = fsg_common_run_thread(fsg_opts->common);
+- if (ret)
+- goto err_run;
+-
+ ret = usb_add_function(c, f_msg_rndis);
+ if (ret)
+ goto err_run;
+@@ -225,7 +219,6 @@ static struct usb_function *f_msg_multi;
+
+ static int cdc_do_config(struct usb_configuration *c)
+ {
+- struct fsg_opts *fsg_opts;
+ int ret;
+
+ if (gadget_is_otg(c->cdev->gadget)) {
+@@ -258,11 +251,6 @@ static int cdc_do_config(struct usb_configuration *c)
+ goto err_fsg;
+ }
+
+- fsg_opts = fsg_opts_from_func_inst(fi_msg);
+- ret = fsg_common_run_thread(fsg_opts->common);
+- if (ret)
+- goto err_run;
+-
+ ret = usb_add_function(c, f_msg_multi);
+ if (ret)
+ goto err_run;
+diff --git a/drivers/usb/gadget/legacy/nokia.c b/drivers/usb/gadget/legacy/nokia.c
+index 09975046c694..b1e535f4022e 100644
+--- a/drivers/usb/gadget/legacy/nokia.c
++++ b/drivers/usb/gadget/legacy/nokia.c
+@@ -152,7 +152,6 @@ static int nokia_bind_config(struct usb_configuration *c)
+ struct usb_function *f_ecm;
+ struct usb_function *f_obex2 = NULL;
+ struct usb_function *f_msg;
+- struct fsg_opts *fsg_opts;
+ int status = 0;
+ int obex1_stat = -1;
+ int obex2_stat = -1;
+@@ -222,12 +221,6 @@ static int nokia_bind_config(struct usb_configuration *c)
+ goto err_ecm;
+ }
+
+- fsg_opts = fsg_opts_from_func_inst(fi_msg);
+-
+- status = fsg_common_run_thread(fsg_opts->common);
+- if (status)
+- goto err_msg;
+-
+ status = usb_add_function(c, f_msg);
+ if (status)
+ goto err_msg;
+diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
+index e4e70e11d0f6..c6e76465065a 100644
+--- a/drivers/usb/gadget/udc/udc-core.c
++++ b/drivers/usb/gadget/udc/udc-core.c
+@@ -75,7 +75,7 @@ int usb_gadget_map_request(struct usb_gadget *gadget,
+ mapped = dma_map_sg(dev, req->sg, req->num_sgs,
+ is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ if (mapped == 0) {
+- dev_err(&gadget->dev, "failed to map SGs\n");
++ dev_err(dev, "failed to map SGs\n");
+ return -EFAULT;
+ }
+
+diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
+index 3050b18b2447..e9d4dde3e9b3 100644
+--- a/drivers/usb/host/Kconfig
++++ b/drivers/usb/host/Kconfig
+@@ -35,6 +35,7 @@ config USB_XHCI_PCI
+
+ config USB_XHCI_PLATFORM
+ tristate "Generic xHCI driver for a platform device"
++ select USB_XHCI_RCAR if ARCH_RENESAS
+ ---help---
+ Adds an xHCI host driver for a generic platform device, which
+ provides a memory space and an irq.
+@@ -63,7 +64,7 @@ config USB_XHCI_MVEBU
+
+ config USB_XHCI_RCAR
+ tristate "xHCI support for Renesas R-Car SoCs"
+- select USB_XHCI_PLATFORM
++ depends on USB_XHCI_PLATFORM
+ depends on ARCH_RENESAS || COMPILE_TEST
+ ---help---
+ Say 'Y' to enable the support for the xHCI host controller
+diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
+index 92fdb6e9faff..c78ff95a43be 100644
+--- a/drivers/usb/misc/usbtest.c
++++ b/drivers/usb/misc/usbtest.c
+@@ -529,6 +529,7 @@ static struct scatterlist *
+ alloc_sglist(int nents, int max, int vary, struct usbtest_dev *dev, int pipe)
+ {
+ struct scatterlist *sg;
++ unsigned int n_size = 0;
+ unsigned i;
+ unsigned size = max;
+ unsigned maxpacket =
+@@ -561,7 +562,8 @@ alloc_sglist(int nents, int max, int vary, struct usbtest_dev *dev, int pipe)
+ break;
+ case 1:
+ for (j = 0; j < size; j++)
+- *buf++ = (u8) ((j % maxpacket) % 63);
++ *buf++ = (u8) (((j + n_size) % maxpacket) % 63);
++ n_size += size;
+ break;
+ }
+
+diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
+index 7c9f25e9c422..a7c8d26a322b 100644
+--- a/drivers/usb/serial/cp210x.c
++++ b/drivers/usb/serial/cp210x.c
+@@ -971,8 +971,7 @@ static void cp210x_set_termios(struct tty_struct *tty,
+ } else {
+ modem_ctl[0] &= ~0x7B;
+ modem_ctl[0] |= 0x01;
+- /* FIXME - OR here instead of assignment looks wrong */
+- modem_ctl[4] |= 0x40;
++ modem_ctl[4] = 0x40;
+ dev_dbg(dev, "%s - flow control = NONE\n", __func__);
+ }
+
+diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
+index f3007ecdd1b4..11c05ce2f35f 100644
+--- a/drivers/usb/serial/io_edgeport.c
++++ b/drivers/usb/serial/io_edgeport.c
+@@ -2849,14 +2849,16 @@ static int edge_startup(struct usb_serial *serial)
+ /* not set up yet, so do it now */
+ edge_serial->interrupt_read_urb =
+ usb_alloc_urb(0, GFP_KERNEL);
+- if (!edge_serial->interrupt_read_urb)
+- return -ENOMEM;
++ if (!edge_serial->interrupt_read_urb) {
++ response = -ENOMEM;
++ break;
++ }
+
+ edge_serial->interrupt_in_buffer =
+ kmalloc(buffer_size, GFP_KERNEL);
+ if (!edge_serial->interrupt_in_buffer) {
+- usb_free_urb(edge_serial->interrupt_read_urb);
+- return -ENOMEM;
++ response = -ENOMEM;
++ break;
+ }
+ edge_serial->interrupt_in_endpoint =
+ endpoint->bEndpointAddress;
+@@ -2884,14 +2886,16 @@ static int edge_startup(struct usb_serial *serial)
+ /* not set up yet, so do it now */
+ edge_serial->read_urb =
+ usb_alloc_urb(0, GFP_KERNEL);
+- if (!edge_serial->read_urb)
+- return -ENOMEM;
++ if (!edge_serial->read_urb) {
++ response = -ENOMEM;
++ break;
++ }
+
+ edge_serial->bulk_in_buffer =
+ kmalloc(buffer_size, GFP_KERNEL);
+ if (!edge_serial->bulk_in_buffer) {
+- usb_free_urb(edge_serial->read_urb);
+- return -ENOMEM;
++ response = -ENOMEM;
++ break;
+ }
+ edge_serial->bulk_in_endpoint =
+ endpoint->bEndpointAddress;
+@@ -2917,9 +2921,22 @@ static int edge_startup(struct usb_serial *serial)
+ }
+ }
+
+- if (!interrupt_in_found || !bulk_in_found || !bulk_out_found) {
+- dev_err(ddev, "Error - the proper endpoints were not found!\n");
+- return -ENODEV;
++ if (response || !interrupt_in_found || !bulk_in_found ||
++ !bulk_out_found) {
++ if (!response) {
++ dev_err(ddev, "expected endpoints not found\n");
++ response = -ENODEV;
++ }
++
++ usb_free_urb(edge_serial->interrupt_read_urb);
++ kfree(edge_serial->interrupt_in_buffer);
++
++ usb_free_urb(edge_serial->read_urb);
++ kfree(edge_serial->bulk_in_buffer);
++
++ kfree(edge_serial);
++
++ return response;
+ }
+
+ /* start interrupt read for this edgeport this interrupt will
+@@ -2942,16 +2959,9 @@ static void edge_disconnect(struct usb_serial *serial)
+ {
+ struct edgeport_serial *edge_serial = usb_get_serial_data(serial);
+
+- /* stop reads and writes on all ports */
+- /* free up our endpoint stuff */
+ if (edge_serial->is_epic) {
+ usb_kill_urb(edge_serial->interrupt_read_urb);
+- usb_free_urb(edge_serial->interrupt_read_urb);
+- kfree(edge_serial->interrupt_in_buffer);
+-
+ usb_kill_urb(edge_serial->read_urb);
+- usb_free_urb(edge_serial->read_urb);
+- kfree(edge_serial->bulk_in_buffer);
+ }
+ }
+
+@@ -2964,6 +2974,16 @@ static void edge_release(struct usb_serial *serial)
+ {
+ struct edgeport_serial *edge_serial = usb_get_serial_data(serial);
+
++ if (edge_serial->is_epic) {
++ usb_kill_urb(edge_serial->interrupt_read_urb);
++ usb_free_urb(edge_serial->interrupt_read_urb);
++ kfree(edge_serial->interrupt_in_buffer);
++
++ usb_kill_urb(edge_serial->read_urb);
++ usb_free_urb(edge_serial->read_urb);
++ kfree(edge_serial->bulk_in_buffer);
++ }
++
+ kfree(edge_serial);
+ }
+
+diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
+index b6bd8e4a6486..1cf05883f48c 100644
+--- a/drivers/usb/serial/keyspan.c
++++ b/drivers/usb/serial/keyspan.c
+@@ -2376,6 +2376,10 @@ static void keyspan_release(struct usb_serial *serial)
+
+ s_priv = usb_get_serial_data(serial);
+
++ /* Make sure to unlink the URBs submitted in attach. */
++ usb_kill_urb(s_priv->instat_urb);
++ usb_kill_urb(s_priv->indat_urb);
++
+ usb_free_urb(s_priv->instat_urb);
+ usb_free_urb(s_priv->indat_urb);
+ usb_free_urb(s_priv->glocont_urb);
+diff --git a/drivers/usb/serial/mxuport.c b/drivers/usb/serial/mxuport.c
+index 31a8b47f1ac6..c6596cbcc4b6 100644
+--- a/drivers/usb/serial/mxuport.c
++++ b/drivers/usb/serial/mxuport.c
+@@ -1259,6 +1259,15 @@ static int mxuport_attach(struct usb_serial *serial)
+ return 0;
+ }
+
++static void mxuport_release(struct usb_serial *serial)
++{
++ struct usb_serial_port *port0 = serial->port[0];
++ struct usb_serial_port *port1 = serial->port[1];
++
++ usb_serial_generic_close(port1);
++ usb_serial_generic_close(port0);
++}
++
+ static int mxuport_open(struct tty_struct *tty, struct usb_serial_port *port)
+ {
+ struct mxuport_port *mxport = usb_get_serial_port_data(port);
+@@ -1361,6 +1370,7 @@ static struct usb_serial_driver mxuport_device = {
+ .probe = mxuport_probe,
+ .port_probe = mxuport_port_probe,
+ .attach = mxuport_attach,
++ .release = mxuport_release,
+ .calc_num_ports = mxuport_calc_num_ports,
+ .open = mxuport_open,
+ .close = mxuport_close,
+diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
+index c6f497f16526..d96d423d00e6 100644
+--- a/drivers/usb/serial/option.c
++++ b/drivers/usb/serial/option.c
+@@ -375,18 +375,22 @@ static void option_instat_callback(struct urb *urb);
+ #define HAIER_PRODUCT_CE81B 0x10f8
+ #define HAIER_PRODUCT_CE100 0x2009
+
+-/* Cinterion (formerly Siemens) products */
+-#define SIEMENS_VENDOR_ID 0x0681
+-#define CINTERION_VENDOR_ID 0x1e2d
++/* Gemalto's Cinterion products (formerly Siemens) */
++#define SIEMENS_VENDOR_ID 0x0681
++#define CINTERION_VENDOR_ID 0x1e2d
++#define CINTERION_PRODUCT_HC25_MDMNET 0x0040
+ #define CINTERION_PRODUCT_HC25_MDM 0x0047
+-#define CINTERION_PRODUCT_HC25_MDMNET 0x0040
++#define CINTERION_PRODUCT_HC28_MDMNET 0x004A /* same for HC28J */
+ #define CINTERION_PRODUCT_HC28_MDM 0x004C
+-#define CINTERION_PRODUCT_HC28_MDMNET 0x004A /* same for HC28J */
+ #define CINTERION_PRODUCT_EU3_E 0x0051
+ #define CINTERION_PRODUCT_EU3_P 0x0052
+ #define CINTERION_PRODUCT_PH8 0x0053
+ #define CINTERION_PRODUCT_AHXX 0x0055
+ #define CINTERION_PRODUCT_PLXX 0x0060
++#define CINTERION_PRODUCT_PH8_2RMNET 0x0082
++#define CINTERION_PRODUCT_PH8_AUDIO 0x0083
++#define CINTERION_PRODUCT_AHXX_2RMNET 0x0084
++#define CINTERION_PRODUCT_AHXX_AUDIO 0x0085
+
+ /* Olivetti products */
+ #define OLIVETTI_VENDOR_ID 0x0b3c
+@@ -633,6 +637,10 @@ static const struct option_blacklist_info telit_le922_blacklist_usbcfg3 = {
+ .reserved = BIT(1) | BIT(2) | BIT(3),
+ };
+
++static const struct option_blacklist_info cinterion_rmnet2_blacklist = {
++ .reserved = BIT(4) | BIT(5),
++};
++
+ static const struct usb_device_id option_ids[] = {
+ { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
+ { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
+@@ -1602,7 +1610,79 @@ static const struct usb_device_id option_ids[] = {
+ .driver_info = (kernel_ulong_t)&net_intf3_blacklist },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0178, 0xff, 0xff, 0xff),
+ .driver_info = (kernel_ulong_t)&net_intf3_blacklist },
+- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffe9, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff42, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff43, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff44, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff45, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff46, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff47, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff48, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff49, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4a, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4b, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4c, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4d, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4e, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4f, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff50, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff51, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff52, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff53, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff54, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff55, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff56, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff57, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff58, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff59, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5a, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5b, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5c, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5d, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5e, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5f, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff60, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff61, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff62, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff63, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff64, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff65, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff66, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff67, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff68, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff69, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6a, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6b, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6c, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6d, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6e, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6f, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff70, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff71, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff72, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff73, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff74, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff75, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff76, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff77, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff78, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff79, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7a, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7b, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7c, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7d, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7e, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7f, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff80, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff81, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff82, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff83, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff84, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff85, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff86, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff87, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff88, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff89, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff8a, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff8b, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff8c, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff8d, 0xff, 0xff, 0xff) },
+@@ -1613,6 +1693,61 @@ static const struct usb_device_id option_ids[] = {
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff92, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff93, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff94, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff9f, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa0, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa1, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa2, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa3, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa4, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa5, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa6, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa7, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa8, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa9, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffaa, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffab, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffac, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffae, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffaf, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb0, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb1, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb2, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb3, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb4, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb5, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb6, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb7, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb8, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb9, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffba, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffbb, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffbc, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffbd, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffbe, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffbf, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc0, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc1, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc2, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc3, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc4, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc5, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc6, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc7, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc8, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc9, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffca, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffcb, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffcc, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffcd, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffce, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffcf, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd0, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd1, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd2, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd3, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd4, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd5, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffe9, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffec, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffee, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xfff6, 0xff, 0xff, 0xff) },
+@@ -1712,7 +1847,13 @@ static const struct usb_device_id option_ids[] = {
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX, 0xff) },
+ { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLXX),
+ .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+- { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) },
++ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8_2RMNET, 0xff),
++ .driver_info = (kernel_ulong_t)&cinterion_rmnet2_blacklist },
++ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8_AUDIO, 0xff),
++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
++ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_2RMNET, 0xff) },
++ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_AUDIO, 0xff) },
++ { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) },
+ { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) },
+ { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDM) },
+ { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDMNET) },
+diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c
+index 2df8ad5ede89..85acb50a7ee2 100644
+--- a/drivers/usb/serial/quatech2.c
++++ b/drivers/usb/serial/quatech2.c
+@@ -141,6 +141,7 @@ static void qt2_release(struct usb_serial *serial)
+
+ serial_priv = usb_get_serial_data(serial);
+
++ usb_kill_urb(serial_priv->read_urb);
+ usb_free_urb(serial_priv->read_urb);
+ kfree(serial_priv->read_buffer);
+ kfree(serial_priv);
+diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c
+index 6467b91f2245..028618c5eeba 100644
+--- a/drivers/watchdog/sp5100_tco.c
++++ b/drivers/watchdog/sp5100_tco.c
+@@ -73,6 +73,13 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started."
+ /*
+ * Some TCO specific functions
+ */
++
++static bool tco_has_sp5100_reg_layout(struct pci_dev *dev)
++{
++ return dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS &&
++ dev->revision < 0x40;
++}
++
+ static void tco_timer_start(void)
+ {
+ u32 val;
+@@ -129,7 +136,7 @@ static void tco_timer_enable(void)
+ {
+ int val;
+
+- if (sp5100_tco_pci->revision >= 0x40) {
++ if (!tco_has_sp5100_reg_layout(sp5100_tco_pci)) {
+ /* For SB800 or later */
+ /* Set the Watchdog timer resolution to 1 sec */
+ outb(SB800_PM_WATCHDOG_CONFIG, SB800_IO_PM_INDEX_REG);
+@@ -342,8 +349,7 @@ static unsigned char sp5100_tco_setupdevice(void)
+ /*
+ * Determine type of southbridge chipset.
+ */
+- if (sp5100_tco_pci->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS &&
+- sp5100_tco_pci->revision < 0x40) {
++ if (tco_has_sp5100_reg_layout(sp5100_tco_pci)) {
+ dev_name = SP5100_DEVNAME;
+ index_reg = SP5100_IO_PM_INDEX_REG;
+ data_reg = SP5100_IO_PM_DATA_REG;
+@@ -388,8 +394,7 @@ static unsigned char sp5100_tco_setupdevice(void)
+ * Secondly, Find the watchdog timer MMIO address
+ * from SBResource_MMIO register.
+ */
+- if (sp5100_tco_pci->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS &&
+- sp5100_tco_pci->revision < 0x40) {
++ if (tco_has_sp5100_reg_layout(sp5100_tco_pci)) {
+ /* Read SBResource_MMIO from PCI config(PCI_Reg: 9Ch) */
+ pci_read_config_dword(sp5100_tco_pci,
+ SP5100_SB_RESOURCE_MMIO_BASE, &val);
+diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
+index e2c5abbb45ff..3595cffa24ea 100644
+--- a/drivers/watchdog/watchdog_dev.c
++++ b/drivers/watchdog/watchdog_dev.c
+@@ -736,7 +736,6 @@ static int watchdog_release(struct inode *inode, struct file *file)
+ watchdog_ping(wdd);
+ }
+
+- cancel_delayed_work_sync(&wd_data->work);
+ watchdog_update_worker(wdd);
+
+ /* make sure that /dev/watchdog can be re-opened */
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 84a6a5b3384a..208d19938fdf 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -4122,6 +4122,7 @@ void btrfs_test_inode_set_ops(struct inode *inode);
+
+ /* ioctl.c */
+ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
++long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+ int btrfs_ioctl_get_supported_features(void __user *arg);
+ void btrfs_update_iflags(struct inode *inode);
+ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
+diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
+index 8d7b5a45c005..af5c7fa22e0e 100644
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1596,6 +1596,13 @@ again:
+
+ copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
+
++ num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
++ reserve_bytes);
++ dirty_sectors = round_up(copied + sector_offset,
++ root->sectorsize);
++ dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
++ dirty_sectors);
++
+ /*
+ * if we have trouble faulting in the pages, fall
+ * back to one page at a time
+@@ -1605,6 +1612,7 @@ again:
+
+ if (copied == 0) {
+ force_page_uptodate = true;
++ dirty_sectors = 0;
+ dirty_pages = 0;
+ } else {
+ force_page_uptodate = false;
+@@ -1615,20 +1623,19 @@ again:
+ /*
+ 		 * If we had a short copy we need to release the excess delalloc
+ * bytes we reserved. We need to increment outstanding_extents
+- * because btrfs_delalloc_release_space will decrement it, but
++ * because btrfs_delalloc_release_space and
++ * btrfs_delalloc_release_metadata will decrement it, but
+ * we still have an outstanding extent for the chunk we actually
+ * managed to copy.
+ */
+- num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+- reserve_bytes);
+- dirty_sectors = round_up(copied + sector_offset,
+- root->sectorsize);
+- dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+- dirty_sectors);
+-
+ if (num_sectors > dirty_sectors) {
+- release_bytes = (write_bytes - copied)
+- & ~((u64)root->sectorsize - 1);
++ /*
++ * we round down because we don't want to count
++ * any partial blocks actually sent through the
++ * IO machines
++ */
++ release_bytes = round_down(release_bytes - copied,
++ root->sectorsize);
+ if (copied > 0) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+@@ -2956,7 +2963,7 @@ const struct file_operations btrfs_file_operations = {
+ .fallocate = btrfs_fallocate,
+ .unlocked_ioctl = btrfs_ioctl,
+ #ifdef CONFIG_COMPAT
+- .compat_ioctl = btrfs_ioctl,
++ .compat_ioctl = btrfs_compat_ioctl,
+ #endif
+ .copy_file_range = btrfs_copy_file_range,
+ .clone_file_range = btrfs_clone_file_range,
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 2aaba58b4856..167fc3d49450 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -10184,7 +10184,7 @@ static const struct file_operations btrfs_dir_file_operations = {
+ .iterate = btrfs_real_readdir,
+ .unlocked_ioctl = btrfs_ioctl,
+ #ifdef CONFIG_COMPAT
+- .compat_ioctl = btrfs_ioctl,
++ .compat_ioctl = btrfs_compat_ioctl,
+ #endif
+ .release = btrfs_release_file,
+ .fsync = btrfs_sync_file,
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index 5a23806ae418..f545f81f642d 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -5552,3 +5552,24 @@ long btrfs_ioctl(struct file *file, unsigned int
+
+ return -ENOTTY;
+ }
++
++#ifdef CONFIG_COMPAT
++long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++ switch (cmd) {
++ case FS_IOC32_GETFLAGS:
++ cmd = FS_IOC_GETFLAGS;
++ break;
++ case FS_IOC32_SETFLAGS:
++ cmd = FS_IOC_SETFLAGS;
++ break;
++ case FS_IOC32_GETVERSION:
++ cmd = FS_IOC_GETVERSION;
++ break;
++ default:
++ return -ENOIOCTLCMD;
++ }
++
++ return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
++}
++#endif
+diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
+index 59727e32ed0f..af0ec2d5ad0e 100644
+--- a/fs/cifs/sess.c
++++ b/fs/cifs/sess.c
+@@ -400,19 +400,27 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+ sec_blob->LmChallengeResponse.MaximumLength = 0;
+
+ sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
+- rc = setup_ntlmv2_rsp(ses, nls_cp);
+- if (rc) {
+- cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
+- goto setup_ntlmv2_ret;
++ if (ses->user_name != NULL) {
++ rc = setup_ntlmv2_rsp(ses, nls_cp);
++ if (rc) {
++ cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
++ goto setup_ntlmv2_ret;
++ }
++ memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
++ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
++ tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
++
++ sec_blob->NtChallengeResponse.Length =
++ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
++ sec_blob->NtChallengeResponse.MaximumLength =
++ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
++ } else {
++ /*
++ * don't send an NT Response for anonymous access
++ */
++ sec_blob->NtChallengeResponse.Length = 0;
++ sec_blob->NtChallengeResponse.MaximumLength = 0;
+ }
+- memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+- ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+- tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+-
+- sec_blob->NtChallengeResponse.Length =
+- cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+- sec_blob->NtChallengeResponse.MaximumLength =
+- cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+
+ if (ses->domainName == NULL) {
+ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+@@ -670,20 +678,24 @@ sess_auth_lanman(struct sess_data *sess_data)
+
+ pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
+
+- /* no capabilities flags in old lanman negotiation */
+- pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+-
+- /* Calculate hash with password and copy into bcc_ptr.
+- * Encryption Key (stored as in cryptkey) gets used if the
+- * security mode bit in Negottiate Protocol response states
+- * to use challenge/response method (i.e. Password bit is 1).
+- */
+- rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
+- ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
+- true : false, lnm_session_key);
+-
+- memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
+- bcc_ptr += CIFS_AUTH_RESP_SIZE;
++ if (ses->user_name != NULL) {
++ /* no capabilities flags in old lanman negotiation */
++ pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
++
++ /* Calculate hash with password and copy into bcc_ptr.
++ * Encryption Key (stored as in cryptkey) gets used if the
++ * security mode bit in Negottiate Protocol response states
++ * to use challenge/response method (i.e. Password bit is 1).
++ */
++ rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
++ ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
++ true : false, lnm_session_key);
++
++ memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
++ bcc_ptr += CIFS_AUTH_RESP_SIZE;
++ } else {
++ pSMB->old_req.PasswordLength = 0;
++ }
+
+ /*
+ * can not sign if LANMAN negotiated so no need
+@@ -769,26 +781,31 @@ sess_auth_ntlm(struct sess_data *sess_data)
+ capabilities = cifs_ssetup_hdr(ses, pSMB);
+
+ pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
+- pSMB->req_no_secext.CaseInsensitivePasswordLength =
+- cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+- pSMB->req_no_secext.CaseSensitivePasswordLength =
+- cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+-
+- /* calculate ntlm response and session key */
+- rc = setup_ntlm_response(ses, sess_data->nls_cp);
+- if (rc) {
+- cifs_dbg(VFS, "Error %d during NTLM authentication\n",
+- rc);
+- goto out;
+- }
++ if (ses->user_name != NULL) {
++ pSMB->req_no_secext.CaseInsensitivePasswordLength =
++ cpu_to_le16(CIFS_AUTH_RESP_SIZE);
++ pSMB->req_no_secext.CaseSensitivePasswordLength =
++ cpu_to_le16(CIFS_AUTH_RESP_SIZE);
++
++ /* calculate ntlm response and session key */
++ rc = setup_ntlm_response(ses, sess_data->nls_cp);
++ if (rc) {
++ cifs_dbg(VFS, "Error %d during NTLM authentication\n",
++ rc);
++ goto out;
++ }
+
+- /* copy ntlm response */
+- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+- CIFS_AUTH_RESP_SIZE);
+- bcc_ptr += CIFS_AUTH_RESP_SIZE;
+- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+- CIFS_AUTH_RESP_SIZE);
+- bcc_ptr += CIFS_AUTH_RESP_SIZE;
++ /* copy ntlm response */
++ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
++ CIFS_AUTH_RESP_SIZE);
++ bcc_ptr += CIFS_AUTH_RESP_SIZE;
++ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
++ CIFS_AUTH_RESP_SIZE);
++ bcc_ptr += CIFS_AUTH_RESP_SIZE;
++ } else {
++ pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
++ pSMB->req_no_secext.CaseSensitivePasswordLength = 0;
++ }
+
+ if (ses->capabilities & CAP_UNICODE) {
+ /* unicode strings must be word aligned */
+@@ -878,22 +895,26 @@ sess_auth_ntlmv2(struct sess_data *sess_data)
+ /* LM2 password would be here if we supported it */
+ pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
+
+- /* calculate nlmv2 response and session key */
+- rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp);
+- if (rc) {
+- cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc);
+- goto out;
+- }
++ if (ses->user_name != NULL) {
++ /* calculate nlmv2 response and session key */
++ rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp);
++ if (rc) {
++ cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc);
++ goto out;
++ }
+
+- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+- ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+- bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
++ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
++ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
++ bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+
+- /* set case sensitive password length after tilen may get
+- * assigned, tilen is 0 otherwise.
+- */
+- pSMB->req_no_secext.CaseSensitivePasswordLength =
+- cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
++ /* set case sensitive password length after tilen may get
++ * assigned, tilen is 0 otherwise.
++ */
++ pSMB->req_no_secext.CaseSensitivePasswordLength =
++ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
++ } else {
++ pSMB->req_no_secext.CaseSensitivePasswordLength = 0;
++ }
+
+ if (ses->capabilities & CAP_UNICODE) {
+ if (sess_data->iov[0].iov_len % 2) {
+diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
+index bc0bb9c34f72..0ffa18094335 100644
+--- a/fs/cifs/smb2glob.h
++++ b/fs/cifs/smb2glob.h
+@@ -44,6 +44,7 @@
+ #define SMB2_OP_DELETE 7
+ #define SMB2_OP_HARDLINK 8
+ #define SMB2_OP_SET_EOF 9
++#define SMB2_OP_RMDIR 10
+
+ /* Used when constructing chained read requests. */
+ #define CHAINED_REQUEST 1
+diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
+index 899bbc86f73e..4f0231e685a9 100644
+--- a/fs/cifs/smb2inode.c
++++ b/fs/cifs/smb2inode.c
+@@ -80,6 +80,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
+ * SMB2_open() call.
+ */
+ break;
++ case SMB2_OP_RMDIR:
++ tmprc = SMB2_rmdir(xid, tcon, fid.persistent_fid,
++ fid.volatile_fid);
++ break;
+ case SMB2_OP_RENAME:
+ tmprc = SMB2_rename(xid, tcon, fid.persistent_fid,
+ fid.volatile_fid, (__le16 *)data);
+@@ -191,8 +195,8 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
+ struct cifs_sb_info *cifs_sb)
+ {
+ return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
+- CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE,
+- NULL, SMB2_OP_DELETE);
++ CREATE_NOT_FILE,
++ NULL, SMB2_OP_RMDIR);
+ }
+
+ int
+diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
+index 42e1f440eb1e..8f38e33d365b 100644
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -2575,6 +2575,22 @@ SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
+ }
+
+ int
++SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
++ u64 persistent_fid, u64 volatile_fid)
++{
++ __u8 delete_pending = 1;
++ void *data;
++ unsigned int size;
++
++ data = &delete_pending;
++ size = 1; /* sizeof __u8 */
++
++ return send_set_info(xid, tcon, persistent_fid, volatile_fid,
++ current->tgid, FILE_DISPOSITION_INFORMATION, 1, &data,
++ &size);
++}
++
++int
+ SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
+ {
+diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
+index 4f07dc93608d..eb2cde2f64ba 100644
+--- a/fs/cifs/smb2proto.h
++++ b/fs/cifs/smb2proto.h
+@@ -141,6 +141,8 @@ extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
+ extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid,
+ __le16 *target_file);
++extern int SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
++ u64 persistent_fid, u64 volatile_fid);
+ extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid,
+ __le16 *target_file);
+diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
+index 06f5aa478bf2..1ac263eddc4e 100644
+--- a/fs/crypto/keyinfo.c
++++ b/fs/crypto/keyinfo.c
+@@ -78,6 +78,67 @@ out:
+ return res;
+ }
+
++static int validate_user_key(struct fscrypt_info *crypt_info,
++ struct fscrypt_context *ctx, u8 *raw_key,
++ u8 *prefix, int prefix_size)
++{
++ u8 *full_key_descriptor;
++ struct key *keyring_key;
++ struct fscrypt_key *master_key;
++ const struct user_key_payload *ukp;
++ int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1;
++ int res;
++
++ full_key_descriptor = kmalloc(full_key_len, GFP_NOFS);
++ if (!full_key_descriptor)
++ return -ENOMEM;
++
++ memcpy(full_key_descriptor, prefix, prefix_size);
++ sprintf(full_key_descriptor + prefix_size,
++ "%*phN", FS_KEY_DESCRIPTOR_SIZE,
++ ctx->master_key_descriptor);
++ full_key_descriptor[full_key_len - 1] = '\0';
++ keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
++ kfree(full_key_descriptor);
++ if (IS_ERR(keyring_key))
++ return PTR_ERR(keyring_key);
++
++ if (keyring_key->type != &key_type_logon) {
++ printk_once(KERN_WARNING
++ "%s: key type must be logon\n", __func__);
++ res = -ENOKEY;
++ goto out;
++ }
++ down_read(&keyring_key->sem);
++ ukp = user_key_payload(keyring_key);
++ if (ukp->datalen != sizeof(struct fscrypt_key)) {
++ res = -EINVAL;
++ up_read(&keyring_key->sem);
++ goto out;
++ }
++ master_key = (struct fscrypt_key *)ukp->data;
++ BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
++
++ if (master_key->size != FS_AES_256_XTS_KEY_SIZE) {
++ printk_once(KERN_WARNING
++ "%s: key size incorrect: %d\n",
++ __func__, master_key->size);
++ res = -ENOKEY;
++ up_read(&keyring_key->sem);
++ goto out;
++ }
++ res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
++ up_read(&keyring_key->sem);
++ if (res)
++ goto out;
++
++ crypt_info->ci_keyring_key = keyring_key;
++ return 0;
++out:
++ key_put(keyring_key);
++ return res;
++}
++
+ static void put_crypt_info(struct fscrypt_info *ci)
+ {
+ if (!ci)
+@@ -91,12 +152,7 @@ static void put_crypt_info(struct fscrypt_info *ci)
+ int get_crypt_info(struct inode *inode)
+ {
+ struct fscrypt_info *crypt_info;
+- u8 full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE +
+- (FS_KEY_DESCRIPTOR_SIZE * 2) + 1];
+- struct key *keyring_key = NULL;
+- struct fscrypt_key *master_key;
+ struct fscrypt_context ctx;
+- const struct user_key_payload *ukp;
+ struct crypto_skcipher *ctfm;
+ const char *cipher_str;
+ u8 raw_key[FS_MAX_KEY_SIZE];
+@@ -167,48 +223,24 @@ retry:
+ memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
+ goto got_key;
+ }
+- memcpy(full_key_descriptor, FS_KEY_DESC_PREFIX,
+- FS_KEY_DESC_PREFIX_SIZE);
+- sprintf(full_key_descriptor + FS_KEY_DESC_PREFIX_SIZE,
+- "%*phN", FS_KEY_DESCRIPTOR_SIZE,
+- ctx.master_key_descriptor);
+- full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE +
+- (2 * FS_KEY_DESCRIPTOR_SIZE)] = '\0';
+- keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
+- if (IS_ERR(keyring_key)) {
+- res = PTR_ERR(keyring_key);
+- keyring_key = NULL;
+- goto out;
+- }
+- crypt_info->ci_keyring_key = keyring_key;
+- if (keyring_key->type != &key_type_logon) {
+- printk_once(KERN_WARNING
+- "%s: key type must be logon\n", __func__);
+- res = -ENOKEY;
+- goto out;
+- }
+- down_read(&keyring_key->sem);
+- ukp = user_key_payload(keyring_key);
+- if (ukp->datalen != sizeof(struct fscrypt_key)) {
+- res = -EINVAL;
+- up_read(&keyring_key->sem);
+- goto out;
+- }
+- master_key = (struct fscrypt_key *)ukp->data;
+- BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
+
+- if (master_key->size != FS_AES_256_XTS_KEY_SIZE) {
+- printk_once(KERN_WARNING
+- "%s: key size incorrect: %d\n",
+- __func__, master_key->size);
+- res = -ENOKEY;
+- up_read(&keyring_key->sem);
++ res = validate_user_key(crypt_info, &ctx, raw_key,
++ FS_KEY_DESC_PREFIX, FS_KEY_DESC_PREFIX_SIZE);
++ if (res && inode->i_sb->s_cop->key_prefix) {
++ u8 *prefix = NULL;
++ int prefix_size, res2;
++
++ prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix);
++ res2 = validate_user_key(crypt_info, &ctx, raw_key,
++ prefix, prefix_size);
++ if (res2) {
++ if (res2 == -ENOKEY)
++ res = -ENOKEY;
++ goto out;
++ }
++ } else if (res) {
+ goto out;
+ }
+- res = derive_key_aes(ctx.nonce, master_key->raw, raw_key);
+- up_read(&keyring_key->sem);
+- if (res)
+- goto out;
+ got_key:
+ ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
+ if (!ctfm || IS_ERR(ctfm)) {
+diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
+index 7a4558d17f36..2effb79b5f3e 100644
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -680,6 +680,10 @@ enum {
+ MAX_TIME,
+ };
+
++#ifdef CONFIG_F2FS_FS_ENCRYPTION
++#define F2FS_KEY_DESC_PREFIX "f2fs:"
++#define F2FS_KEY_DESC_PREFIX_SIZE 5
++#endif
+ struct f2fs_sb_info {
+ struct super_block *sb; /* pointer to VFS super block */
+ struct proc_dir_entry *s_proc; /* proc entry */
+@@ -687,6 +691,10 @@ struct f2fs_sb_info {
+ int valid_super_block; /* valid super block no */
+ int s_flag; /* flags for sbi */
+
++#ifdef CONFIG_F2FS_FS_ENCRYPTION
++ u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE];
++ u8 key_prefix_size;
++#endif
+ /* for node-related operations */
+ struct f2fs_nm_info *nm_info; /* node manager */
+ struct inode *node_inode; /* cache node blocks */
+diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
+index 006f87d69921..e11385bbd4c6 100644
+--- a/fs/f2fs/super.c
++++ b/fs/f2fs/super.c
+@@ -893,6 +893,12 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
+ ctx, len, NULL);
+ }
+
++static int f2fs_key_prefix(struct inode *inode, u8 **key)
++{
++ *key = F2FS_I_SB(inode)->key_prefix;
++ return F2FS_I_SB(inode)->key_prefix_size;
++}
++
+ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
+ void *fs_data)
+ {
+@@ -909,6 +915,7 @@ static unsigned f2fs_max_namelen(struct inode *inode)
+
+ static struct fscrypt_operations f2fs_cryptops = {
+ .get_context = f2fs_get_context,
++ .key_prefix = f2fs_key_prefix,
+ .set_context = f2fs_set_context,
+ .is_encrypted = f2fs_encrypted_inode,
+ .empty_dir = f2fs_empty_dir,
+@@ -1231,6 +1238,12 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
+
+ INIT_LIST_HEAD(&sbi->s_list);
+ mutex_init(&sbi->umount_mutex);
++
++#ifdef CONFIG_F2FS_FS_ENCRYPTION
++ memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX,
++ F2FS_KEY_DESC_PREFIX_SIZE);
++ sbi->key_prefix_size = F2FS_KEY_DESC_PREFIX_SIZE;
++#endif
+ }
+
+ /*
+diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
+index 458cf463047b..82067ca22f2b 100644
+--- a/fs/hpfs/super.c
++++ b/fs/hpfs/super.c
+@@ -15,6 +15,7 @@
+ #include <linux/sched.h>
+ #include <linux/bitmap.h>
+ #include <linux/slab.h>
++#include <linux/seq_file.h>
+
+ /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
+
+@@ -453,10 +454,6 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
+ int lowercase, eas, chk, errs, chkdsk, timeshift;
+ int o;
+ struct hpfs_sb_info *sbi = hpfs_sb(s);
+- char *new_opts = kstrdup(data, GFP_KERNEL);
+-
+- if (!new_opts)
+- return -ENOMEM;
+
+ sync_filesystem(s);
+
+@@ -493,17 +490,44 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
+
+ if (!(*flags & MS_RDONLY)) mark_dirty(s, 1);
+
+- replace_mount_options(s, new_opts);
+-
+ hpfs_unlock(s);
+ return 0;
+
+ out_err:
+ hpfs_unlock(s);
+- kfree(new_opts);
+ return -EINVAL;
+ }
+
++static int hpfs_show_options(struct seq_file *seq, struct dentry *root)
++{
++ struct hpfs_sb_info *sbi = hpfs_sb(root->d_sb);
++
++ seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, sbi->sb_uid));
++ seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, sbi->sb_gid));
++ seq_printf(seq, ",umask=%03o", (~sbi->sb_mode & 0777));
++ if (sbi->sb_lowercase)
++ seq_printf(seq, ",case=lower");
++ if (!sbi->sb_chk)
++ seq_printf(seq, ",check=none");
++ if (sbi->sb_chk == 2)
++ seq_printf(seq, ",check=strict");
++ if (!sbi->sb_err)
++ seq_printf(seq, ",errors=continue");
++ if (sbi->sb_err == 2)
++ seq_printf(seq, ",errors=panic");
++ if (!sbi->sb_chkdsk)
++ seq_printf(seq, ",chkdsk=no");
++ if (sbi->sb_chkdsk == 2)
++ seq_printf(seq, ",chkdsk=always");
++ if (!sbi->sb_eas)
++ seq_printf(seq, ",eas=no");
++ if (sbi->sb_eas == 1)
++ seq_printf(seq, ",eas=ro");
++ if (sbi->sb_timeshift)
++ seq_printf(seq, ",timeshift=%d", sbi->sb_timeshift);
++ return 0;
++}
++
+ /* Super operations */
+
+ static const struct super_operations hpfs_sops =
+@@ -514,7 +538,7 @@ static const struct super_operations hpfs_sops =
+ .put_super = hpfs_put_super,
+ .statfs = hpfs_statfs,
+ .remount_fs = hpfs_remount_fs,
+- .show_options = generic_show_options,
++ .show_options = hpfs_show_options,
+ };
+
+ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
+@@ -537,8 +561,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
+
+ int o;
+
+- save_mount_options(s, options);
+-
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi) {
+ return -ENOMEM;
+diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
+index 618ced381a14..7c9fbf504f07 100644
+--- a/fs/nfs/callback_proc.c
++++ b/fs/nfs/callback_proc.c
+@@ -500,8 +500,10 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
+ cps->slot = slot;
+
+ /* The ca_maxresponsesize_cached is 0 with no DRC */
+- if (args->csa_cachethis != 0)
+- return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
++ if (args->csa_cachethis != 0) {
++ status = htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
++ goto out_unlock;
++ }
+
+ /*
+ * Check for pending referring calls. If a match is found, a
+diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
+index 35a52a880b2f..6bd05700d8c9 100644
+--- a/include/asm-generic/qspinlock.h
++++ b/include/asm-generic/qspinlock.h
+@@ -28,7 +28,30 @@
+ */
+ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
+ {
+- return atomic_read(&lock->val);
++ /*
++ * queued_spin_lock_slowpath() can ACQUIRE the lock before
++ * issuing the unordered store that sets _Q_LOCKED_VAL.
++ *
++ * See both smp_cond_acquire() sites for more detail.
++ *
++ * This however means that in code like:
++ *
++ * spin_lock(A) spin_lock(B)
++ * spin_unlock_wait(B) spin_is_locked(A)
++ * do_something() do_something()
++ *
++ * Both CPUs can end up running do_something() because the store
++ * setting _Q_LOCKED_VAL will pass through the loads in
++ * spin_unlock_wait() and/or spin_is_locked().
++ *
++ * Avoid this by issuing a full memory barrier between the spin_lock()
++ * and the loads in spin_unlock_wait() and spin_is_locked().
++ *
++ * Note that regular mutual exclusion doesn't care about this
++ * delayed store.
++ */
++ smp_mb();
++ return atomic_read(&lock->val) & _Q_LOCKED_MASK;
+ }
+
+ /**
+@@ -108,6 +131,8 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
+ */
+ static inline void queued_spin_unlock_wait(struct qspinlock *lock)
+ {
++ /* See queued_spin_is_locked() */
++ smp_mb();
+ while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+ cpu_relax();
+ }
+diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
+index 3d1a3af5cf59..a2508a8f9a9c 100644
+--- a/include/asm-generic/siginfo.h
++++ b/include/asm-generic/siginfo.h
+@@ -17,21 +17,6 @@
+ struct siginfo;
+ void do_schedule_next_timer(struct siginfo *info);
+
+-#ifndef HAVE_ARCH_COPY_SIGINFO
+-
+-#include <linux/string.h>
+-
+-static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
+-{
+- if (from->si_code < 0)
+- memcpy(to, from, sizeof(*to));
+- else
+- /* _sigchld is currently the largest know union member */
+- memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
+-}
+-
+-#endif
+-
+ extern int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
+
+ #endif
+diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
+index 735f9f8c4e43..5261751f6bd4 100644
+--- a/include/linux/can/dev.h
++++ b/include/linux/can/dev.h
+@@ -40,8 +40,11 @@ struct can_priv {
+ struct can_clock clock;
+
+ enum can_state state;
+- u32 ctrlmode;
+- u32 ctrlmode_supported;
++
++ /* CAN controller features - see include/uapi/linux/can/netlink.h */
++ u32 ctrlmode; /* current options setting */
++ u32 ctrlmode_supported; /* options that can be modified by netlink */
++ u32 ctrlmode_static; /* static enabled options for driver/hardware */
+
+ int restart_ms;
+ struct timer_list restart_timer;
+@@ -108,6 +111,21 @@ static inline bool can_is_canfd_skb(const struct sk_buff *skb)
+ return skb->len == CANFD_MTU;
+ }
+
++/* helper to define static CAN controller features at device creation time */
++static inline void can_set_static_ctrlmode(struct net_device *dev,
++ u32 static_mode)
++{
++ struct can_priv *priv = netdev_priv(dev);
++
++ /* alloc_candev() succeeded => netdev_priv() is valid at this point */
++ priv->ctrlmode = static_mode;
++ priv->ctrlmode_static = static_mode;
++
++ /* override MTU which was set by default in can_setup()? */
++ if (static_mode & CAN_CTRLMODE_FD)
++ dev->mtu = CANFD_MTU;
++}
++
+ /* get data length from can_dlc with sanitized can_dlc */
+ u8 can_dlc2len(u8 can_dlc);
+
+diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
+index 6027f6bbb061..cfa6cde25f8e 100644
+--- a/include/linux/fscrypto.h
++++ b/include/linux/fscrypto.h
+@@ -175,6 +175,7 @@ struct fscrypt_name {
+ */
+ struct fscrypt_operations {
+ int (*get_context)(struct inode *, void *, size_t);
++ int (*key_prefix)(struct inode *, u8 **);
+ int (*prepare_context)(struct inode *);
+ int (*set_context)(struct inode *, const void *, size_t, void *);
+ int (*dummy_context)(struct inode *);
+diff --git a/include/linux/signal.h b/include/linux/signal.h
+index 92557bbce7e7..d80259afb9e5 100644
+--- a/include/linux/signal.h
++++ b/include/linux/signal.h
+@@ -28,6 +28,21 @@ struct sigpending {
+ sigset_t signal;
+ };
+
++#ifndef HAVE_ARCH_COPY_SIGINFO
++
++#include <linux/string.h>
++
++static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
++{
++ if (from->si_code < 0)
++ memcpy(to, from, sizeof(*to));
++ else
++ /* _sigchld is currently the largest know union member */
++ memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
++}
++
++#endif
++
+ /*
+ * Define some primitives to manipulate sigset_t.
+ */
+diff --git a/include/linux/tty.h b/include/linux/tty.h
+index 3b09f235db66..a34442031aae 100644
+--- a/include/linux/tty.h
++++ b/include/linux/tty.h
+@@ -338,7 +338,6 @@ struct tty_file_private {
+ #define TTY_OTHER_CLOSED 2 /* Other side (if any) has closed */
+ #define TTY_EXCLUSIVE 3 /* Exclusive open mode */
+ #define TTY_DO_WRITE_WAKEUP 5 /* Call write_wakeup after queuing new */
+-#define TTY_OTHER_DONE 6 /* Closed pty has completed input processing */
+ #define TTY_LDISC_OPEN 11 /* Line discipline is open */
+ #define TTY_PTY_LOCK 16 /* pty private */
+ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */
+@@ -457,6 +456,7 @@ extern void tty_buffer_init(struct tty_port *port);
+ extern void tty_buffer_set_lock_subclass(struct tty_port *port);
+ extern bool tty_buffer_restart_work(struct tty_port *port);
+ extern bool tty_buffer_cancel_work(struct tty_port *port);
++extern void tty_buffer_flush_work(struct tty_port *port);
+ extern speed_t tty_termios_baud_rate(struct ktermios *termios);
+ extern speed_t tty_termios_input_baud_rate(struct ktermios *termios);
+ extern void tty_termios_encode_baud_rate(struct ktermios *termios,
+diff --git a/include/linux/usb.h b/include/linux/usb.h
+index 6a9a0c28415d..818bf7087996 100644
+--- a/include/linux/usb.h
++++ b/include/linux/usb.h
+@@ -374,13 +374,12 @@ struct usb_bus {
+
+ int devnum_next; /* Next open device number in
+ * round-robin allocation */
++ struct mutex devnum_next_mutex; /* devnum_next mutex */
+
+ struct usb_devmap devmap; /* device address allocation map */
+ struct usb_device *root_hub; /* Root hub */
+ struct usb_bus *hs_companion; /* Companion EHCI bus, if any */
+
+- struct mutex usb_address0_mutex; /* unaddressed device mutex */
+-
+ int bandwidth_allocated; /* on this bus: how much of the time
+ * reserved for periodic (intr/iso)
+ * requests is used, on average?
+@@ -1069,7 +1068,7 @@ struct usbdrv_wrap {
+ * for interfaces bound to this driver.
+ * @soft_unbind: if set to 1, the USB core will not kill URBs and disable
+ * endpoints before calling the driver's disconnect method.
+- * @disable_hub_initiated_lpm: if set to 0, the USB core will not allow hubs
++ * @disable_hub_initiated_lpm: if set to 1, the USB core will not allow hubs
+ * to initiate lower power link state transitions when an idle timeout
+ * occurs. Device-initiated USB 3.0 link PM will still be allowed.
+ *
+diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
+index b98f831dcda3..66fc13705ab7 100644
+--- a/include/linux/usb/hcd.h
++++ b/include/linux/usb/hcd.h
+@@ -181,6 +181,7 @@ struct usb_hcd {
+ * bandwidth_mutex should be dropped after a successful control message
+ * to the device, or resetting the bandwidth after a failed attempt.
+ */
++ struct mutex *address0_mutex;
+ struct mutex *bandwidth_mutex;
+ struct usb_hcd *shared_hcd;
+ struct usb_hcd *primary_hcd;
+diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
+index 74d79bde7075..a505079dcf88 100644
+--- a/include/scsi/scsi_device.h
++++ b/include/scsi/scsi_device.h
+@@ -242,6 +242,7 @@ scmd_printk(const char *, const struct scsi_cmnd *, const char *, ...);
+ enum scsi_target_state {
+ STARGET_CREATED = 1,
+ STARGET_RUNNING,
++ STARGET_REMOVE,
+ STARGET_DEL,
+ };
+
+diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
+index ef7159012cf3..b0b93fd33af9 100644
+--- a/kernel/sched/loadavg.c
++++ b/kernel/sched/loadavg.c
+@@ -99,10 +99,13 @@ long calc_load_fold_active(struct rq *this_rq)
+ static unsigned long
+ calc_load(unsigned long load, unsigned long exp, unsigned long active)
+ {
+- load *= exp;
+- load += active * (FIXED_1 - exp);
+- load += 1UL << (FSHIFT - 1);
+- return load >> FSHIFT;
++ unsigned long newload;
++
++ newload = load * exp + active * (FIXED_1 - exp);
++ if (active >= load)
++ newload += FIXED_1-1;
++
++ return newload / FIXED_1;
+ }
+
+ #ifdef CONFIG_NO_HZ_COMMON
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index 95181e36891a..9c143739b8d7 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -437,7 +437,7 @@ struct ring_buffer_per_cpu {
+ raw_spinlock_t reader_lock; /* serialize readers */
+ arch_spinlock_t lock;
+ struct lock_class_key lock_key;
+- unsigned int nr_pages;
++ unsigned long nr_pages;
+ unsigned int current_context;
+ struct list_head *pages;
+ struct buffer_page *head_page; /* read from head */
+@@ -458,7 +458,7 @@ struct ring_buffer_per_cpu {
+ u64 write_stamp;
+ u64 read_stamp;
+ /* ring buffer pages to update, > 0 to add, < 0 to remove */
+- int nr_pages_to_update;
++ long nr_pages_to_update;
+ struct list_head new_pages; /* new pages to add */
+ struct work_struct update_pages_work;
+ struct completion update_done;
+@@ -1128,10 +1128,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
+ return 0;
+ }
+
+-static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
++static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
+ {
+- int i;
+ struct buffer_page *bpage, *tmp;
++ long i;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page;
+@@ -1168,7 +1168,7 @@ free_pages:
+ }
+
+ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+- unsigned nr_pages)
++ unsigned long nr_pages)
+ {
+ LIST_HEAD(pages);
+
+@@ -1193,7 +1193,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+ }
+
+ static struct ring_buffer_per_cpu *
+-rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
++rb_allocate_cpu_buffer(struct ring_buffer *buffer, long nr_pages, int cpu)
+ {
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct buffer_page *bpage;
+@@ -1293,8 +1293,9 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+ struct lock_class_key *key)
+ {
+ struct ring_buffer *buffer;
++ long nr_pages;
+ int bsize;
+- int cpu, nr_pages;
++ int cpu;
+
+ /* keep it in its own cache line */
+ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
+@@ -1420,12 +1421,12 @@ static inline unsigned long rb_page_write(struct buffer_page *bpage)
+ }
+
+ static int
+-rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
++rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
+ {
+ struct list_head *tail_page, *to_remove, *next_page;
+ struct buffer_page *to_remove_page, *tmp_iter_page;
+ struct buffer_page *last_page, *first_page;
+- unsigned int nr_removed;
++ unsigned long nr_removed;
+ unsigned long head_bit;
+ int page_entries;
+
+@@ -1642,7 +1643,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
+ int cpu_id)
+ {
+ struct ring_buffer_per_cpu *cpu_buffer;
+- unsigned nr_pages;
++ unsigned long nr_pages;
+ int cpu, err = 0;
+
+ /*
+@@ -1656,14 +1657,13 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
+ !cpumask_test_cpu(cpu_id, buffer->cpumask))
+ return size;
+
+- size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+- size *= BUF_PAGE_SIZE;
++ nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+
+ /* we need a minimum of two pages */
+- if (size < BUF_PAGE_SIZE * 2)
+- size = BUF_PAGE_SIZE * 2;
++ if (nr_pages < 2)
++ nr_pages = 2;
+
+- nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
++ size = nr_pages * BUF_PAGE_SIZE;
+
+ /*
+ * Don't succeed if resizing is disabled, as a reader might be
+@@ -4640,8 +4640,9 @@ static int rb_cpu_notify(struct notifier_block *self,
+ struct ring_buffer *buffer =
+ container_of(self, struct ring_buffer, cpu_notify);
+ long cpu = (long)hcpu;
+- int cpu_i, nr_pages_same;
+- unsigned int nr_pages;
++ long nr_pages_same;
++ int cpu_i;
++ unsigned long nr_pages;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
+index f9e47a70509c..53449a6ff6aa 100644
+--- a/scripts/Makefile.extrawarn
++++ b/scripts/Makefile.extrawarn
+@@ -24,6 +24,7 @@ warning-1 += $(call cc-option, -Wmissing-prototypes)
+ warning-1 += -Wold-style-definition
+ warning-1 += $(call cc-option, -Wmissing-include-dirs)
+ warning-1 += $(call cc-option, -Wunused-but-set-variable)
++warning-1 += $(call cc-option, -Wunused-const-variable)
+ warning-1 += $(call cc-disable-warning, missing-field-initializers)
+ warning-1 += $(call cc-disable-warning, sign-compare)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 4918ffa5ba68..d53c25e7a1c1 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -335,6 +335,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
+ case 0x10ec0283:
+ case 0x10ec0286:
+ case 0x10ec0288:
++ case 0x10ec0295:
+ case 0x10ec0298:
+ alc_update_coef_idx(codec, 0x10, 1<<9, 0);
+ break;
+@@ -342,6 +343,11 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
+ case 0x10ec0293:
+ alc_update_coef_idx(codec, 0xa, 1<<13, 0);
+ break;
++ case 0x10ec0234:
++ case 0x10ec0274:
++ case 0x10ec0294:
++ alc_update_coef_idx(codec, 0x10, 1<<15, 0);
++ break;
+ case 0x10ec0662:
+ if ((coef & 0x00f0) == 0x0030)
+ alc_update_coef_idx(codec, 0x4, 1<<10, 0); /* EAPD Ctrl */
+@@ -902,6 +908,7 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = {
+ { 0x10ec0298, 0x1028, 0, "ALC3266" },
+ { 0x10ec0256, 0x1028, 0, "ALC3246" },
+ { 0x10ec0225, 0x1028, 0, "ALC3253" },
++ { 0x10ec0295, 0x1028, 0, "ALC3254" },
+ { 0x10ec0670, 0x1025, 0, "ALC669X" },
+ { 0x10ec0676, 0x1025, 0, "ALC679X" },
+ { 0x10ec0282, 0x1043, 0, "ALC3229" },
+@@ -2647,6 +2654,7 @@ enum {
+ ALC269_TYPE_ALC255,
+ ALC269_TYPE_ALC256,
+ ALC269_TYPE_ALC225,
++ ALC269_TYPE_ALC294,
+ };
+
+ /*
+@@ -2677,6 +2685,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
+ case ALC269_TYPE_ALC255:
+ case ALC269_TYPE_ALC256:
+ case ALC269_TYPE_ALC225:
++ case ALC269_TYPE_ALC294:
+ ssids = alc269_ssids;
+ break;
+ default:
+@@ -3690,6 +3699,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
+ alc_process_coef_fw(codec, coef0668);
+ break;
+ case 0x10ec0225:
++ case 0x10ec0295:
+ alc_process_coef_fw(codec, coef0225);
+ break;
+ }
+@@ -3790,6 +3800,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
+ snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
+ break;
+ case 0x10ec0225:
++ case 0x10ec0295:
+ alc_update_coef_idx(codec, 0x45, 0x3f<<10, 0x31<<10);
+ snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
+ alc_process_coef_fw(codec, coef0225);
+@@ -3847,6 +3858,7 @@ static void alc_headset_mode_default(struct hda_codec *codec)
+
+ switch (codec->core.vendor_id) {
+ case 0x10ec0225:
++ case 0x10ec0295:
+ alc_process_coef_fw(codec, coef0225);
+ break;
+ case 0x10ec0255:
+@@ -3950,6 +3962,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
+ alc_process_coef_fw(codec, coef0688);
+ break;
+ case 0x10ec0225:
++ case 0x10ec0295:
+ alc_process_coef_fw(codec, coef0225);
+ break;
+ }
+@@ -4031,6 +4044,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
+ alc_process_coef_fw(codec, coef0688);
+ break;
+ case 0x10ec0225:
++ case 0x10ec0295:
+ alc_process_coef_fw(codec, coef0225);
+ break;
+ }
+@@ -4114,6 +4128,7 @@ static void alc_determine_headset_type(struct hda_codec *codec)
+ is_ctia = (val & 0x1c02) == 0x1c02;
+ break;
+ case 0x10ec0225:
++ case 0x10ec0295:
+ alc_process_coef_fw(codec, coef0225);
+ msleep(800);
+ val = alc_read_coef_idx(codec, 0x46);
+@@ -5459,8 +5474,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
+ SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
+ SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
+- SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
++ SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
++ SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
+@@ -5704,6 +5720,9 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+ {0x14, 0x90170110},
+ {0x21, 0x02211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++ {0x14, 0x90170130},
++ {0x21, 0x02211040}),
++ SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x12, 0x90a60140},
+ {0x14, 0x90170110},
+ {0x21, 0x02211020}),
+@@ -6026,8 +6045,14 @@ static int patch_alc269(struct hda_codec *codec)
+ alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/
+ break;
+ case 0x10ec0225:
++ case 0x10ec0295:
+ spec->codec_variant = ALC269_TYPE_ALC225;
+ break;
++ case 0x10ec0234:
++ case 0x10ec0274:
++ case 0x10ec0294:
++ spec->codec_variant = ALC269_TYPE_ALC294;
++ break;
+ }
+
+ if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) {
+@@ -6942,6 +6967,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
+ HDA_CODEC_ENTRY(0x10ec0225, "ALC225", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0231, "ALC231", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0233, "ALC233", patch_alc269),
++ HDA_CODEC_ENTRY(0x10ec0234, "ALC234", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0235, "ALC233", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0255, "ALC255", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0256, "ALC256", patch_alc269),
+@@ -6952,6 +6978,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
+ HDA_CODEC_ENTRY(0x10ec0269, "ALC269", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0270, "ALC270", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0272, "ALC272", patch_alc662),
++ HDA_CODEC_ENTRY(0x10ec0274, "ALC274", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0275, "ALC275", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0276, "ALC276", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0280, "ALC280", patch_alc269),
+@@ -6964,6 +6991,8 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
+ HDA_CODEC_ENTRY(0x10ec0290, "ALC290", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0292, "ALC292", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0293, "ALC293", patch_alc269),
++ HDA_CODEC_ENTRY(0x10ec0294, "ALC294", patch_alc269),
++ HDA_CODEC_ENTRY(0x10ec0295, "ALC295", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269),
+ HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861),
+ HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd),
+diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
+index 6765c7e949f3..f094f3c4ed84 100644
+--- a/tools/objtool/Makefile
++++ b/tools/objtool/Makefile
+@@ -30,6 +30,10 @@ INCLUDES := -I$(srctree)/tools/include
+ CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
+ LDFLAGS += -lelf $(LIBSUBCMD)
+
++# Allow old libelf to be used:
++elfshdr := $(shell echo '\#include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr)
++CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
++
+ AWK = awk
+ export srctree OUTPUT CFLAGS ARCH AWK
+ include $(srctree)/tools/build/Makefile.include
+diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
+index 7f3e00a2f907..aa1ff6596684 100644
+--- a/tools/objtool/elf.h
++++ b/tools/objtool/elf.h
+@@ -23,6 +23,11 @@
+ #include <linux/list.h>
+ #include <linux/hashtable.h>
+
++#ifdef LIBELF_USE_DEPRECATED
++# define elf_getshdrnum elf_getshnum
++# define elf_getshdrstrndx elf_getshstrndx
++#endif
++
+ struct section {
+ struct list_head list;
+ GElf_Shdr sh;
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-06-08 10:09 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-06-08 10:09 UTC (permalink / raw
To: gentoo-commits
commit: 2c441aecc4fae188c2eb290024668d79d0b1a51e
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Wed Jun 8 10:09:35 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Wed Jun 8 10:09:35 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=2c441aec
Linux patch 4.6.2
0000_README | 4 +
1001_linux-4.6.2.patch | 4710 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 4714 insertions(+)
diff --git a/0000_README b/0000_README
index 220d627..61071b1 100644
--- a/0000_README
+++ b/0000_README
@@ -47,6 +47,10 @@ Patch: 1000_linux-4.6.1.patch
From: http://www.kernel.org
Desc: Linux 4.6.1
+Patch: 1001_linux-4.6.2.patch
+From: http://www.kernel.org
+Desc: Linux 4.6.2
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1001_linux-4.6.2.patch b/1001_linux-4.6.2.patch
new file mode 100644
index 0000000..2da7598
--- /dev/null
+++ b/1001_linux-4.6.2.patch
@@ -0,0 +1,4710 @@
+diff --git a/Documentation/DocBook/gpu.tmpl b/Documentation/DocBook/gpu.tmpl
+index 1692c4dd5487..ab037f6fa2ee 100644
+--- a/Documentation/DocBook/gpu.tmpl
++++ b/Documentation/DocBook/gpu.tmpl
+@@ -1623,6 +1623,12 @@ void intel_crt_init(struct drm_device *dev)
+ !Edrivers/gpu/drm/drm_dp_helper.c
+ </sect2>
+ <sect2>
++ <title>Display Port Dual Mode Adaptor Helper Functions Reference</title>
++!Pdrivers/gpu/drm/drm_dp_dual_mode_helper.c dp dual mode helpers
++!Iinclude/drm/drm_dp_dual_mode_helper.h
++!Edrivers/gpu/drm/drm_dp_dual_mode_helper.c
++ </sect2>
++ <sect2>
+ <title>Display Port MST Helper Functions Reference</title>
+ !Pdrivers/gpu/drm/drm_dp_mst_topology.c dp mst helper
+ !Iinclude/drm/drm_dp_mst_helper.h
+diff --git a/Documentation/devicetree/bindings/clock/imx35-clock.txt b/Documentation/devicetree/bindings/clock/imx35-clock.txt
+index a70356452a82..f49783213c56 100644
+--- a/Documentation/devicetree/bindings/clock/imx35-clock.txt
++++ b/Documentation/devicetree/bindings/clock/imx35-clock.txt
+@@ -94,6 +94,7 @@ clocks and IDs.
+ csi_sel 79
+ iim_gate 80
+ gpu2d_gate 81
++ ckli_gate 82
+
+ Examples:
+
+diff --git a/Makefile b/Makefile
+index 2fcc41ea99a3..93068c2d0656 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 6
+-SUBLEVEL = 1
++SUBLEVEL = 2
+ EXTRAVERSION =
+ NAME = Charred Weasel
+
+@@ -364,7 +364,7 @@ AFLAGS_MODULE =
+ LDFLAGS_MODULE =
+ CFLAGS_KERNEL =
+ AFLAGS_KERNEL =
+-CFLAGS_GCOV = -fprofile-arcs -ftest-coverage
++CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im
+ CFLAGS_KCOV = -fsanitize-coverage=trace-pc
+
+
+diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi
+index 85d2c377c332..8450944b28e6 100644
+--- a/arch/arm/boot/dts/armada-385-linksys.dtsi
++++ b/arch/arm/boot/dts/armada-385-linksys.dtsi
+@@ -245,7 +245,7 @@
+ button@2 {
+ label = "Factory Reset Button";
+ linux,code = <KEY_RESTART>;
+- gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
++ gpios = <&gpio0 29 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+@@ -260,7 +260,7 @@
+ };
+
+ sata {
+- gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>;
++ gpios = <&gpio1 22 GPIO_ACTIVE_LOW>;
+ default-state = "off";
+ };
+ };
+@@ -313,7 +313,7 @@
+
+ &pinctrl {
+ keys_pin: keys-pin {
+- marvell,pins = "mpp24", "mpp47";
++ marvell,pins = "mpp24", "mpp29";
+ marvell,function = "gpio";
+ };
+
+diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
+index b89e6cf1271a..7a461541ce50 100644
+--- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
++++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
+@@ -304,13 +304,13 @@
+ button@1 {
+ label = "WPS";
+ linux,code = <KEY_WPS_BUTTON>;
+- gpios = <&gpio1 0 GPIO_ACTIVE_HIGH>;
++ gpios = <&gpio1 0 GPIO_ACTIVE_LOW>;
+ };
+
+ button@2 {
+ label = "Factory Reset Button";
+ linux,code = <KEY_RESTART>;
+- gpios = <&gpio1 1 GPIO_ACTIVE_HIGH>;
++ gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+diff --git a/arch/arm/boot/dts/exynos4210-trats.dts b/arch/arm/boot/dts/exynos4210-trats.dts
+index 1df2f0bc1d76..a9fae910bb15 100644
+--- a/arch/arm/boot/dts/exynos4210-trats.dts
++++ b/arch/arm/boot/dts/exynos4210-trats.dts
+@@ -298,6 +298,8 @@
+ compatible = "maxim,max8997-pmic";
+
+ reg = <0x66>;
++ interrupt-parent = <&gpx0>;
++ interrupts = <7 0>;
+
+ max8997,pmic-buck1-uses-gpio-dvs;
+ max8997,pmic-buck2-uses-gpio-dvs;
+diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h
+index b0c912feaa2f..8a394f336003 100644
+--- a/arch/arm/boot/dts/sama5d2-pinfunc.h
++++ b/arch/arm/boot/dts/sama5d2-pinfunc.h
+@@ -837,8 +837,8 @@
+ #define PIN_PD23__ISC_FIELD PINMUX_PIN(PIN_PD23, 6, 4)
+ #define PIN_PD24 120
+ #define PIN_PD24__GPIO PINMUX_PIN(PIN_PD24, 0, 0)
+-#define PIN_PD24__UTXD2 PINMUX_PIN(PIN_PD23, 1, 2)
+-#define PIN_PD24__FLEXCOM4_IO3 PINMUX_PIN(PIN_PD23, 3, 3)
++#define PIN_PD24__UTXD2 PINMUX_PIN(PIN_PD24, 1, 2)
++#define PIN_PD24__FLEXCOM4_IO3 PINMUX_PIN(PIN_PD24, 3, 3)
+ #define PIN_PD25 121
+ #define PIN_PD25__GPIO PINMUX_PIN(PIN_PD25, 0, 0)
+ #define PIN_PD25__SPI1_SPCK PINMUX_PIN(PIN_PD25, 1, 3)
+diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
+index 2c8f5e6ad905..bf70d0ae93ce 100644
+--- a/arch/arm/boot/dts/sun4i-a10.dtsi
++++ b/arch/arm/boot/dts/sun4i-a10.dtsi
+@@ -96,7 +96,7 @@
+ allwinner,pipeline = "de_fe0-de_be0-lcd0-tve0";
+ clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>,
+ <&ahb_gates 44>, <&ahb_gates 46>,
+- <&dram_gates 25>, <&dram_gates 26>;
++ <&dram_gates 5>, <&dram_gates 25>, <&dram_gates 26>;
+ status = "disabled";
+ };
+ };
+diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
+index 0940a788f824..ee4e8e7d3e30 100644
+--- a/arch/arm/boot/dts/sun7i-a20.dtsi
++++ b/arch/arm/boot/dts/sun7i-a20.dtsi
+@@ -85,8 +85,9 @@
+ compatible = "allwinner,simple-framebuffer",
+ "simple-framebuffer";
+ allwinner,pipeline = "de_be0-lcd0-tve0";
+- clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>,
+- <&ahb_gates 44>, <&dram_gates 26>;
++ clocks = <&pll5 1>,
++ <&ahb_gates 34>, <&ahb_gates 36>, <&ahb_gates 44>,
++ <&dram_gates 5>, <&dram_gates 26>;
+ status = "disabled";
+ };
+ };
+diff --git a/arch/mips/ath79/early_printk.c b/arch/mips/ath79/early_printk.c
+index b955fafc58ba..d1adc59af5bf 100644
+--- a/arch/mips/ath79/early_printk.c
++++ b/arch/mips/ath79/early_printk.c
+@@ -31,13 +31,15 @@ static inline void prom_putchar_wait(void __iomem *reg, u32 mask, u32 val)
+ } while (1);
+ }
+
++#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
++
+ static void prom_putchar_ar71xx(unsigned char ch)
+ {
+ void __iomem *base = (void __iomem *)(KSEG1ADDR(AR71XX_UART_BASE));
+
+- prom_putchar_wait(base + UART_LSR * 4, UART_LSR_THRE, UART_LSR_THRE);
++ prom_putchar_wait(base + UART_LSR * 4, BOTH_EMPTY, BOTH_EMPTY);
+ __raw_writel(ch, base + UART_TX * 4);
+- prom_putchar_wait(base + UART_LSR * 4, UART_LSR_THRE, UART_LSR_THRE);
++ prom_putchar_wait(base + UART_LSR * 4, BOTH_EMPTY, BOTH_EMPTY);
+ }
+
+ static void prom_putchar_ar933x(unsigned char ch)
+diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
+index 867f924b05c7..e689b894353c 100644
+--- a/arch/mips/include/asm/asmmacro.h
++++ b/arch/mips/include/asm/asmmacro.h
+@@ -298,21 +298,21 @@
+ .set pop
+ .endm
+
+- .macro copy_u_w ws, n
++ .macro copy_s_w ws, n
+ .set push
+ .set mips32r2
+ .set fp=64
+ .set msa
+- copy_u.w $1, $w\ws[\n]
++ copy_s.w $1, $w\ws[\n]
+ .set pop
+ .endm
+
+- .macro copy_u_d ws, n
++ .macro copy_s_d ws, n
+ .set push
+ .set mips64r2
+ .set fp=64
+ .set msa
+- copy_u.d $1, $w\ws[\n]
++ copy_s.d $1, $w\ws[\n]
+ .set pop
+ .endm
+
+@@ -346,8 +346,8 @@
+ #define STH_MSA_INSN 0x5800081f
+ #define STW_MSA_INSN 0x5800082f
+ #define STD_MSA_INSN 0x5800083f
+-#define COPY_UW_MSA_INSN 0x58f00056
+-#define COPY_UD_MSA_INSN 0x58f80056
++#define COPY_SW_MSA_INSN 0x58b00056
++#define COPY_SD_MSA_INSN 0x58b80056
+ #define INSERT_W_MSA_INSN 0x59300816
+ #define INSERT_D_MSA_INSN 0x59380816
+ #else
+@@ -361,8 +361,8 @@
+ #define STH_MSA_INSN 0x78000825
+ #define STW_MSA_INSN 0x78000826
+ #define STD_MSA_INSN 0x78000827
+-#define COPY_UW_MSA_INSN 0x78f00059
+-#define COPY_UD_MSA_INSN 0x78f80059
++#define COPY_SW_MSA_INSN 0x78b00059
++#define COPY_SD_MSA_INSN 0x78b80059
+ #define INSERT_W_MSA_INSN 0x79300819
+ #define INSERT_D_MSA_INSN 0x79380819
+ #endif
+@@ -393,7 +393,7 @@
+ .set push
+ .set noat
+ SET_HARDFLOAT
+- addu $1, \base, \off
++ PTR_ADDU $1, \base, \off
+ .word LDB_MSA_INSN | (\wd << 6)
+ .set pop
+ .endm
+@@ -402,7 +402,7 @@
+ .set push
+ .set noat
+ SET_HARDFLOAT
+- addu $1, \base, \off
++ PTR_ADDU $1, \base, \off
+ .word LDH_MSA_INSN | (\wd << 6)
+ .set pop
+ .endm
+@@ -411,7 +411,7 @@
+ .set push
+ .set noat
+ SET_HARDFLOAT
+- addu $1, \base, \off
++ PTR_ADDU $1, \base, \off
+ .word LDW_MSA_INSN | (\wd << 6)
+ .set pop
+ .endm
+@@ -420,7 +420,7 @@
+ .set push
+ .set noat
+ SET_HARDFLOAT
+- addu $1, \base, \off
++ PTR_ADDU $1, \base, \off
+ .word LDD_MSA_INSN | (\wd << 6)
+ .set pop
+ .endm
+@@ -429,7 +429,7 @@
+ .set push
+ .set noat
+ SET_HARDFLOAT
+- addu $1, \base, \off
++ PTR_ADDU $1, \base, \off
+ .word STB_MSA_INSN | (\wd << 6)
+ .set pop
+ .endm
+@@ -438,7 +438,7 @@
+ .set push
+ .set noat
+ SET_HARDFLOAT
+- addu $1, \base, \off
++ PTR_ADDU $1, \base, \off
+ .word STH_MSA_INSN | (\wd << 6)
+ .set pop
+ .endm
+@@ -447,7 +447,7 @@
+ .set push
+ .set noat
+ SET_HARDFLOAT
+- addu $1, \base, \off
++ PTR_ADDU $1, \base, \off
+ .word STW_MSA_INSN | (\wd << 6)
+ .set pop
+ .endm
+@@ -456,26 +456,26 @@
+ .set push
+ .set noat
+ SET_HARDFLOAT
+- addu $1, \base, \off
++ PTR_ADDU $1, \base, \off
+ .word STD_MSA_INSN | (\wd << 6)
+ .set pop
+ .endm
+
+- .macro copy_u_w ws, n
++ .macro copy_s_w ws, n
+ .set push
+ .set noat
+ SET_HARDFLOAT
+ .insn
+- .word COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
++ .word COPY_SW_MSA_INSN | (\n << 16) | (\ws << 11)
+ .set pop
+ .endm
+
+- .macro copy_u_d ws, n
++ .macro copy_s_d ws, n
+ .set push
+ .set noat
+ SET_HARDFLOAT
+ .insn
+- .word COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
++ .word COPY_SD_MSA_INSN | (\n << 16) | (\ws << 11)
+ .set pop
+ .endm
+
+diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
+index 723229f4cf27..176de586a71a 100644
+--- a/arch/mips/include/asm/cacheflush.h
++++ b/arch/mips/include/asm/cacheflush.h
+@@ -51,7 +51,6 @@ extern void (*flush_cache_range)(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
+ extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn);
+ extern void __flush_dcache_page(struct page *page);
+-extern void __flush_icache_page(struct vm_area_struct *vma, struct page *page);
+
+ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+ static inline void flush_dcache_page(struct page *page)
+@@ -77,11 +76,6 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
+ static inline void flush_icache_page(struct vm_area_struct *vma,
+ struct page *page)
+ {
+- if (!cpu_has_ic_fills_f_dc && (vma->vm_flags & VM_EXEC) &&
+- Page_dcache_dirty(page)) {
+- __flush_icache_page(vma, page);
+- ClearPageDcacheDirty(page);
+- }
+ }
+
+ extern void (*flush_icache_range)(unsigned long start, unsigned long end);
+diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h
+index bbb85fe21642..6e4effa6f626 100644
+--- a/arch/mips/include/asm/msa.h
++++ b/arch/mips/include/asm/msa.h
+@@ -147,6 +147,19 @@ static inline void restore_msa(struct task_struct *t)
+ _restore_msa(t);
+ }
+
++static inline void init_msa_upper(void)
++{
++ /*
++ * Check cpu_has_msa only if it's a constant. This will allow the
++ * compiler to optimise out code for CPUs without MSA without adding
++ * an extra redundant check for CPUs with MSA.
++ */
++ if (__builtin_constant_p(cpu_has_msa) && !cpu_has_msa)
++ return;
++
++ _init_msa_upper();
++}
++
+ #ifdef TOOLCHAIN_SUPPORTS_MSA
+
+ #define __BUILD_MSA_CTL_REG(name, cs) \
+diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
+index 9a4fe0133ff1..65bf2c065db5 100644
+--- a/arch/mips/include/asm/pgtable.h
++++ b/arch/mips/include/asm/pgtable.h
+@@ -127,10 +127,14 @@ do { \
+ } \
+ } while(0)
+
++static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
++ pte_t *ptep, pte_t pteval);
++
+ #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
+
+ #define pte_none(pte) (!(((pte).pte_high) & ~_PAGE_GLOBAL))
+ #define pte_present(pte) ((pte).pte_low & _PAGE_PRESENT)
++#define pte_no_exec(pte) ((pte).pte_low & _PAGE_NO_EXEC)
+
+ static inline void set_pte(pte_t *ptep, pte_t pte)
+ {
+@@ -148,7 +152,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
+ buddy->pte_high |= _PAGE_GLOBAL;
+ }
+ }
+-#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+
+ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+@@ -166,6 +169,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
+
+ #define pte_none(pte) (!(pte_val(pte) & ~_PAGE_GLOBAL))
+ #define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
++#define pte_no_exec(pte) (pte_val(pte) & _PAGE_NO_EXEC)
+
+ /*
+ * Certain architectures need to do special things when pte's
+@@ -218,7 +222,6 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
+ }
+ #endif
+ }
+-#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+
+ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+@@ -234,6 +237,22 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
+ }
+ #endif
+
++static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
++ pte_t *ptep, pte_t pteval)
++{
++ extern void __update_cache(unsigned long address, pte_t pte);
++
++ if (!pte_present(pteval))
++ goto cache_sync_done;
++
++ if (pte_present(*ptep) && (pte_pfn(*ptep) == pte_pfn(pteval)))
++ goto cache_sync_done;
++
++ __update_cache(addr, pteval);
++cache_sync_done:
++ set_pte(ptep, pteval);
++}
++
+ /*
+ * (pmds are folded into puds so this doesn't get actually called,
+ * but the define is needed for a generic inline function.)
+@@ -430,15 +449,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+
+ extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
+ pte_t pte);
+-extern void __update_cache(struct vm_area_struct *vma, unsigned long address,
+- pte_t pte);
+
+ static inline void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+ {
+ pte_t pte = *ptep;
+ __update_tlb(vma, address, pte);
+- __update_cache(vma, address, pte);
+ }
+
+ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
+index 28b5d84a5022..ebb5c0f2f90d 100644
+--- a/arch/mips/include/asm/switch_to.h
++++ b/arch/mips/include/asm/switch_to.h
+@@ -105,7 +105,7 @@ do { \
+ __clear_software_ll_bit(); \
+ if (cpu_has_userlocal) \
+ write_c0_userlocal(task_thread_info(next)->tp_value); \
+- __restore_watch(); \
++ __restore_watch(next); \
+ (last) = resume(prev, next, task_thread_info(next)); \
+ } while (0)
+
+diff --git a/arch/mips/include/asm/watch.h b/arch/mips/include/asm/watch.h
+index 20126ec79359..6ffe3eadf105 100644
+--- a/arch/mips/include/asm/watch.h
++++ b/arch/mips/include/asm/watch.h
+@@ -12,21 +12,21 @@
+
+ #include <asm/mipsregs.h>
+
+-void mips_install_watch_registers(void);
++void mips_install_watch_registers(struct task_struct *t);
+ void mips_read_watch_registers(void);
+ void mips_clear_watch_registers(void);
+ void mips_probe_watch_registers(struct cpuinfo_mips *c);
+
+ #ifdef CONFIG_HARDWARE_WATCHPOINTS
+-#define __restore_watch() do { \
++#define __restore_watch(task) do { \
+ if (unlikely(test_bit(TIF_LOAD_WATCH, \
+-			      &current_thread_info()->flags))) {	\
+- mips_install_watch_registers(); \
++ &task_thread_info(task)->flags))) { \
++ mips_install_watch_registers(task); \
+ } \
+ } while (0)
+
+ #else
+-#define __restore_watch() do {} while (0)
++#define __restore_watch(task) do {} while (0)
+ #endif
+
+ #endif /* _ASM_WATCH_H */
+diff --git a/arch/mips/include/uapi/asm/siginfo.h b/arch/mips/include/uapi/asm/siginfo.h
+index cc49dc240d67..8069cf766603 100644
+--- a/arch/mips/include/uapi/asm/siginfo.h
++++ b/arch/mips/include/uapi/asm/siginfo.h
+@@ -28,7 +28,7 @@
+
+ #define __ARCH_SIGSYS
+
+-#include <uapi/asm-generic/siginfo.h>
++#include <asm-generic/siginfo.h>
+
+ /* We can't use generic siginfo_t, because our si_code and si_errno are swapped */
+ typedef struct siginfo {
+@@ -42,13 +42,13 @@ typedef struct siginfo {
+
+ /* kill() */
+ struct {
+- pid_t _pid; /* sender's pid */
++ __kernel_pid_t _pid; /* sender's pid */
+ __ARCH_SI_UID_T _uid; /* sender's uid */
+ } _kill;
+
+ /* POSIX.1b timers */
+ struct {
+- timer_t _tid; /* timer id */
++ __kernel_timer_t _tid; /* timer id */
+ int _overrun; /* overrun count */
+ char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)];
+ sigval_t _sigval; /* same as below */
+@@ -57,26 +57,26 @@ typedef struct siginfo {
+
+ /* POSIX.1b signals */
+ struct {
+- pid_t _pid; /* sender's pid */
++ __kernel_pid_t _pid; /* sender's pid */
+ __ARCH_SI_UID_T _uid; /* sender's uid */
+ sigval_t _sigval;
+ } _rt;
+
+ /* SIGCHLD */
+ struct {
+- pid_t _pid; /* which child */
++ __kernel_pid_t _pid; /* which child */
+ __ARCH_SI_UID_T _uid; /* sender's uid */
+ int _status; /* exit code */
+- clock_t _utime;
+- clock_t _stime;
++ __kernel_clock_t _utime;
++ __kernel_clock_t _stime;
+ } _sigchld;
+
+ /* IRIX SIGCHLD */
+ struct {
+- pid_t _pid; /* which child */
+- clock_t _utime;
++ __kernel_pid_t _pid; /* which child */
++ __kernel_clock_t _utime;
+ int _status; /* exit code */
+- clock_t _stime;
++ __kernel_clock_t _stime;
+ } _irix_sigchld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+@@ -123,6 +123,4 @@ typedef struct siginfo {
+ #define SI_TIMER __SI_CODE(__SI_TIMER, -3) /* sent by timer expiration */
+ #define SI_MESGQ __SI_CODE(__SI_MESGQ, -4) /* sent by real time mesq state change */
+
+-#include <asm-generic/siginfo.h>
+-
+ #endif /* _UAPI_ASM_SIGINFO_H */
+diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
+index 3fff89ae760b..625ee770b1aa 100644
+--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
++++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
+@@ -28,6 +28,7 @@
+ #include <asm/inst.h>
+ #include <asm/mips-r2-to-r6-emul.h>
+ #include <asm/local.h>
++#include <asm/mipsregs.h>
+ #include <asm/ptrace.h>
+ #include <asm/uaccess.h>
+
+@@ -1251,10 +1252,10 @@ fpu_emul:
+ " j 10b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- " .word 1b,8b\n"
+- " .word 2b,8b\n"
+- " .word 3b,8b\n"
+- " .word 4b,8b\n"
++ STR(PTR) " 1b,8b\n"
++ STR(PTR) " 2b,8b\n"
++ STR(PTR) " 3b,8b\n"
++ STR(PTR) " 4b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1326,10 +1327,10 @@ fpu_emul:
+ " j 10b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- " .word 1b,8b\n"
+- " .word 2b,8b\n"
+- " .word 3b,8b\n"
+- " .word 4b,8b\n"
++ STR(PTR) " 1b,8b\n"
++ STR(PTR) " 2b,8b\n"
++ STR(PTR) " 3b,8b\n"
++ STR(PTR) " 4b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1397,10 +1398,10 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- " .word 1b,8b\n"
+- " .word 2b,8b\n"
+- " .word 3b,8b\n"
+- " .word 4b,8b\n"
++ STR(PTR) " 1b,8b\n"
++ STR(PTR) " 2b,8b\n"
++ STR(PTR) " 3b,8b\n"
++ STR(PTR) " 4b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1467,10 +1468,10 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- " .word 1b,8b\n"
+- " .word 2b,8b\n"
+- " .word 3b,8b\n"
+- " .word 4b,8b\n"
++ STR(PTR) " 1b,8b\n"
++ STR(PTR) " 2b,8b\n"
++ STR(PTR) " 3b,8b\n"
++ STR(PTR) " 4b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1582,14 +1583,14 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- " .word 1b,8b\n"
+- " .word 2b,8b\n"
+- " .word 3b,8b\n"
+- " .word 4b,8b\n"
+- " .word 5b,8b\n"
+- " .word 6b,8b\n"
+- " .word 7b,8b\n"
+- " .word 0b,8b\n"
++ STR(PTR) " 1b,8b\n"
++ STR(PTR) " 2b,8b\n"
++ STR(PTR) " 3b,8b\n"
++ STR(PTR) " 4b,8b\n"
++ STR(PTR) " 5b,8b\n"
++ STR(PTR) " 6b,8b\n"
++ STR(PTR) " 7b,8b\n"
++ STR(PTR) " 0b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1701,14 +1702,14 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- " .word 1b,8b\n"
+- " .word 2b,8b\n"
+- " .word 3b,8b\n"
+- " .word 4b,8b\n"
+- " .word 5b,8b\n"
+- " .word 6b,8b\n"
+- " .word 7b,8b\n"
+- " .word 0b,8b\n"
++ STR(PTR) " 1b,8b\n"
++ STR(PTR) " 2b,8b\n"
++ STR(PTR) " 3b,8b\n"
++ STR(PTR) " 4b,8b\n"
++ STR(PTR) " 5b,8b\n"
++ STR(PTR) " 6b,8b\n"
++ STR(PTR) " 7b,8b\n"
++ STR(PTR) " 0b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1820,14 +1821,14 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- " .word 1b,8b\n"
+- " .word 2b,8b\n"
+- " .word 3b,8b\n"
+- " .word 4b,8b\n"
+- " .word 5b,8b\n"
+- " .word 6b,8b\n"
+- " .word 7b,8b\n"
+- " .word 0b,8b\n"
++ STR(PTR) " 1b,8b\n"
++ STR(PTR) " 2b,8b\n"
++ STR(PTR) " 3b,8b\n"
++ STR(PTR) " 4b,8b\n"
++ STR(PTR) " 5b,8b\n"
++ STR(PTR) " 6b,8b\n"
++ STR(PTR) " 7b,8b\n"
++ STR(PTR) " 0b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -1938,14 +1939,14 @@ fpu_emul:
+ " j 9b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+- " .word 1b,8b\n"
+- " .word 2b,8b\n"
+- " .word 3b,8b\n"
+- " .word 4b,8b\n"
+- " .word 5b,8b\n"
+- " .word 6b,8b\n"
+- " .word 7b,8b\n"
+- " .word 0b,8b\n"
++ STR(PTR) " 1b,8b\n"
++ STR(PTR) " 2b,8b\n"
++ STR(PTR) " 3b,8b\n"
++ STR(PTR) " 4b,8b\n"
++ STR(PTR) " 5b,8b\n"
++ STR(PTR) " 6b,8b\n"
++ STR(PTR) " 7b,8b\n"
++ STR(PTR) " 0b,8b\n"
+ " .previous\n"
+ " .set pop\n"
+ : "+&r"(rt), "=&r"(rs),
+@@ -2000,7 +2001,7 @@ fpu_emul:
+ "j 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+- ".word 1b, 3b\n"
++ STR(PTR) " 1b,3b\n"
+ ".previous\n"
+ : "=&r"(res), "+&r"(err)
+ : "r"(vaddr), "i"(SIGSEGV)
+@@ -2058,7 +2059,7 @@ fpu_emul:
+ "j 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+- ".word 1b, 3b\n"
++ STR(PTR) " 1b,3b\n"
+ ".previous\n"
+ : "+&r"(res), "+&r"(err)
+ : "r"(vaddr), "i"(SIGSEGV));
+@@ -2119,7 +2120,7 @@ fpu_emul:
+ "j 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+- ".word 1b, 3b\n"
++ STR(PTR) " 1b,3b\n"
+ ".previous\n"
+ : "=&r"(res), "+&r"(err)
+ : "r"(vaddr), "i"(SIGSEGV)
+@@ -2182,7 +2183,7 @@ fpu_emul:
+ "j 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+- ".word 1b, 3b\n"
++ STR(PTR) " 1b,3b\n"
+ ".previous\n"
+ : "+&r"(res), "+&r"(err)
+ : "r"(vaddr), "i"(SIGSEGV));
+diff --git a/arch/mips/kernel/pm.c b/arch/mips/kernel/pm.c
+index fefdf39d3df3..dc814892133c 100644
+--- a/arch/mips/kernel/pm.c
++++ b/arch/mips/kernel/pm.c
+@@ -56,7 +56,7 @@ static void mips_cpu_restore(void)
+ write_c0_userlocal(current_thread_info()->tp_value);
+
+ /* Restore watch registers */
+- __restore_watch();
++ __restore_watch(current);
+ }
+
+ /**
+diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
+index 92880cee449e..d83730cd2719 100644
+--- a/arch/mips/kernel/process.c
++++ b/arch/mips/kernel/process.c
+@@ -455,7 +455,7 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
+ *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) {
+ regs = (struct pt_regs *)*sp;
+ pc = regs->cp0_epc;
+- if (__kernel_text_address(pc)) {
++ if (!user_mode(regs) && __kernel_text_address(pc)) {
+ *sp = regs->regs[29];
+ *ra = regs->regs[31];
+ return pc;
+@@ -580,11 +580,19 @@ int mips_get_process_fp_mode(struct task_struct *task)
+ return value;
+ }
+
++static void prepare_for_fp_mode_switch(void *info)
++{
++ struct mm_struct *mm = info;
++
++ if (current->mm == mm)
++ lose_fpu(1);
++}
++
+ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
+ {
+ const unsigned int known_bits = PR_FP_MODE_FR | PR_FP_MODE_FRE;
+- unsigned long switch_count;
+ struct task_struct *t;
++ int max_users;
+
+ /* Check the value is valid */
+ if (value & ~known_bits)
+@@ -601,6 +609,9 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
+ if (!(value & PR_FP_MODE_FR) && cpu_has_fpu && cpu_has_mips_r6)
+ return -EOPNOTSUPP;
+
++ /* Proceed with the mode switch */
++ preempt_disable();
++
+ /* Save FP & vector context, then disable FPU & MSA */
+ if (task->signal == current->signal)
+ lose_fpu(1);
+@@ -610,31 +621,17 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
+ smp_mb__after_atomic();
+
+ /*
+- * If there are multiple online CPUs then wait until all threads whose
+- * FP mode is about to change have been context switched. This approach
+- * allows us to only worry about whether an FP mode switch is in
+- * progress when FP is first used in a tasks time slice. Pretty much all
+- * of the mode switch overhead can thus be confined to cases where mode
+- * switches are actually occurring. That is, to here. However for the
+- * thread performing the mode switch it may take a while...
++ * If there are multiple online CPUs then force any which are running
++ * threads in this process to lose their FPU context, which they can't
++ * regain until fp_mode_switching is cleared later.
+ */
+ if (num_online_cpus() > 1) {
+- spin_lock_irq(&task->sighand->siglock);
+-
+- for_each_thread(task, t) {
+- if (t == current)
+- continue;
+-
+- switch_count = t->nvcsw + t->nivcsw;
++ /* No need to send an IPI for the local CPU */
++ max_users = (task->mm == current->mm) ? 1 : 0;
+
+- do {
+- spin_unlock_irq(&task->sighand->siglock);
+- cond_resched();
+- spin_lock_irq(&task->sighand->siglock);
+- } while ((t->nvcsw + t->nivcsw) == switch_count);
+- }
+-
+- spin_unlock_irq(&task->sighand->siglock);
++ if (atomic_read(&current->mm->mm_users) > max_users)
++ smp_call_function(prepare_for_fp_mode_switch,
++ (void *)current->mm, 1);
+ }
+
+ /*
+@@ -659,6 +656,7 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
+
+ /* Allow threads to use FP again */
+ atomic_set(&task->mm->context.fp_mode_switching, 0);
++ preempt_enable();
+
+ return 0;
+ }
+diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
+index a5279b2f3198..4285d8b7c566 100644
+--- a/arch/mips/kernel/ptrace.c
++++ b/arch/mips/kernel/ptrace.c
+@@ -57,8 +57,7 @@ static void init_fp_ctx(struct task_struct *target)
+ /* Begin with data registers set to all 1s... */
+ memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr));
+
+- /* ...and FCSR zeroed */
+- target->thread.fpu.fcr31 = 0;
++ /* FCSR has been preset by `mips_set_personality_nan'. */
+
+ /*
+ * Record that the target has "used" math, such that the context
+@@ -80,6 +79,22 @@ void ptrace_disable(struct task_struct *child)
+ }
+
+ /*
++ * Poke at FCSR according to its mask. Don't set the cause bits as
++ * this is currently not handled correctly in FP context restoration
++ * and will cause an oops if a corresponding enable bit is set.
++ */
++static void ptrace_setfcr31(struct task_struct *child, u32 value)
++{
++ u32 fcr31;
++ u32 mask;
++
++ value &= ~FPU_CSR_ALL_X;
++ fcr31 = child->thread.fpu.fcr31;
++ mask = boot_cpu_data.fpu_msk31;
++ child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask);
++}
++
++/*
+ * Read a general register set. We always use the 64-bit format, even
+ * for 32-bit kernels and for 32-bit processes on a 64-bit kernel.
+ * Registers are sign extended to fill the available space.
+@@ -159,9 +174,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
+ {
+ union fpureg *fregs;
+ u64 fpr_val;
+- u32 fcr31;
+ u32 value;
+- u32 mask;
+ int i;
+
+ if (!access_ok(VERIFY_READ, data, 33 * 8))
+@@ -176,9 +189,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
+ }
+
+ __get_user(value, data + 64);
+- fcr31 = child->thread.fpu.fcr31;
+- mask = boot_cpu_data.fpu_msk31;
+- child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask);
++ ptrace_setfcr31(child, value);
+
+ /* FIR may not be written. */
+
+@@ -805,7 +816,7 @@ long arch_ptrace(struct task_struct *child, long request,
+ break;
+ #endif
+ case FPC_CSR:
+- child->thread.fpu.fcr31 = data & ~FPU_CSR_ALL_X;
++ ptrace_setfcr31(child, data);
+ break;
+ case DSP_BASE ... DSP_BASE + 5: {
+ dspreg_t *dregs;
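
The new ptrace_setfcr31() gives both the FPC_CSR poke path and PTRACE_SETFPREGS one place that enforces what a debugger may change in FCSR: the exception cause bits are stripped, and the bits covered by boot_cpu_data.fpu_msk31 keep their current values. The core of that is a masked read-modify-write; a self-contained sketch of just the bit manipulation (the register and mask values are made up for the demo):

    #include <stdint.h>
    #include <stdio.h>

    /* Merge a caller-supplied value into a register image, preserving the
     * bits selected by 'mask' and taking the rest from 'value'. */
    static uint32_t masked_write(uint32_t old, uint32_t value, uint32_t mask)
    {
        return (value & ~mask) | (old & mask);
    }

    int main(void)
    {
        uint32_t fcr31 = 0x00c00000;  /* hypothetical current FCSR       */
        uint32_t value = 0xffffffff;  /* debugger tries to set every bit */
        uint32_t mask  = 0x01000000;  /* hypothetical read-only bit      */

        /* Bit 24 stays clear because it was clear in fcr31; all other
         * bits follow the debugger-supplied value. */
        printf("result = 0x%08x\n", masked_write(fcr31, value, mask));
        return 0;
    }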
+diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
+index 17732f876eff..56d86b09c917 100644
+--- a/arch/mips/kernel/r4k_fpu.S
++++ b/arch/mips/kernel/r4k_fpu.S
+@@ -244,17 +244,17 @@ LEAF(\name)
+ .set push
+ .set noat
+ #ifdef CONFIG_64BIT
+- copy_u_d \wr, 1
++ copy_s_d \wr, 1
+ EX sd $1, \off(\base)
+ #elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+- copy_u_w \wr, 2
++ copy_s_w \wr, 2
+ EX sw $1, \off(\base)
+- copy_u_w \wr, 3
++ copy_s_w \wr, 3
+ EX sw $1, (\off+4)(\base)
+ #else /* CONFIG_CPU_BIG_ENDIAN */
+- copy_u_w \wr, 2
++ copy_s_w \wr, 2
+ EX sw $1, (\off+4)(\base)
+- copy_u_w \wr, 3
++ copy_s_w \wr, 3
+ EX sw $1, \off(\base)
+ #endif
+ .set pop
+diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
+index 4f607341a793..d20caacfdbd3 100644
+--- a/arch/mips/kernel/setup.c
++++ b/arch/mips/kernel/setup.c
+@@ -706,6 +706,9 @@ static void __init arch_mem_init(char **cmdline_p)
+ for_each_memblock(reserved, reg)
+ if (reg->size != 0)
+ reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
++
++ reserve_bootmem_region(__pa_symbol(&__nosave_begin),
++ __pa_symbol(&__nosave_end)); /* Reserve for hibernation */
+ }
+
+ static void __init resource_init(void)
+diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
+index bf792e2839a6..9e35b6b26aa8 100644
+--- a/arch/mips/kernel/signal.c
++++ b/arch/mips/kernel/signal.c
+@@ -195,6 +195,9 @@ static int restore_msa_extcontext(void __user *buf, unsigned int size)
+ unsigned int csr;
+ int i, err;
+
++ if (!config_enabled(CONFIG_CPU_HAS_MSA))
++ return SIGSYS;
++
+ if (size != sizeof(*msa))
+ return -EINVAL;
+
+@@ -398,8 +401,8 @@ int protected_restore_fp_context(void __user *sc)
+ }
+
+ fp_done:
+- if (used & USED_EXTCONTEXT)
+- err |= restore_extcontext(sc_to_extcontext(sc));
++ if (!err && (used & USED_EXTCONTEXT))
++ err = restore_extcontext(sc_to_extcontext(sc));
+
+ return err ?: sig;
+ }
+@@ -767,15 +770,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
+ sigset_t *oldset = sigmask_to_save();
+ int ret;
+ struct mips_abi *abi = current->thread.abi;
+-#ifdef CONFIG_CPU_MICROMIPS
+- void *vdso;
+- unsigned long tmp = (unsigned long)current->mm->context.vdso;
+-
+- set_isa16_mode(tmp);
+- vdso = (void *)tmp;
+-#else
+ void *vdso = current->mm->context.vdso;
+-#endif
+
+ if (regs->regs[0]) {
+ switch(regs->regs[2]) {
+diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
+index ae0c89d23ad7..5aa1d5c2659a 100644
+--- a/arch/mips/kernel/traps.c
++++ b/arch/mips/kernel/traps.c
+@@ -145,7 +145,7 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs)
+ if (!task)
+ task = current;
+
+- if (raw_show_trace || !__kernel_text_address(pc)) {
++ if (raw_show_trace || user_mode(regs) || !__kernel_text_address(pc)) {
+ show_raw_backtrace(sp);
+ return;
+ }
+@@ -1249,7 +1249,7 @@ static int enable_restore_fp_context(int msa)
+ err = init_fpu();
+ if (msa && !err) {
+ enable_msa();
+- _init_msa_upper();
++ init_msa_upper();
+ set_thread_flag(TIF_USEDMSA);
+ set_thread_flag(TIF_MSA_CTX_LIVE);
+ }
+@@ -1312,7 +1312,7 @@ static int enable_restore_fp_context(int msa)
+ */
+ prior_msa = test_and_set_thread_flag(TIF_MSA_CTX_LIVE);
+ if (!prior_msa && was_fpu_owner) {
+- _init_msa_upper();
++ init_msa_upper();
+
+ goto out;
+ }
+@@ -1329,7 +1329,7 @@ static int enable_restore_fp_context(int msa)
+ * of each vector register such that it cannot see data left
+ * behind by another task.
+ */
+- _init_msa_upper();
++ init_msa_upper();
+ } else {
+ /* We need to restore the vector context. */
+ restore_msa(current);
+diff --git a/arch/mips/kernel/watch.c b/arch/mips/kernel/watch.c
+index 2a03abb5bd2c..9b78e375118e 100644
+--- a/arch/mips/kernel/watch.c
++++ b/arch/mips/kernel/watch.c
+@@ -15,10 +15,9 @@
+ * Install the watch registers for the current thread. A maximum of
+ * four registers are installed although the machine may have more.
+ */
+-void mips_install_watch_registers(void)
++void mips_install_watch_registers(struct task_struct *t)
+ {
+- struct mips3264_watch_reg_state *watches =
+- &current->thread.watch.mips3264;
++ struct mips3264_watch_reg_state *watches = &t->thread.watch.mips3264;
+ switch (current_cpu_data.watch_reg_use_cnt) {
+ default:
+ BUG();
+diff --git a/arch/mips/lib/ashldi3.c b/arch/mips/lib/ashldi3.c
+index beb80f316095..927dc94a030f 100644
+--- a/arch/mips/lib/ashldi3.c
++++ b/arch/mips/lib/ashldi3.c
+@@ -2,7 +2,7 @@
+
+ #include "libgcc.h"
+
+-long long __ashldi3(long long u, word_type b)
++long long notrace __ashldi3(long long u, word_type b)
+ {
+ DWunion uu, w;
+ word_type bm;
+diff --git a/arch/mips/lib/ashrdi3.c b/arch/mips/lib/ashrdi3.c
+index c884a912b660..9fdf1a598428 100644
+--- a/arch/mips/lib/ashrdi3.c
++++ b/arch/mips/lib/ashrdi3.c
+@@ -2,7 +2,7 @@
+
+ #include "libgcc.h"
+
+-long long __ashrdi3(long long u, word_type b)
++long long notrace __ashrdi3(long long u, word_type b)
+ {
+ DWunion uu, w;
+ word_type bm;
+diff --git a/arch/mips/lib/bswapdi.c b/arch/mips/lib/bswapdi.c
+index 77e5f9c1f005..e3e77aa52c95 100644
+--- a/arch/mips/lib/bswapdi.c
++++ b/arch/mips/lib/bswapdi.c
+@@ -1,6 +1,6 @@
+ #include <linux/module.h>
+
+-unsigned long long __bswapdi2(unsigned long long u)
++unsigned long long notrace __bswapdi2(unsigned long long u)
+ {
+ return (((u) & 0xff00000000000000ull) >> 56) |
+ (((u) & 0x00ff000000000000ull) >> 40) |
+diff --git a/arch/mips/lib/bswapsi.c b/arch/mips/lib/bswapsi.c
+index 2b302ff121d2..530a8afe6fda 100644
+--- a/arch/mips/lib/bswapsi.c
++++ b/arch/mips/lib/bswapsi.c
+@@ -1,6 +1,6 @@
+ #include <linux/module.h>
+
+-unsigned int __bswapsi2(unsigned int u)
++unsigned int notrace __bswapsi2(unsigned int u)
+ {
+ return (((u) & 0xff000000) >> 24) |
+ (((u) & 0x00ff0000) >> 8) |
+diff --git a/arch/mips/lib/cmpdi2.c b/arch/mips/lib/cmpdi2.c
+index 8c1306437ed1..06857da96993 100644
+--- a/arch/mips/lib/cmpdi2.c
++++ b/arch/mips/lib/cmpdi2.c
+@@ -2,7 +2,7 @@
+
+ #include "libgcc.h"
+
+-word_type __cmpdi2(long long a, long long b)
++word_type notrace __cmpdi2(long long a, long long b)
+ {
+ const DWunion au = {
+ .ll = a
+diff --git a/arch/mips/lib/lshrdi3.c b/arch/mips/lib/lshrdi3.c
+index dcf8d6810b7c..364547449c65 100644
+--- a/arch/mips/lib/lshrdi3.c
++++ b/arch/mips/lib/lshrdi3.c
+@@ -2,7 +2,7 @@
+
+ #include "libgcc.h"
+
+-long long __lshrdi3(long long u, word_type b)
++long long notrace __lshrdi3(long long u, word_type b)
+ {
+ DWunion uu, w;
+ word_type bm;
+diff --git a/arch/mips/lib/ucmpdi2.c b/arch/mips/lib/ucmpdi2.c
+index bb4cb2f828ea..bd599f58234c 100644
+--- a/arch/mips/lib/ucmpdi2.c
++++ b/arch/mips/lib/ucmpdi2.c
+@@ -2,7 +2,7 @@
+
+ #include "libgcc.h"
+
+-word_type __ucmpdi2(unsigned long long a, unsigned long long b)
++word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b)
+ {
+ const DWunion au = {.ll = a};
+ const DWunion bu = {.ll = b};
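
The run of lib/ changes above (__ashldi3 through __ucmpdi2) only adds the notrace attribute: the compiler emits calls to these helpers for 64-bit arithmetic, and they can end up being used by the tracing code itself on 32-bit MIPS, so letting ftrace instrument them risks recursion. The helpers themselves are plain shift-and-mask code; a user-space version of the bswapdi case can be sanity-checked against the compiler builtin:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same shift-and-mask byte swap as the __bswapdi2 helper, written portably. */
    static uint64_t bswapdi2(uint64_t u)
    {
        return ((u & 0xff00000000000000ull) >> 56) |
               ((u & 0x00ff000000000000ull) >> 40) |
               ((u & 0x0000ff0000000000ull) >> 24) |
               ((u & 0x000000ff00000000ull) >>  8) |
               ((u & 0x00000000ff000000ull) <<  8) |
               ((u & 0x0000000000ff0000ull) << 24) |
               ((u & 0x000000000000ff00ull) << 40) |
               ((u & 0x00000000000000ffull) << 56);
    }

    int main(void)
    {
        uint64_t x = 0x0123456789abcdefull;

        assert(bswapdi2(x) == __builtin_bswap64(x)); /* GCC/Clang builtin */
        printf("%016llx -> %016llx\n",
               (unsigned long long)x, (unsigned long long)bswapdi2(x));
        return 0;
    }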
+diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
+index 85d808924c94..0fce4608aa88 100644
+--- a/arch/mips/loongson64/Platform
++++ b/arch/mips/loongson64/Platform
+@@ -31,7 +31,7 @@ cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
+ # can't easily be used safely within the kbuild framework.
+ #
+ ifeq ($(call cc-ifversion, -ge, 0409, y), y)
+- ifeq ($(call ld-ifversion, -ge, 22500000, y), y)
++ ifeq ($(call ld-ifversion, -ge, 225000000, y), y)
+ cflags-$(CONFIG_CPU_LOONGSON3) += \
+ $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+ else
+diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c
+index 6f9e010cec4d..282c5a8c2fcd 100644
+--- a/arch/mips/loongson64/loongson-3/numa.c
++++ b/arch/mips/loongson64/loongson-3/numa.c
+@@ -213,10 +213,10 @@ static void __init node_mem_init(unsigned int node)
+ BOOTMEM_DEFAULT);
+
+ if (node == 0 && node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT)) {
+- /* Reserve 0xff800000~0xffffffff for RS780E integrated GPU */
++ /* Reserve 0xfe000000~0xffffffff for RS780E integrated GPU */
+ reserve_bootmem_node(NODE_DATA(node),
+- (node_addrspace_offset | 0xff800000),
+- 8 << 20, BOOTMEM_DEFAULT);
++ (node_addrspace_offset | 0xfe000000),
++ 32 << 20, BOOTMEM_DEFAULT);
+ }
+
+ sparse_memory_present_with_active_regions(node);
+diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
+index cdfd44ffa51c..41d3e0e7defa 100644
+--- a/arch/mips/math-emu/cp1emu.c
++++ b/arch/mips/math-emu/cp1emu.c
+@@ -445,9 +445,11 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
+ case spec_op:
+ switch (insn.r_format.func) {
+ case jalr_op:
+- regs->regs[insn.r_format.rd] =
+- regs->cp0_epc + dec_insn.pc_inc +
+- dec_insn.next_pc_inc;
++ if (insn.r_format.rd != 0) {
++ regs->regs[insn.r_format.rd] =
++ regs->cp0_epc + dec_insn.pc_inc +
++ dec_insn.next_pc_inc;
++ }
+ /* Fall through */
+ case jr_op:
+ /* For R6, JR already emulated in jalr_op */
+diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
+index 3f159caf6dbc..bf04c6c479a4 100644
+--- a/arch/mips/mm/cache.c
++++ b/arch/mips/mm/cache.c
+@@ -16,6 +16,7 @@
+ #include <linux/mm.h>
+
+ #include <asm/cacheflush.h>
++#include <asm/highmem.h>
+ #include <asm/processor.h>
+ #include <asm/cpu.h>
+ #include <asm/cpu-features.h>
+@@ -83,8 +84,6 @@ void __flush_dcache_page(struct page *page)
+ struct address_space *mapping = page_mapping(page);
+ unsigned long addr;
+
+- if (PageHighMem(page))
+- return;
+ if (mapping && !mapping_mapped(mapping)) {
+ SetPageDcacheDirty(page);
+ return;
+@@ -95,8 +94,15 @@ void __flush_dcache_page(struct page *page)
+ * case is for exec env/arg pages and those are %99 certainly going to
+ * get faulted into the tlb (and thus flushed) anyways.
+ */
+- addr = (unsigned long) page_address(page);
++ if (PageHighMem(page))
++ addr = (unsigned long)kmap_atomic(page);
++ else
++ addr = (unsigned long)page_address(page);
++
+ flush_data_cache_page(addr);
++
++ if (PageHighMem(page))
++ __kunmap_atomic((void *)addr);
+ }
+
+ EXPORT_SYMBOL(__flush_dcache_page);
+@@ -119,33 +125,28 @@ void __flush_anon_page(struct page *page, unsigned long vmaddr)
+
+ EXPORT_SYMBOL(__flush_anon_page);
+
+-void __flush_icache_page(struct vm_area_struct *vma, struct page *page)
+-{
+- unsigned long addr;
+-
+- if (PageHighMem(page))
+- return;
+-
+- addr = (unsigned long) page_address(page);
+- flush_data_cache_page(addr);
+-}
+-EXPORT_SYMBOL_GPL(__flush_icache_page);
+-
+-void __update_cache(struct vm_area_struct *vma, unsigned long address,
+- pte_t pte)
++void __update_cache(unsigned long address, pte_t pte)
+ {
+ struct page *page;
+ unsigned long pfn, addr;
+- int exec = (vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc;
++ int exec = !pte_no_exec(pte) && !cpu_has_ic_fills_f_dc;
+
+ pfn = pte_pfn(pte);
+ if (unlikely(!pfn_valid(pfn)))
+ return;
+ page = pfn_to_page(pfn);
+- if (page_mapping(page) && Page_dcache_dirty(page)) {
+- addr = (unsigned long) page_address(page);
++ if (Page_dcache_dirty(page)) {
++ if (PageHighMem(page))
++ addr = (unsigned long)kmap_atomic(page);
++ else
++ addr = (unsigned long)page_address(page);
++
+ if (exec || pages_do_alias(addr, address & PAGE_MASK))
+ flush_data_cache_page(addr);
++
++ if (PageHighMem(page))
++ __kunmap_atomic((void *)addr);
++
+ ClearPageDcacheDirty(page);
+ }
+ }
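
The cache.c hunks stop bailing out on highmem pages and instead take a temporary mapping with kmap_atomic(), so the data cache can be flushed even for pages that have no permanent kernel virtual address. Reduced to its essentials, the pattern looks like the kernel-style sketch below (a sketch against the MIPS cache API used in this file, not a drop-in replacement for either function):

    #include <linux/highmem.h>
    #include <asm/cacheflush.h>

    static void flush_one_page(struct page *page)
    {
        unsigned long addr;

        if (PageHighMem(page))
            addr = (unsigned long)kmap_atomic(page);  /* short-lived mapping */
        else
            addr = (unsigned long)page_address(page); /* lowmem is always mapped */

        flush_data_cache_page(addr);

        if (PageHighMem(page))
            kunmap_atomic((void *)addr);              /* must pair with kmap_atomic() */
    }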
+diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
+index ee3617c0c5e2..f37e6ba40f52 100644
+--- a/arch/mips/vdso/Makefile
++++ b/arch/mips/vdso/Makefile
+@@ -5,10 +5,12 @@ obj-vdso-y := elf.o gettimeofday.o sigreturn.o
+ ccflags-vdso := \
+ $(filter -I%,$(KBUILD_CFLAGS)) \
+ $(filter -E%,$(KBUILD_CFLAGS)) \
++ $(filter -mmicromips,$(KBUILD_CFLAGS)) \
+ $(filter -march=%,$(KBUILD_CFLAGS))
+ cflags-vdso := $(ccflags-vdso) \
+ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
+- -O2 -g -fPIC -fno-common -fno-builtin -G 0 -DDISABLE_BRANCH_PROFILING \
++ -O2 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \
++ -DDISABLE_BRANCH_PROFILING \
+ $(call cc-option, -fno-stack-protector)
+ aflags-vdso := $(ccflags-vdso) \
+ $(filter -I%,$(KBUILD_CFLAGS)) \
+diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
+index 6544017eb90b..67584847973a 100644
+--- a/arch/powerpc/kernel/eeh.c
++++ b/arch/powerpc/kernel/eeh.c
+@@ -1068,7 +1068,7 @@ void eeh_add_device_early(struct pci_dn *pdn)
+ struct pci_controller *phb;
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+- if (!edev || !eeh_enabled())
++ if (!edev)
+ return;
+
+ if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
+diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
+index fb6207d2c604..31e4c7e1a4b4 100644
+--- a/arch/powerpc/kernel/eeh_driver.c
++++ b/arch/powerpc/kernel/eeh_driver.c
+@@ -171,6 +171,16 @@ static void *eeh_dev_save_state(void *data, void *userdata)
+ if (!edev)
+ return NULL;
+
++ /*
++ * We cannot access the config space on some adapters;
++ * doing so would fence the PHB. For those we don't save
++ * the config space content here and instead restore the
++ * initial config space that was saved when the EEH
++ * device was created.
++ */
++ if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
++ return NULL;
++
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev)
+ return NULL;
+@@ -312,6 +322,19 @@ static void *eeh_dev_restore_state(void *data, void *userdata)
+ if (!edev)
+ return NULL;
+
++ /*
++ * The content in the config space isn't saved because
++ * the blocked config space on some adapters. We have
++ * to restore the initial saved config space when the
++ * EEH device is created.
++ */
++ if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
++ if (list_is_last(&edev->list, &edev->pe->edevs))
++ eeh_pe_restore_bars(edev->pe);
++
++ return NULL;
++ }
++
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev)
+ return NULL;
+@@ -564,9 +587,6 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
+ /* Save states */
+ eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
+
+- /* Report error */
+- eeh_pe_dev_traverse(pe, eeh_report_error, &result);
+-
+ /* Issue reset */
+ ret = eeh_reset_pe(pe);
+ if (ret) {
+diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
+index 7716cebf4b8e..2b66f25d40db 100644
+--- a/arch/powerpc/kernel/exceptions-64s.S
++++ b/arch/powerpc/kernel/exceptions-64s.S
+@@ -953,11 +953,6 @@ hv_facility_unavailable_relon_trampoline:
+ #endif
+ STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
+
+- /* Other future vectors */
+- .align 7
+- .globl __end_interrupts
+-__end_interrupts:
+-
+ .align 7
+ system_call_entry:
+ b system_call_common
+@@ -1244,6 +1239,17 @@ __end_handlers:
+ STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
+ STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
+
++ /*
++ * The __end_interrupts marker must be past the out-of-line (OOL)
++ * handlers, so that they are copied to real address 0x100 when running
++ * a relocatable kernel. This ensures they can be reached from the short
++ * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch
++ * directly, without using LOAD_HANDLER().
++ */
++ .align 7
++ .globl __end_interrupts
++__end_interrupts:
++
+ #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+ /*
+ * Data area reserved for FWNMI option.
+diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
+index 7ab29518a3b9..e345891450c3 100644
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -393,6 +393,9 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
+ unsigned long i = 0;
+ unsigned long n = end_pfn - start_pfn;
+
++ if (remap_pfn == 0)
++ remap_pfn = nr_pages;
++
+ while (i < n) {
+ unsigned long cur_pfn = start_pfn + i;
+ unsigned long left = n - i;
+@@ -438,17 +441,29 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
+ return remap_pfn;
+ }
+
+-static void __init xen_set_identity_and_remap(unsigned long nr_pages)
++static unsigned long __init xen_count_remap_pages(
++ unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
++ unsigned long remap_pages)
++{
++ if (start_pfn >= nr_pages)
++ return remap_pages;
++
++ return remap_pages + min(end_pfn, nr_pages) - start_pfn;
++}
++
++static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages,
++ unsigned long (*func)(unsigned long start_pfn, unsigned long end_pfn,
++ unsigned long nr_pages, unsigned long last_val))
+ {
+ phys_addr_t start = 0;
+- unsigned long last_pfn = nr_pages;
++ unsigned long ret_val = 0;
+ const struct e820entry *entry = xen_e820_map;
+ int i;
+
+ /*
+ * Combine non-RAM regions and gaps until a RAM region (or the
+- * end of the map) is reached, then set the 1:1 map and
+- * remap the memory in those non-RAM regions.
++ * end of the map) is reached, then call the provided function
++ * to perform its duty on the non-RAM region.
+ *
+ * The combined non-RAM regions are rounded to a whole number
+ * of pages so any partial pages are accessible via the 1:1
+@@ -466,14 +481,13 @@ static void __init xen_set_identity_and_remap(unsigned long nr_pages)
+ end_pfn = PFN_UP(entry->addr);
+
+ if (start_pfn < end_pfn)
+- last_pfn = xen_set_identity_and_remap_chunk(
+- start_pfn, end_pfn, nr_pages,
+- last_pfn);
++ ret_val = func(start_pfn, end_pfn, nr_pages,
++ ret_val);
+ start = end;
+ }
+ }
+
+- pr_info("Released %ld page(s)\n", xen_released_pages);
++ return ret_val;
+ }
+
+ /*
+@@ -596,35 +610,6 @@ static void __init xen_ignore_unusable(void)
+ }
+ }
+
+-static unsigned long __init xen_count_remap_pages(unsigned long max_pfn)
+-{
+- unsigned long extra = 0;
+- unsigned long start_pfn, end_pfn;
+- const struct e820entry *entry = xen_e820_map;
+- int i;
+-
+- end_pfn = 0;
+- for (i = 0; i < xen_e820_map_entries; i++, entry++) {
+- start_pfn = PFN_DOWN(entry->addr);
+- /* Adjacent regions on non-page boundaries handling! */
+- end_pfn = min(end_pfn, start_pfn);
+-
+- if (start_pfn >= max_pfn)
+- return extra + max_pfn - end_pfn;
+-
+- /* Add any holes in map to result. */
+- extra += start_pfn - end_pfn;
+-
+- end_pfn = PFN_UP(entry->addr + entry->size);
+- end_pfn = min(end_pfn, max_pfn);
+-
+- if (entry->type != E820_RAM)
+- extra += end_pfn - start_pfn;
+- }
+-
+- return extra;
+-}
+-
+ bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size)
+ {
+ struct e820entry *entry;
+@@ -804,7 +789,7 @@ char * __init xen_memory_setup(void)
+ max_pages = xen_get_max_pages();
+
+ /* How many extra pages do we need due to remapping? */
+- max_pages += xen_count_remap_pages(max_pfn);
++ max_pages += xen_foreach_remap_area(max_pfn, xen_count_remap_pages);
+
+ if (max_pages > max_pfn)
+ extra_pages += max_pages - max_pfn;
+@@ -922,7 +907,9 @@ char * __init xen_memory_setup(void)
+ * Set identity map on non-RAM pages and prepare remapping the
+ * underlying RAM.
+ */
+- xen_set_identity_and_remap(max_pfn);
++ xen_foreach_remap_area(max_pfn, xen_set_identity_and_remap_chunk);
++
++ pr_info("Released %ld page(s)\n", xen_released_pages);
+
+ return "Xen";
+ }
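
The Xen change factors the E820 walk out of xen_set_identity_and_remap() into xen_foreach_remap_area(), which takes a callback and threads an accumulator through it; the same walker then runs once with xen_count_remap_pages() to size the remap and once with xen_set_identity_and_remap_chunk() to perform it. A stripped-down user-space model of that accumulator-through-callback pattern (the struct, map entries and count_pages() callback are invented for the demo, and the real walker additionally merges adjacent non-RAM regions and rounds to page boundaries):

    #include <stdio.h>

    struct map_entry {
        unsigned long start_pfn;
        unsigned long end_pfn;
        int ram;                            /* 1 = RAM, 0 = hole/reserved */
    };

    /* Walk all non-RAM ranges and thread an accumulator through the
     * supplied callback, mirroring xen_foreach_remap_area(). */
    static unsigned long foreach_non_ram(const struct map_entry *map, int n,
                                         unsigned long nr_pages,
                                         unsigned long (*func)(unsigned long start,
                                                               unsigned long end,
                                                               unsigned long nr_pages,
                                                               unsigned long acc))
    {
        unsigned long acc = 0;
        int i;

        for (i = 0; i < n; i++)
            if (!map[i].ram)
                acc = func(map[i].start_pfn, map[i].end_pfn, nr_pages, acc);

        return acc;
    }

    /* One possible callback: count how many non-RAM pages sit below
     * nr_pages, i.e. how many will need to be remapped elsewhere. */
    static unsigned long count_pages(unsigned long start, unsigned long end,
                                     unsigned long nr_pages, unsigned long acc)
    {
        if (start >= nr_pages)
            return acc;
        return acc + (end < nr_pages ? end : nr_pages) - start;
    }

    int main(void)
    {
        const struct map_entry map[] = {
            {   0, 100, 1 },                /* RAM                 */
            { 100, 120, 0 },                /* hole below nr_pages */
            { 120, 200, 1 },                /* RAM                 */
            { 250, 300, 0 },                /* hole above nr_pages */
        };

        printf("pages to remap: %lu\n",
               foreach_non_ram(map, 4, 200, count_pages));  /* prints 20 */
        return 0;
    }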
+diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c
+index 40de03f49ff8..bdd0d753ce5d 100644
+--- a/crypto/asymmetric_keys/pkcs7_parser.c
++++ b/crypto/asymmetric_keys/pkcs7_parser.c
+@@ -237,6 +237,7 @@ int pkcs7_sig_note_digest_algo(void *context, size_t hdrlen,
+ break;
+ case OID_sha224:
+ ctx->sinfo->sig.hash_algo = "sha224";
++ break;
+ default:
+ printk("Unsupported digest algo: %u\n", ctx->last_oid);
+ return -ENOPKG;
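
The pkcs7 fix is a missing break: without it the OID_sha224 case falls straight through into default, so a perfectly supported digest still logs "Unsupported digest algo" and fails with -ENOPKG. A stand-alone reproduction of that fallthrough behaviour:

    #include <stdio.h>

    /* Mimics the buggy switch: case 2 sets the algorithm but, lacking a
     * break, falls through into default and reports failure anyway. */
    static int pick_hash(int oid, const char **algo)
    {
        switch (oid) {
        case 1:
            *algo = "sha256";
            break;
        case 2:
            *algo = "sha224";
            /* missing break: execution continues into default */
        default:
            printf("Unsupported digest algo: %d\n", oid);
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        const char *algo = NULL;

        /* Returns -1 even though *algo was set to "sha224". */
        printf("oid 2 -> %d (algo=%s)\n", pick_hash(2, &algo), algo);
        return 0;
    }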
+diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
+index 6e7c3ccea24b..27aea96036c4 100644
+--- a/drivers/base/power/main.c
++++ b/drivers/base/power/main.c
+@@ -1267,14 +1267,15 @@ int dpm_suspend_late(pm_message_t state)
+ error = device_suspend_late(dev);
+
+ mutex_lock(&dpm_list_mtx);
++ if (!list_empty(&dev->power.entry))
++ list_move(&dev->power.entry, &dpm_late_early_list);
++
+ if (error) {
+ pm_dev_err(dev, state, " late", error);
+ dpm_save_failed_dev(dev_name(dev));
+ put_device(dev);
+ break;
+ }
+- if (!list_empty(&dev->power.entry))
+- list_move(&dev->power.entry, &dpm_late_early_list);
+ put_device(dev);
+
+ if (async_error)
+diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
+index 4c7055009bd6..b74690418504 100644
+--- a/drivers/base/power/runtime.c
++++ b/drivers/base/power/runtime.c
+@@ -1506,11 +1506,16 @@ int pm_runtime_force_resume(struct device *dev)
+ goto out;
+ }
+
+- ret = callback(dev);
++ ret = pm_runtime_set_active(dev);
+ if (ret)
+ goto out;
+
+- pm_runtime_set_active(dev);
++ ret = callback(dev);
++ if (ret) {
++ pm_runtime_set_suspended(dev);
++ goto out;
++ }
++
+ pm_runtime_mark_last_busy(dev);
+ out:
+ pm_runtime_enable(dev);
+diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c
+index ada081232528..b98a141ea89c 100644
+--- a/drivers/char/hw_random/exynos-rng.c
++++ b/drivers/char/hw_random/exynos-rng.c
+@@ -89,6 +89,7 @@ static int exynos_read(struct hwrng *rng, void *buf,
+ struct exynos_rng, rng);
+ u32 *data = buf;
+ int retry = 100;
++ int ret = 4;
+
+ pm_runtime_get_sync(exynos_rng->dev);
+
+@@ -97,17 +98,20 @@ static int exynos_read(struct hwrng *rng, void *buf,
+ while (!(exynos_rng_readl(exynos_rng,
+ EXYNOS_PRNG_STATUS_OFFSET) & PRNG_DONE) && --retry)
+ cpu_relax();
+- if (!retry)
+- return -ETIMEDOUT;
++ if (!retry) {
++ ret = -ETIMEDOUT;
++ goto out;
++ }
+
+ exynos_rng_writel(exynos_rng, PRNG_DONE, EXYNOS_PRNG_STATUS_OFFSET);
+
+ *data = exynos_rng_readl(exynos_rng, EXYNOS_PRNG_OUT1_OFFSET);
+
++out:
+ pm_runtime_mark_last_busy(exynos_rng->dev);
+ pm_runtime_put_sync_autosuspend(exynos_rng->dev);
+
+- return 4;
++ return ret;
+ }
+
+ static int exynos_rng_probe(struct platform_device *pdev)
+diff --git a/drivers/clk/at91/clk-h32mx.c b/drivers/clk/at91/clk-h32mx.c
+index 819f5842fa66..8e20c8a76db7 100644
+--- a/drivers/clk/at91/clk-h32mx.c
++++ b/drivers/clk/at91/clk-h32mx.c
+@@ -114,7 +114,7 @@ static void __init of_sama5d4_clk_h32mx_setup(struct device_node *np)
+ h32mxclk->regmap = regmap;
+
+ clk = clk_register(NULL, &h32mxclk->hw);
+- if (!clk) {
++ if (IS_ERR(clk)) {
+ kfree(h32mxclk);
+ return;
+ }
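
The h32mx fix replaces the NULL test with IS_ERR(): clk_register() reports failure through an ERR_PTR()-encoded pointer, never through NULL, so "if (!clk)" can never catch an error. The encoding stores a small negative errno in the last page of the address space; a simplified user-space model of the helpers (not the kernel's err.h) shows why the two tests behave differently:

    #include <errno.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095

    /* Simplified stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR(). */
    static inline void *err_ptr(long error)      { return (void *)error; }
    static inline int   is_err(const void *ptr)
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }
    static inline long  ptr_err(const void *ptr) { return (long)ptr; }

    int main(void)
    {
        void *clk = err_ptr(-ENOMEM);  /* what a failing clk_register() returns */

        printf("clk == NULL?   %s\n", clk == NULL ? "yes" : "no");  /* no  */
        printf("IS_ERR(clk)?   %s\n", is_err(clk) ? "yes" : "no");  /* yes */
        printf("PTR_ERR(clk) = %ld\n", ptr_err(clk));               /* -12 */
        return 0;
    }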
+diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
+index 90338c38e38a..1f79f48d5adc 100644
+--- a/drivers/clk/bcm/clk-bcm2835.c
++++ b/drivers/clk/bcm/clk-bcm2835.c
+@@ -51,6 +51,7 @@
+ #define CM_GNRICCTL 0x000
+ #define CM_GNRICDIV 0x004
+ # define CM_DIV_FRAC_BITS 12
++# define CM_DIV_FRAC_MASK GENMASK(CM_DIV_FRAC_BITS - 1, 0)
+
+ #define CM_VPUCTL 0x008
+ #define CM_VPUDIV 0x00c
+@@ -128,6 +129,7 @@
+ # define CM_GATE BIT(CM_GATE_BIT)
+ # define CM_BUSY BIT(7)
+ # define CM_BUSYD BIT(8)
++# define CM_FRAC BIT(9)
+ # define CM_SRC_SHIFT 0
+ # define CM_SRC_BITS 4
+ # define CM_SRC_MASK 0xf
+@@ -644,6 +646,7 @@ struct bcm2835_clock_data {
+ u32 frac_bits;
+
+ bool is_vpu_clock;
++ bool is_mash_clock;
+ };
+
+ static const char *const bcm2835_clock_per_parents[] = {
+@@ -825,6 +828,7 @@ static const struct bcm2835_clock_data bcm2835_clock_pwm_data = {
+ .div_reg = CM_PWMDIV,
+ .int_bits = 12,
+ .frac_bits = 12,
++ .is_mash_clock = true,
+ };
+
+ struct bcm2835_pll {
+@@ -910,8 +914,14 @@ static void bcm2835_pll_off(struct clk_hw *hw)
+ struct bcm2835_cprman *cprman = pll->cprman;
+ const struct bcm2835_pll_data *data = pll->data;
+
+- cprman_write(cprman, data->cm_ctrl_reg, CM_PLL_ANARST);
+- cprman_write(cprman, data->a2w_ctrl_reg, A2W_PLL_CTRL_PWRDN);
++ spin_lock(&cprman->regs_lock);
++ cprman_write(cprman, data->cm_ctrl_reg,
++ cprman_read(cprman, data->cm_ctrl_reg) |
++ CM_PLL_ANARST);
++ cprman_write(cprman, data->a2w_ctrl_reg,
++ cprman_read(cprman, data->a2w_ctrl_reg) |
++ A2W_PLL_CTRL_PWRDN);
++ spin_unlock(&cprman->regs_lock);
+ }
+
+ static int bcm2835_pll_on(struct clk_hw *hw)
+@@ -921,6 +931,10 @@ static int bcm2835_pll_on(struct clk_hw *hw)
+ const struct bcm2835_pll_data *data = pll->data;
+ ktime_t timeout;
+
++ cprman_write(cprman, data->a2w_ctrl_reg,
++ cprman_read(cprman, data->a2w_ctrl_reg) &
++ ~A2W_PLL_CTRL_PWRDN);
++
+ /* Take the PLL out of reset. */
+ cprman_write(cprman, data->cm_ctrl_reg,
+ cprman_read(cprman, data->cm_ctrl_reg) & ~CM_PLL_ANARST);
+@@ -1174,7 +1188,7 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw,
+ GENMASK(CM_DIV_FRAC_BITS - data->frac_bits, 0) >> 1;
+ u64 temp = (u64)parent_rate << CM_DIV_FRAC_BITS;
+ u64 rem;
+- u32 div;
++ u32 div, mindiv, maxdiv;
+
+ rem = do_div(temp, rate);
+ div = temp;
+@@ -1184,10 +1198,23 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw,
+ div += unused_frac_mask + 1;
+ div &= ~unused_frac_mask;
+
+- /* Clamp to the limits. */
+- div = max(div, unused_frac_mask + 1);
+- div = min_t(u32, div, GENMASK(data->int_bits + CM_DIV_FRAC_BITS - 1,
+- CM_DIV_FRAC_BITS - data->frac_bits));
++ /* different clamping limits apply for a mash clock */
++ if (data->is_mash_clock) {
++ /* clamp to min divider of 2 */
++ mindiv = 2 << CM_DIV_FRAC_BITS;
++ /* clamp to the highest possible integer divider */
++ maxdiv = (BIT(data->int_bits) - 1) << CM_DIV_FRAC_BITS;
++ } else {
++ /* clamp to min divider of 1 */
++ mindiv = 1 << CM_DIV_FRAC_BITS;
++ /* clamp to the highest possible fractional divider */
++ maxdiv = GENMASK(data->int_bits + CM_DIV_FRAC_BITS - 1,
++ CM_DIV_FRAC_BITS - data->frac_bits);
++ }
++
++ /* apply the clamping limits */
++ div = max_t(u32, div, mindiv);
++ div = min_t(u32, div, maxdiv);
+
+ return div;
+ }
+@@ -1281,9 +1308,26 @@ static int bcm2835_clock_set_rate(struct clk_hw *hw,
+ struct bcm2835_cprman *cprman = clock->cprman;
+ const struct bcm2835_clock_data *data = clock->data;
+ u32 div = bcm2835_clock_choose_div(hw, rate, parent_rate, false);
++ u32 ctl;
++
++ spin_lock(&cprman->regs_lock);
++
++ /*
++ * Setting up frac support
++ *
++ * In principle it is recommended to stop/start the clock first,
++ * but as we set CLK_SET_RATE_GATE during registration of the
++ * clock this requirement should be taken care of by the
++ * clk-framework.
++ */
++ ctl = cprman_read(cprman, data->ctl_reg) & ~CM_FRAC;
++ ctl |= (div & CM_DIV_FRAC_MASK) ? CM_FRAC : 0;
++ cprman_write(cprman, data->ctl_reg, ctl);
+
+ cprman_write(cprman, data->div_reg, div);
+
++ spin_unlock(&cprman->regs_lock);
++
+ return 0;
+ }
+
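
bcm2835_clock_choose_div() works on a 12.12 fixed-point divider: the integer part sits above CM_DIV_FRAC_BITS and the fraction below it. The patch gives MASH (PWM) clocks their own clamping limits (an integer-only maximum and a minimum divider of 2) and sets the new CM_FRAC bit only when the chosen divider actually has a fractional part. The fixed-point arithmetic can be tried in user space; the sketch below is simplified (no rounding to the clock's supported fraction width) and the rates are arbitrary:

    #include <stdint.h>
    #include <stdio.h>

    #define FRAC_BITS 12
    #define INT_BITS  12

    /* Compute a 12.12 fixed-point divider for parent_rate/rate, then clamp
     * it the way the patch does: MASH clocks get [2.0, max integer],
     * everything else gets [1.0, max fractional]. */
    static uint32_t choose_div(uint64_t parent_rate, uint64_t rate, int is_mash)
    {
        uint64_t temp = (parent_rate << FRAC_BITS) / rate;  /* truncating */
        uint32_t div = (uint32_t)temp;
        uint32_t mindiv, maxdiv;

        if (is_mash) {
            mindiv = 2u << FRAC_BITS;
            maxdiv = ((1u << INT_BITS) - 1) << FRAC_BITS;
        } else {
            mindiv = 1u << FRAC_BITS;
            maxdiv = (1u << (INT_BITS + FRAC_BITS)) - 1;
        }

        if (div < mindiv) div = mindiv;
        if (div > maxdiv) div = maxdiv;
        return div;
    }

    int main(void)
    {
        /* 500 MHz parent, 19.2 MHz target: divider ~26.04 -> 0x1a0aa in 12.12 */
        uint32_t div = choose_div(500000000ull, 19200000ull, 0);

        printf("div = %u.%03u (raw 0x%x)\n",
               div >> FRAC_BITS,
               (div & ((1u << FRAC_BITS) - 1)) * 1000 / (1u << FRAC_BITS),
               div);
        return 0;
    }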
+diff --git a/drivers/clk/imx/clk-imx35.c b/drivers/clk/imx/clk-imx35.c
+index a71d24cb4c06..b0978d3b83e2 100644
+--- a/drivers/clk/imx/clk-imx35.c
++++ b/drivers/clk/imx/clk-imx35.c
+@@ -66,7 +66,7 @@ static const char *std_sel[] = {"ppll", "arm"};
+ static const char *ipg_per_sel[] = {"ahb_per_div", "arm_per_div"};
+
+ enum mx35_clks {
+- ckih, ckil, mpll, ppll, mpll_075, arm, hsp, hsp_div, hsp_sel, ahb, ipg,
++ ckih, mpll, ppll, mpll_075, arm, hsp, hsp_div, hsp_sel, ahb, ipg,
+ arm_per_div, ahb_per_div, ipg_per, uart_sel, uart_div, esdhc_sel,
+ esdhc1_div, esdhc2_div, esdhc3_div, spdif_sel, spdif_div_pre,
+ spdif_div_post, ssi_sel, ssi1_div_pre, ssi1_div_post, ssi2_div_pre,
+@@ -79,7 +79,7 @@ enum mx35_clks {
+ rtc_gate, rtic_gate, scc_gate, sdma_gate, spba_gate, spdif_gate,
+ ssi1_gate, ssi2_gate, uart1_gate, uart2_gate, uart3_gate, usbotg_gate,
+ wdog_gate, max_gate, admux_gate, csi_gate, csi_div, csi_sel, iim_gate,
+- gpu2d_gate, clk_max
++ gpu2d_gate, ckil, clk_max
+ };
+
+ static struct clk *clk[clk_max];
+diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
+index f996efc56605..0db185762a10 100644
+--- a/drivers/cpuidle/cpuidle.c
++++ b/drivers/cpuidle/cpuidle.c
+@@ -214,7 +214,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
+ tick_broadcast_exit();
+ }
+
+- if (!cpuidle_state_is_coupled(drv, entered_state))
++ if (!cpuidle_state_is_coupled(drv, index))
+ local_irq_enable();
+
+ diff = ktime_to_us(ktime_sub(time_end, time_start));
+@@ -433,6 +433,8 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)
+ list_del(&dev->device_list);
+ per_cpu(cpuidle_devices, dev->cpu) = NULL;
+ module_put(drv->owner);
++
++ dev->registered = 0;
+ }
+
+ static void __cpuidle_device_init(struct cpuidle_device *dev)
+diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
+index 6eb94fc561dc..22228ef50f36 100644
+--- a/drivers/gpu/drm/Makefile
++++ b/drivers/gpu/drm/Makefile
+@@ -23,7 +23,7 @@ drm-$(CONFIG_AGP) += drm_agpsupport.o
+
+ drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
+ drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
+- drm_kms_helper_common.o
++ drm_kms_helper_common.o drm_dp_dual_mode_helper.o
+
+ drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
+ drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+index 119cdc2c43e7..7ef2c13921b4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+@@ -194,12 +194,12 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
+ bpc = 8;
+ DRM_DEBUG("%s: HDMI deep color 10 bpc exceeds max tmds clock. Using %d bpc.\n",
+ connector->name, bpc);
+- } else if (bpc > 8) {
+- /* max_tmds_clock missing, but hdmi spec mandates it for deep color. */
+- DRM_DEBUG("%s: Required max tmds clock for HDMI deep color missing. Using 8 bpc.\n",
+- connector->name);
+- bpc = 8;
+ }
++ } else if (bpc > 8) {
++ /* max_tmds_clock missing, but hdmi spec mandates it for deep color. */
++ DRM_DEBUG("%s: Required max tmds clock for HDMI deep color missing. Using 8 bpc.\n",
++ connector->name);
++ bpc = 8;
+ }
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+index 7b7f4aba60c0..fe36caf1b7d7 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+@@ -150,7 +150,7 @@ u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev)
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ amdgpu_crtc = to_amdgpu_crtc(crtc);
+ if (crtc->enabled && amdgpu_crtc->enabled && amdgpu_crtc->hw_mode.clock) {
+- vrefresh = amdgpu_crtc->hw_mode.vrefresh;
++ vrefresh = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
+ break;
+ }
+ }
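
The amdgpu change stops trusting the cached hw_mode.vrefresh field, which may legitimately be zero, and recomputes the refresh rate from the mode timings with drm_mode_vrefresh(). Ignoring the interlace/doublescan corrections the DRM helper also applies, that is just pixel clock divided by total raster size; a rough user-space check with a common 1080p60 timing:

    #include <stdio.h>

    /* Simplified refresh-rate calculation; the real DRM helper additionally
     * accounts for interlaced, doublescan and vscan modes. */
    static int mode_vrefresh(int clock_khz, int htotal, int vtotal)
    {
        return (clock_khz * 1000 + htotal * vtotal / 2) / (htotal * vtotal);
    }

    int main(void)
    {
        /* 148.5 MHz pixel clock, 2200 x 1125 total raster -> 60 Hz */
        printf("vrefresh = %d Hz\n", mode_vrefresh(148500, 2200, 1125));
        return 0;
    }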
+diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
+index 8ee1db866e80..d307d9627887 100644
+--- a/drivers/gpu/drm/drm_atomic.c
++++ b/drivers/gpu/drm/drm_atomic.c
+@@ -139,7 +139,7 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
+ for (i = 0; i < state->num_connector; i++) {
+ struct drm_connector *connector = state->connectors[i];
+
+- if (!connector)
++ if (!connector || !connector->funcs)
+ continue;
+
+ /*
+diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
+new file mode 100644
+index 000000000000..a7b2a751f6fe
+--- /dev/null
++++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
+@@ -0,0 +1,366 @@
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include <linux/errno.h>
++#include <linux/export.h>
++#include <linux/i2c.h>
++#include <linux/slab.h>
++#include <linux/string.h>
++#include <drm/drm_dp_dual_mode_helper.h>
++#include <drm/drmP.h>
++
++/**
++ * DOC: dp dual mode helpers
++ *
++ * Helper functions to deal with DP dual mode (aka. DP++) adaptors.
++ *
++ * Type 1:
++ * Adaptor registers (if any) and the sink DDC bus may be accessed via I2C.
++ *
++ * Type 2:
++ * Adaptor registers and sink DDC bus can be accessed either via I2C or
++ * I2C-over-AUX. Source devices may choose to implement either of these
++ * access methods.
++ */
++
++#define DP_DUAL_MODE_SLAVE_ADDRESS 0x40
++
++/**
++ * drm_dp_dual_mode_read - Read from the DP dual mode adaptor register(s)
++ * @adapter: I2C adapter for the DDC bus
++ * @offset: register offset
++ * @buffer: buffer for return data
++ * @size: size of the buffer
++ *
++ * Reads @size bytes from the DP dual mode adaptor registers
++ * starting at @offset.
++ *
++ * Returns:
++ * 0 on success, negative error code on failure
++ */
++ssize_t drm_dp_dual_mode_read(struct i2c_adapter *adapter,
++ u8 offset, void *buffer, size_t size)
++{
++ struct i2c_msg msgs[] = {
++ {
++ .addr = DP_DUAL_MODE_SLAVE_ADDRESS,
++ .flags = 0,
++ .len = 1,
++ .buf = &offset,
++ },
++ {
++ .addr = DP_DUAL_MODE_SLAVE_ADDRESS,
++ .flags = I2C_M_RD,
++ .len = size,
++ .buf = buffer,
++ },
++ };
++ int ret;
++
++ ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs));
++ if (ret < 0)
++ return ret;
++ if (ret != ARRAY_SIZE(msgs))
++ return -EPROTO;
++
++ return 0;
++}
++EXPORT_SYMBOL(drm_dp_dual_mode_read);
++
++/**
++ * drm_dp_dual_mode_write - Write to the DP dual mode adaptor register(s)
++ * @adapter: I2C adapter for the DDC bus
++ * @offset: register offset
++ * @buffer: buffer for write data
++ * @size: size of the buffer
++ *
++ * Writes @size bytes to the DP dual mode adaptor registers
++ * starting at @offset.
++ *
++ * Returns:
++ * 0 on success, negative error code on failure
++ */
++ssize_t drm_dp_dual_mode_write(struct i2c_adapter *adapter,
++ u8 offset, const void *buffer, size_t size)
++{
++ struct i2c_msg msg = {
++ .addr = DP_DUAL_MODE_SLAVE_ADDRESS,
++ .flags = 0,
++ .len = 1 + size,
++ .buf = NULL,
++ };
++ void *data;
++ int ret;
++
++ data = kmalloc(msg.len, GFP_TEMPORARY);
++ if (!data)
++ return -ENOMEM;
++
++ msg.buf = data;
++
++ memcpy(data, &offset, 1);
++ memcpy(data + 1, buffer, size);
++
++ ret = i2c_transfer(adapter, &msg, 1);
++
++ kfree(data);
++
++ if (ret < 0)
++ return ret;
++ if (ret != 1)
++ return -EPROTO;
++
++ return 0;
++}
++EXPORT_SYMBOL(drm_dp_dual_mode_write);
++
++static bool is_hdmi_adaptor(const char hdmi_id[DP_DUAL_MODE_HDMI_ID_LEN])
++{
++ static const char dp_dual_mode_hdmi_id[DP_DUAL_MODE_HDMI_ID_LEN] =
++ "DP-HDMI ADAPTOR\x04";
++
++ return memcmp(hdmi_id, dp_dual_mode_hdmi_id,
++ sizeof(dp_dual_mode_hdmi_id)) == 0;
++}
++
++static bool is_type2_adaptor(uint8_t adaptor_id)
++{
++ return adaptor_id == (DP_DUAL_MODE_TYPE_TYPE2 |
++ DP_DUAL_MODE_REV_TYPE2);
++}
++
++/**
++ * drm_dp_dual_mode_detect - Identify the DP dual mode adaptor
++ * @adapter: I2C adapter for the DDC bus
++ *
++ * Attempt to identify the type of the DP dual mode adaptor used.
++ *
++ * Note that when the answer is @DRM_DP_DUAL_MODE_UNKNOWN it's not
++ * certain whether we're dealing with a native HDMI port or
++ * a type 1 DVI dual mode adaptor. The driver will have to use
++ * some other hardware/driver specific mechanism to make that
++ * distinction.
++ *
++ * Returns:
++ * The type of the DP dual mode adaptor used
++ */
++enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(struct i2c_adapter *adapter)
++{
++ char hdmi_id[DP_DUAL_MODE_HDMI_ID_LEN] = {};
++ uint8_t adaptor_id = 0x00;
++ ssize_t ret;
++
++ /*
++ * Let's see if the adaptor is there by reading the
++ * HDMI ID registers.
++ *
++ * Note that type 1 DVI adaptors are not required to implement
++ * any registers, and that presents a problem for detection.
++ * If the i2c transfer is nacked, we may or may not be dealing
++ * with a type 1 DVI adaptor. Some other mechanism of detecting
++ * the presence of the adaptor is required. One way would be
++ * to check the state of the CONFIG1 pin, Another method would
++ * simply require the driver to know whether the port is a DP++
++ * port or a native HDMI port. Both of these methods are entirely
++ * hardware/driver specific so we can't deal with them here.
++ */
++ ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_HDMI_ID,
++ hdmi_id, sizeof(hdmi_id));
++ if (ret)
++ return DRM_DP_DUAL_MODE_UNKNOWN;
++
++ /*
++ * Sigh. Some (maybe all?) type 1 adaptors are broken and ack
++ * the offset but ignore it, and instead they just always return
++ * data from the start of the HDMI ID buffer. So for a broken
++ * type 1 HDMI adaptor a single byte read will always give us
++ * 0x44, and for a type 1 DVI adaptor it should give 0x00
++ * (assuming it implements any registers). Fortunately neither
++ * of those values will match the type 2 signature of the
++ * DP_DUAL_MODE_ADAPTOR_ID register so we can proceed with
++ * the type 2 adaptor detection safely even in the presence
++ * of broken type 1 adaptors.
++ */
++ ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_ADAPTOR_ID,
++ &adaptor_id, sizeof(adaptor_id));
++ if (ret == 0) {
++ if (is_type2_adaptor(adaptor_id)) {
++ if (is_hdmi_adaptor(hdmi_id))
++ return DRM_DP_DUAL_MODE_TYPE2_HDMI;
++ else
++ return DRM_DP_DUAL_MODE_TYPE2_DVI;
++ }
++ }
++
++ if (is_hdmi_adaptor(hdmi_id))
++ return DRM_DP_DUAL_MODE_TYPE1_HDMI;
++ else
++ return DRM_DP_DUAL_MODE_TYPE1_DVI;
++}
++EXPORT_SYMBOL(drm_dp_dual_mode_detect);
++
++/**
++ * drm_dp_dual_mode_max_tmds_clock - Max TMDS clock for DP dual mode adaptor
++ * @type: DP dual mode adaptor type
++ * @adapter: I2C adapter for the DDC bus
++ *
++ * Determine the max TMDS clock the adaptor supports based on the
++ * type of the dual mode adaptor and the DP_DUAL_MODE_MAX_TMDS_CLOCK
++ * register (on type2 adaptors). As some type 1 adaptors have
++ * problems with registers (see comments in drm_dp_dual_mode_detect())
++ * we don't read the register on those, instead we simply assume
++ * a 165 MHz limit based on the specification.
++ *
++ * Returns:
++ * Maximum supported TMDS clock rate for the DP dual mode adaptor in kHz.
++ */
++int drm_dp_dual_mode_max_tmds_clock(enum drm_dp_dual_mode_type type,
++ struct i2c_adapter *adapter)
++{
++ uint8_t max_tmds_clock;
++ ssize_t ret;
++
++ /* native HDMI so no limit */
++ if (type == DRM_DP_DUAL_MODE_NONE)
++ return 0;
++
++ /*
++ * Type 1 adaptors are limited to 165MHz
++ * Type 2 adaptors can tell us their limit
++ */
++ if (type < DRM_DP_DUAL_MODE_TYPE2_DVI)
++ return 165000;
++
++ ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_MAX_TMDS_CLOCK,
++ &max_tmds_clock, sizeof(max_tmds_clock));
++ if (ret || max_tmds_clock == 0x00 || max_tmds_clock == 0xff) {
++ DRM_DEBUG_KMS("Failed to query max TMDS clock\n");
++ return 165000;
++ }
++
++ return max_tmds_clock * 5000 / 2;
++}
++EXPORT_SYMBOL(drm_dp_dual_mode_max_tmds_clock);
++
++/**
++ * drm_dp_dual_mode_get_tmds_output - Get the state of the TMDS output buffers in the DP dual mode adaptor
++ * @type: DP dual mode adaptor type
++ * @adapter: I2C adapter for the DDC bus
++ * @enabled: current state of the TMDS output buffers
++ *
++ * Get the state of the TMDS output buffers in the adaptor. For
++ * type2 adaptors this is queried from the DP_DUAL_MODE_TMDS_OEN
++ * register. As some type 1 adaptors have problems with registers
++ * (see comments in drm_dp_dual_mode_detect()) we don't read the
++ * register on those, instead we simply assume that the buffers
++ * are always enabled.
++ *
++ * Returns:
++ * 0 on success, negative error code on failure
++ */
++int drm_dp_dual_mode_get_tmds_output(enum drm_dp_dual_mode_type type,
++ struct i2c_adapter *adapter,
++ bool *enabled)
++{
++ uint8_t tmds_oen;
++ ssize_t ret;
++
++ if (type < DRM_DP_DUAL_MODE_TYPE2_DVI) {
++ *enabled = true;
++ return 0;
++ }
++
++ ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_TMDS_OEN,
++ &tmds_oen, sizeof(tmds_oen));
++ if (ret) {
++ DRM_DEBUG_KMS("Failed to query state of TMDS output buffers\n");
++ return ret;
++ }
++
++ *enabled = !(tmds_oen & DP_DUAL_MODE_TMDS_DISABLE);
++
++ return 0;
++}
++EXPORT_SYMBOL(drm_dp_dual_mode_get_tmds_output);
++
++/**
++ * drm_dp_dual_mode_set_tmds_output - Enable/disable TMDS output buffers in the DP dual mode adaptor
++ * @type: DP dual mode adaptor type
++ * @adapter: I2C adapter for the DDC bus
++ * @enable: enable (as opposed to disable) the TMDS output buffers
++ *
++ * Set the state of the TMDS output buffers in the adaptor. For
++ * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. As
++ * some type 1 adaptors have problems with registers (see comments
++ * in drm_dp_dual_mode_detect()) we avoid touching the register,
++ * making this function a no-op on type 1 adaptors.
++ *
++ * Returns:
++ * 0 on success, negative error code on failure
++ */
++int drm_dp_dual_mode_set_tmds_output(enum drm_dp_dual_mode_type type,
++ struct i2c_adapter *adapter, bool enable)
++{
++ uint8_t tmds_oen = enable ? 0 : DP_DUAL_MODE_TMDS_DISABLE;
++ ssize_t ret;
++
++ if (type < DRM_DP_DUAL_MODE_TYPE2_DVI)
++ return 0;
++
++ ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN,
++ &tmds_oen, sizeof(tmds_oen));
++ if (ret) {
++ DRM_DEBUG_KMS("Failed to %s TMDS output buffers\n",
++ enable ? "enable" : "disable");
++ return ret;
++ }
++
++ return 0;
++}
++EXPORT_SYMBOL(drm_dp_dual_mode_set_tmds_output);
++
++/**
++ * drm_dp_get_dual_mode_type_name - Get the name of the DP dual mode adaptor type as a string
++ * @type: DP dual mode adaptor type
++ *
++ * Returns:
++ * String representation of the DP dual mode adaptor type
++ */
++const char *drm_dp_get_dual_mode_type_name(enum drm_dp_dual_mode_type type)
++{
++ switch (type) {
++ case DRM_DP_DUAL_MODE_NONE:
++ return "none";
++ case DRM_DP_DUAL_MODE_TYPE1_DVI:
++ return "type 1 DVI";
++ case DRM_DP_DUAL_MODE_TYPE1_HDMI:
++ return "type 1 HDMI";
++ case DRM_DP_DUAL_MODE_TYPE2_DVI:
++ return "type 2 DVI";
++ case DRM_DP_DUAL_MODE_TYPE2_HDMI:
++ return "type 2 HDMI";
++ default:
++ WARN_ON(type != DRM_DP_DUAL_MODE_UNKNOWN);
++ return "unknown";
++ }
++}
++EXPORT_SYMBOL(drm_dp_get_dual_mode_type_name);
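
drm_dp_dual_mode_max_tmds_clock() above converts the type 2 register value to kHz with max_tmds_clock * 5000 / 2, which implies the register counts in 2.5 MHz units; type 1 adaptors are simply assumed to top out at the 165 MHz the spec guarantees. A quick conversion check (the register values below are chosen for the example, not read from hardware):

    #include <stdio.h>

    /* Register value -> kHz, mirroring the helper's arithmetic. */
    static int tmds_reg_to_khz(unsigned int reg)
    {
        return reg * 5000 / 2;
    }

    int main(void)
    {
        /* 66 corresponds to the classic 165 MHz single-link limit,
         * 120 to a 300 MHz capable type 2 adaptor. */
        printf("reg 66  -> %d kHz\n", tmds_reg_to_khz(66));   /* 165000 */
        printf("reg 120 -> %d kHz\n", tmds_reg_to_khz(120));  /* 300000 */
        return 0;
    }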
+diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
+index 855108e6e1bd..fe4df976f0b8 100644
+--- a/drivers/gpu/drm/drm_fb_helper.c
++++ b/drivers/gpu/drm/drm_fb_helper.c
+@@ -1895,7 +1895,6 @@ static int drm_pick_crtcs(struct drm_fb_helper *fb_helper,
+ int n, int width, int height)
+ {
+ int c, o;
+- struct drm_device *dev = fb_helper->dev;
+ struct drm_connector *connector;
+ const struct drm_connector_helper_funcs *connector_funcs;
+ struct drm_encoder *encoder;
+@@ -1914,7 +1913,7 @@ static int drm_pick_crtcs(struct drm_fb_helper *fb_helper,
+ if (modes[n] == NULL)
+ return best_score;
+
+- crtcs = kzalloc(dev->mode_config.num_connector *
++ crtcs = kzalloc(fb_helper->connector_count *
+ sizeof(struct drm_fb_helper_crtc *), GFP_KERNEL);
+ if (!crtcs)
+ return best_score;
+@@ -1960,7 +1959,7 @@ static int drm_pick_crtcs(struct drm_fb_helper *fb_helper,
+ if (score > best_score) {
+ best_score = score;
+ memcpy(best_crtcs, crtcs,
+- dev->mode_config.num_connector *
++ fb_helper->connector_count *
+ sizeof(struct drm_fb_helper_crtc *));
+ }
+ }
+diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c b/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c
+index 6b43ae3ffd73..1616af209bfc 100644
+--- a/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c
++++ b/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c
+@@ -72,7 +72,7 @@ static const char *const dsi_errors[] = {
+ "RX Prot Violation",
+ "HS Generic Write FIFO Full",
+ "LP Generic Write FIFO Full",
+- "Generic Read Data Avail"
++ "Generic Read Data Avail",
+ "Special Packet Sent",
+ "Tearing Effect",
+ };
+diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c
+index 8e579a8505ac..e7c1686e479c 100644
+--- a/drivers/gpu/drm/i915/intel_atomic.c
++++ b/drivers/gpu/drm/i915/intel_atomic.c
+@@ -96,7 +96,8 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc)
+ crtc_state->update_pipe = false;
+ crtc_state->disable_lp_wm = false;
+ crtc_state->disable_cxsr = false;
+- crtc_state->wm_changed = false;
++ crtc_state->update_wm_pre = false;
++ crtc_state->update_wm_post = false;
+ crtc_state->fb_changed = false;
+
+ return &crtc_state->base;
+diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
+index e0b851a0004a..7de7721f65bc 100644
+--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
++++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
+@@ -195,12 +195,10 @@ static void intel_plane_atomic_update(struct drm_plane *plane,
+ struct intel_plane_state *intel_state =
+ to_intel_plane_state(plane->state);
+ struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc;
+- struct drm_crtc_state *crtc_state =
+- drm_atomic_get_existing_crtc_state(old_state->state, crtc);
+
+ if (intel_state->visible)
+ intel_plane->update_plane(plane,
+- to_intel_crtc_state(crtc_state),
++ to_intel_crtc_state(crtc->state),
+ intel_state);
+ else
+ intel_plane->disable_plane(plane, crtc);
+diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
+index 96ffcc541e17..50f5b0c975e3 100644
+--- a/drivers/gpu/drm/i915/intel_ddi.c
++++ b/drivers/gpu/drm/i915/intel_ddi.c
+@@ -2309,6 +2309,12 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
+ enum port port = intel_ddi_get_encoder_port(intel_encoder);
+ int type = intel_encoder->type;
+
++ if (type == INTEL_OUTPUT_HDMI) {
++ struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
++
++ intel_dp_dual_mode_set_tmds_output(intel_hdmi, true);
++ }
++
+ intel_prepare_ddi_buffer(intel_encoder);
+
+ if (type == INTEL_OUTPUT_EDP) {
+@@ -2375,6 +2381,12 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder)
+ DPLL_CTRL2_DDI_CLK_OFF(port)));
+ else if (INTEL_INFO(dev)->gen < 9)
+ I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE);
++
++ if (type == INTEL_OUTPUT_HDMI) {
++ struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
++
++ intel_dp_dual_mode_set_tmds_output(intel_hdmi, false);
++ }
+ }
+
+ static void intel_enable_ddi(struct intel_encoder *intel_encoder)
+diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
+index 0104a06d01fd..7741efbd5e57 100644
+--- a/drivers/gpu/drm/i915/intel_display.c
++++ b/drivers/gpu/drm/i915/intel_display.c
+@@ -4796,7 +4796,7 @@ static void intel_post_plane_update(struct intel_crtc *crtc)
+
+ crtc->wm.cxsr_allowed = true;
+
+- if (pipe_config->wm_changed && pipe_config->base.active)
++ if (pipe_config->update_wm_post && pipe_config->base.active)
+ intel_update_watermarks(&crtc->base);
+
+ if (atomic->update_fbc)
+@@ -4843,7 +4843,7 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state)
+ intel_set_memory_cxsr(dev_priv, false);
+ }
+
+- if (!needs_modeset(&pipe_config->base) && pipe_config->wm_changed)
++ if (!needs_modeset(&pipe_config->base) && pipe_config->update_wm_pre)
+ intel_update_watermarks(&crtc->base);
+ }
+
+@@ -6210,6 +6210,7 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc)
+
+ intel_crtc_load_lut(crtc);
+
++ intel_update_watermarks(crtc);
+ intel_enable_pipe(intel_crtc);
+
+ assert_vblank_disabled(crtc);
+@@ -11833,14 +11834,22 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
+ plane->base.id, was_visible, visible,
+ turn_off, turn_on, mode_changed);
+
+- if (turn_on || turn_off) {
+- pipe_config->wm_changed = true;
++ if (turn_on) {
++ pipe_config->update_wm_pre = true;
++
++ /* must disable cxsr around plane enable/disable */
++ if (plane->type != DRM_PLANE_TYPE_CURSOR)
++ pipe_config->disable_cxsr = true;
++ } else if (turn_off) {
++ pipe_config->update_wm_post = true;
+
+ /* must disable cxsr around plane enable/disable */
+ if (plane->type != DRM_PLANE_TYPE_CURSOR)
+ pipe_config->disable_cxsr = true;
+ } else if (intel_wm_need_update(plane, plane_state)) {
+- pipe_config->wm_changed = true;
++ /* FIXME bollocks */
++ pipe_config->update_wm_pre = true;
++ pipe_config->update_wm_post = true;
+ }
+
+ if (visible || was_visible)
+@@ -11940,7 +11949,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
+ }
+
+ if (mode_changed && !crtc_state->active)
+- pipe_config->wm_changed = true;
++ pipe_config->update_wm_post = true;
+
+ if (mode_changed && crtc_state->enable &&
+ dev_priv->display.crtc_compute_clock &&
+@@ -13453,12 +13462,12 @@ static bool needs_vblank_wait(struct intel_crtc_state *crtc_state)
+ return true;
+
+ /* wm changes, need vblank before final wm's */
+- if (crtc_state->wm_changed)
++ if (crtc_state->update_wm_post)
+ return true;
+
+ /*
+ * cxsr is re-enabled after vblank.
+- * This is already handled by crtc_state->wm_changed,
++ * This is already handled by crtc_state->update_wm_post,
+ * but added for clarity.
+ */
+ if (crtc_state->disable_cxsr)
+@@ -15958,6 +15967,18 @@ void intel_display_resume(struct drm_device *dev)
+ retry:
+ ret = drm_modeset_lock_all_ctx(dev, &ctx);
+
++ /*
++ * With MST, the number of connectors can change between suspend and
++ * resume, which means that the state we want to restore might now be
++ * impossible to use since it'll be pointing to non-existant
++ * connectors.
++ */
++ if (ret == 0 && state &&
++ state->num_connector != dev->mode_config.num_connector) {
++ drm_atomic_state_free(state);
++ state = NULL;
++ }
++
+ if (ret == 0 && !setup) {
+ setup = true;
+
+diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
+index 9d0770c23fde..3a30b37d6885 100644
+--- a/drivers/gpu/drm/i915/intel_drv.h
++++ b/drivers/gpu/drm/i915/intel_drv.h
+@@ -33,6 +33,7 @@
+ #include <drm/drm_crtc.h>
+ #include <drm/drm_crtc_helper.h>
+ #include <drm/drm_fb_helper.h>
++#include <drm/drm_dp_dual_mode_helper.h>
+ #include <drm/drm_dp_mst_helper.h>
+ #include <drm/drm_rect.h>
+ #include <drm/drm_atomic.h>
+@@ -378,7 +379,7 @@ struct intel_crtc_state {
+
+ bool update_pipe; /* can a fast modeset be performed? */
+ bool disable_cxsr;
+- bool wm_changed; /* watermarks are updated */
++ bool update_wm_pre, update_wm_post; /* watermarks are updated */
+ bool fb_changed; /* fb on any of the planes is changed */
+
+ /* Pipe source size (ie. panel fitter input size)
+@@ -703,6 +704,10 @@ struct cxsr_latency {
+ struct intel_hdmi {
+ i915_reg_t hdmi_reg;
+ int ddc_bus;
++ struct {
++ enum drm_dp_dual_mode_type type;
++ int max_tmds_clock;
++ } dp_dual_mode;
+ bool limited_color_range;
+ bool color_range_auto;
+ bool has_hdmi_sink;
+@@ -1351,6 +1356,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
+ struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder);
+ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
+ struct intel_crtc_state *pipe_config);
++void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable);
+
+
+ /* intel_lvds.c */
+diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
+index 97a91e631915..c607217c13ea 100644
+--- a/drivers/gpu/drm/i915/intel_fbdev.c
++++ b/drivers/gpu/drm/i915/intel_fbdev.c
+@@ -366,12 +366,12 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
+ uint64_t conn_configured = 0, mask;
+ int pass = 0;
+
+- save_enabled = kcalloc(dev->mode_config.num_connector, sizeof(bool),
++ save_enabled = kcalloc(fb_helper->connector_count, sizeof(bool),
+ GFP_KERNEL);
+ if (!save_enabled)
+ return false;
+
+- memcpy(save_enabled, enabled, dev->mode_config.num_connector);
++ memcpy(save_enabled, enabled, fb_helper->connector_count);
+ mask = (1 << fb_helper->connector_count) - 1;
+ retry:
+ for (i = 0; i < fb_helper->connector_count; i++) {
+@@ -510,7 +510,7 @@ retry:
+ if (fallback) {
+ bail:
+ DRM_DEBUG_KMS("Not using firmware configuration\n");
+- memcpy(enabled, save_enabled, dev->mode_config.num_connector);
++ memcpy(enabled, save_enabled, fb_helper->connector_count);
+ kfree(save_enabled);
+ return false;
+ }
+diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
+index 1ab6f687f640..3ddb4fac53fa 100644
+--- a/drivers/gpu/drm/i915/intel_hdmi.c
++++ b/drivers/gpu/drm/i915/intel_hdmi.c
+@@ -836,6 +836,22 @@ static void hsw_set_infoframes(struct drm_encoder *encoder,
+ intel_hdmi_set_hdmi_infoframe(encoder, adjusted_mode);
+ }
+
++void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable)
++{
++ struct drm_i915_private *dev_priv = to_i915(intel_hdmi_to_dev(hdmi));
++ struct i2c_adapter *adapter =
++ intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
++
++ if (hdmi->dp_dual_mode.type < DRM_DP_DUAL_MODE_TYPE2_DVI)
++ return;
++
++ DRM_DEBUG_KMS("%s DP dual mode adaptor TMDS output\n",
++ enable ? "Enabling" : "Disabling");
++
++ drm_dp_dual_mode_set_tmds_output(hdmi->dp_dual_mode.type,
++ adapter, enable);
++}
++
+ static void intel_hdmi_prepare(struct intel_encoder *encoder)
+ {
+ struct drm_device *dev = encoder->base.dev;
+@@ -845,6 +861,8 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder)
+ const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode;
+ u32 hdmi_val;
+
++ intel_dp_dual_mode_set_tmds_output(intel_hdmi, true);
++
+ hdmi_val = SDVO_ENCODING_HDMI;
+ if (!HAS_PCH_SPLIT(dev) && crtc->config->limited_color_range)
+ hdmi_val |= HDMI_COLOR_RANGE_16_235;
+@@ -1143,6 +1161,8 @@ static void intel_disable_hdmi(struct intel_encoder *encoder)
+ }
+
+ intel_hdmi->set_infoframes(&encoder->base, false, NULL);
++
++ intel_dp_dual_mode_set_tmds_output(intel_hdmi, false);
+ }
+
+ static void g4x_disable_hdmi(struct intel_encoder *encoder)
+@@ -1168,27 +1188,42 @@ static void pch_post_disable_hdmi(struct intel_encoder *encoder)
+ intel_disable_hdmi(encoder);
+ }
+
+-static int hdmi_port_clock_limit(struct intel_hdmi *hdmi, bool respect_dvi_limit)
++static int intel_hdmi_source_max_tmds_clock(struct drm_i915_private *dev_priv)
+ {
+- struct drm_device *dev = intel_hdmi_to_dev(hdmi);
+-
+- if ((respect_dvi_limit && !hdmi->has_hdmi_sink) || IS_G4X(dev))
++ if (IS_G4X(dev_priv))
+ return 165000;
+- else if (IS_HASWELL(dev) || INTEL_INFO(dev)->gen >= 8)
++ else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
+ return 300000;
+ else
+ return 225000;
+ }
+
++static int hdmi_port_clock_limit(struct intel_hdmi *hdmi,
++ bool respect_downstream_limits)
++{
++ struct drm_device *dev = intel_hdmi_to_dev(hdmi);
++ int max_tmds_clock = intel_hdmi_source_max_tmds_clock(to_i915(dev));
++
++ if (respect_downstream_limits) {
++ if (hdmi->dp_dual_mode.max_tmds_clock)
++ max_tmds_clock = min(max_tmds_clock,
++ hdmi->dp_dual_mode.max_tmds_clock);
++ if (!hdmi->has_hdmi_sink)
++ max_tmds_clock = min(max_tmds_clock, 165000);
++ }
++
++ return max_tmds_clock;
++}
++
+ static enum drm_mode_status
+ hdmi_port_clock_valid(struct intel_hdmi *hdmi,
+- int clock, bool respect_dvi_limit)
++ int clock, bool respect_downstream_limits)
+ {
+ struct drm_device *dev = intel_hdmi_to_dev(hdmi);
+
+ if (clock < 25000)
+ return MODE_CLOCK_LOW;
+- if (clock > hdmi_port_clock_limit(hdmi, respect_dvi_limit))
++ if (clock > hdmi_port_clock_limit(hdmi, respect_downstream_limits))
+ return MODE_CLOCK_HIGH;
+
+ /* BXT DPLL can't generate 223-240 MHz */
+@@ -1312,7 +1347,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
+ * within limits.
+ */
+ if (pipe_config->pipe_bpp > 8*3 && pipe_config->has_hdmi_sink &&
+- hdmi_port_clock_valid(intel_hdmi, clock_12bpc, false) == MODE_OK &&
++ hdmi_port_clock_valid(intel_hdmi, clock_12bpc, true) == MODE_OK &&
+ hdmi_12bpc_possible(pipe_config)) {
+ DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n");
+ desired_bpp = 12*3;
+@@ -1352,10 +1387,35 @@ intel_hdmi_unset_edid(struct drm_connector *connector)
+ intel_hdmi->has_audio = false;
+ intel_hdmi->rgb_quant_range_selectable = false;
+
++ intel_hdmi->dp_dual_mode.type = DRM_DP_DUAL_MODE_NONE;
++ intel_hdmi->dp_dual_mode.max_tmds_clock = 0;
++
+ kfree(to_intel_connector(connector)->detect_edid);
+ to_intel_connector(connector)->detect_edid = NULL;
+ }
+
++static void
++intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector)
++{
++ struct drm_i915_private *dev_priv = to_i915(connector->dev);
++ struct intel_hdmi *hdmi = intel_attached_hdmi(connector);
++ struct i2c_adapter *adapter =
++ intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
++ enum drm_dp_dual_mode_type type = drm_dp_dual_mode_detect(adapter);
++
++ if (type == DRM_DP_DUAL_MODE_NONE ||
++ type == DRM_DP_DUAL_MODE_UNKNOWN)
++ return;
++
++ hdmi->dp_dual_mode.type = type;
++ hdmi->dp_dual_mode.max_tmds_clock =
++ drm_dp_dual_mode_max_tmds_clock(type, adapter);
++
++ DRM_DEBUG_KMS("DP dual mode adaptor (%s) detected (max TMDS clock: %d kHz)\n",
++ drm_dp_get_dual_mode_type_name(type),
++ hdmi->dp_dual_mode.max_tmds_clock);
++}
++
+ static bool
+ intel_hdmi_set_edid(struct drm_connector *connector, bool force)
+ {
+@@ -1371,6 +1431,8 @@ intel_hdmi_set_edid(struct drm_connector *connector, bool force)
+ intel_gmbus_get_adapter(dev_priv,
+ intel_hdmi->ddc_bus));
+
++ intel_hdmi_dp_dual_mode_detect(connector);
++
+ intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+ }
+
+diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
+index 3425d8e737b3..54ab023427c7 100644
+--- a/drivers/gpu/drm/i915/intel_pm.c
++++ b/drivers/gpu/drm/i915/intel_pm.c
+@@ -3845,6 +3845,8 @@ static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
+ if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+ hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
+
++ memset(active, 0, sizeof(*active));
++
+ active->pipe_enabled = intel_crtc->active;
+
+ if (active->pipe_enabled) {
+diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
+index 0b42ada338c8..bd322d8fba40 100644
+--- a/drivers/gpu/drm/i915/intel_psr.c
++++ b/drivers/gpu/drm/i915/intel_psr.c
+@@ -280,7 +280,10 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp)
+ * with the 5 or 6 idle patterns.
+ */
+ uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames);
+- uint32_t val = 0x0;
++ uint32_t val = EDP_PSR_ENABLE;
++
++ val |= max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT;
++ val |= idle_frames << EDP_PSR_IDLE_FRAME_SHIFT;
+
+ if (IS_HASWELL(dev))
+ val |= EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES;
+@@ -288,14 +291,50 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp)
+ if (dev_priv->psr.link_standby)
+ val |= EDP_PSR_LINK_STANDBY;
+
+- I915_WRITE(EDP_PSR_CTL, val |
+- max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT |
+- idle_frames << EDP_PSR_IDLE_FRAME_SHIFT |
+- EDP_PSR_ENABLE);
++ if (dev_priv->vbt.psr.tp1_wakeup_time > 5)
++ val |= EDP_PSR_TP1_TIME_2500us;
++ else if (dev_priv->vbt.psr.tp1_wakeup_time > 1)
++ val |= EDP_PSR_TP1_TIME_500us;
++ else if (dev_priv->vbt.psr.tp1_wakeup_time > 0)
++ val |= EDP_PSR_TP1_TIME_100us;
++ else
++ val |= EDP_PSR_TP1_TIME_0us;
++
++ if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 5)
++ val |= EDP_PSR_TP2_TP3_TIME_2500us;
++ else if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 1)
++ val |= EDP_PSR_TP2_TP3_TIME_500us;
++ else if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 0)
++ val |= EDP_PSR_TP2_TP3_TIME_100us;
++ else
++ val |= EDP_PSR_TP2_TP3_TIME_0us;
++
++ if (intel_dp_source_supports_hbr2(intel_dp) &&
++ drm_dp_tps3_supported(intel_dp->dpcd))
++ val |= EDP_PSR_TP1_TP3_SEL;
++ else
++ val |= EDP_PSR_TP1_TP2_SEL;
++
++ I915_WRITE(EDP_PSR_CTL, val);
++
++ if (!dev_priv->psr.psr2_support)
++ return;
++
++ /* FIXME: selective update is probably totally broken because it doesn't
++ * mesh at all with our frontbuffer tracking. And the hw alone isn't
++ * good enough. */
++ val = EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE;
++
++ if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 5)
++ val |= EDP_PSR2_TP2_TIME_2500;
++ else if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 1)
++ val |= EDP_PSR2_TP2_TIME_500;
++ else if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 0)
++ val |= EDP_PSR2_TP2_TIME_100;
++ else
++ val |= EDP_PSR2_TP2_TIME_50;
+
+- if (dev_priv->psr.psr2_support)
+- I915_WRITE(EDP_PSR2_CTL, EDP_PSR2_ENABLE |
+- EDP_SU_TRACK_ENABLE | EDP_PSR2_TP2_TIME_100);
++ I915_WRITE(EDP_PSR2_CTL, val);
+ }
+
+ static bool intel_psr_match_conditions(struct intel_dp *intel_dp)
+diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
+index e26dcdec2aba..05229b960e0c 100644
+--- a/drivers/gpu/drm/imx/imx-drm-core.c
++++ b/drivers/gpu/drm/imx/imx-drm-core.c
+@@ -25,6 +25,7 @@
+ #include <drm/drm_fb_cma_helper.h>
+ #include <drm/drm_plane_helper.h>
+ #include <drm/drm_of.h>
++#include <video/imx-ipu-v3.h>
+
+ #include "imx-drm.h"
+
+@@ -444,6 +445,13 @@ static int compare_of(struct device *dev, void *data)
+ {
+ struct device_node *np = data;
+
++ /* Special case for DI, dev->of_node may not be set yet */
++ if (strcmp(dev->driver->name, "imx-ipuv3-crtc") == 0) {
++ struct ipu_client_platformdata *pdata = dev->platform_data;
++
++ return pdata->of_node == np;
++ }
++
+ /* Special case for LDB, one device for two channels */
+ if (of_node_cmp(np->name, "lvds-channel") == 0) {
+ np = of_get_parent(np);
+diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
+index dee8e8b3523b..b2c30b8d9816 100644
+--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
++++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
+@@ -473,7 +473,7 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc,
+
+ ret = imx_drm_add_crtc(drm, &ipu_crtc->base, &ipu_crtc->imx_crtc,
+ &ipu_crtc->plane[0]->base, &ipu_crtc_helper_funcs,
+- ipu_crtc->dev->of_node);
++ pdata->of_node);
+ if (ret) {
+ dev_err(ipu_crtc->dev, "adding crtc failed with %d.\n", ret);
+ goto err_put_resources;
+diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
+index c03b96709179..701c51ed3e7c 100644
+--- a/drivers/gpu/drm/msm/msm_drv.c
++++ b/drivers/gpu/drm/msm/msm_drv.c
+@@ -465,7 +465,6 @@ static void msm_preclose(struct drm_device *dev, struct drm_file *file)
+ {
+ struct msm_drm_private *priv = dev->dev_private;
+ struct msm_file_private *ctx = file->driver_priv;
+- struct msm_kms *kms = priv->kms;
+
+ mutex_lock(&dev->struct_mutex);
+ if (ctx == priv->lastctx)
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+index 6cbb7d4bdd11..f2cf9231872a 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+@@ -628,6 +628,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
+ mutex_init(&dev_priv->cmdbuf_mutex);
+ mutex_init(&dev_priv->release_mutex);
+ mutex_init(&dev_priv->binding_mutex);
++ mutex_init(&dev_priv->global_kms_state_mutex);
+ rwlock_init(&dev_priv->resource_lock);
+ ttm_lock_init(&dev_priv->reservation_sem);
+ spin_lock_init(&dev_priv->hw_lock);
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+index 019a6ca3e8e9..6db358a85b46 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+@@ -412,6 +412,7 @@ struct vmw_private {
+ struct drm_property *implicit_placement_property;
+ unsigned num_implicit;
+ struct vmw_framebuffer *implicit_fb;
++ struct mutex global_kms_state_mutex;
+
+ /*
+ * Context and surface management.
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+index 4742ec4ead27..b07543b5cea4 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+@@ -2143,13 +2143,13 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
+ void vmw_kms_del_active(struct vmw_private *dev_priv,
+ struct vmw_display_unit *du)
+ {
+- lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+-
++ mutex_lock(&dev_priv->global_kms_state_mutex);
+ if (du->active_implicit) {
+ if (--(dev_priv->num_implicit) == 0)
+ dev_priv->implicit_fb = NULL;
+ du->active_implicit = false;
+ }
++ mutex_unlock(&dev_priv->global_kms_state_mutex);
+ }
+
+ /**
+@@ -2165,8 +2165,7 @@ void vmw_kms_add_active(struct vmw_private *dev_priv,
+ struct vmw_display_unit *du,
+ struct vmw_framebuffer *vfb)
+ {
+- lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+-
++ mutex_lock(&dev_priv->global_kms_state_mutex);
+ WARN_ON_ONCE(!dev_priv->num_implicit && dev_priv->implicit_fb);
+
+ if (!du->active_implicit && du->is_implicit) {
+@@ -2174,6 +2173,7 @@ void vmw_kms_add_active(struct vmw_private *dev_priv,
+ du->active_implicit = true;
+ dev_priv->num_implicit++;
+ }
++ mutex_unlock(&dev_priv->global_kms_state_mutex);
+ }
+
+ /**
+@@ -2190,16 +2190,13 @@ bool vmw_kms_crtc_flippable(struct vmw_private *dev_priv,
+ struct drm_crtc *crtc)
+ {
+ struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
++ bool ret;
+
+- lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+-
+- if (!du->is_implicit)
+- return true;
+-
+- if (dev_priv->num_implicit != 1)
+- return false;
++ mutex_lock(&dev_priv->global_kms_state_mutex);
++ ret = !du->is_implicit || dev_priv->num_implicit == 1;
++ mutex_unlock(&dev_priv->global_kms_state_mutex);
+
+- return true;
++ return ret;
+ }
+
+ /**
+@@ -2214,16 +2211,18 @@ void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv,
+ struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
+ struct vmw_framebuffer *vfb;
+
+- lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
++ mutex_lock(&dev_priv->global_kms_state_mutex);
+
+ if (!du->is_implicit)
+- return;
++ goto out_unlock;
+
+ vfb = vmw_framebuffer_to_vfb(crtc->primary->fb);
+ WARN_ON_ONCE(dev_priv->num_implicit != 1 &&
+ dev_priv->implicit_fb != vfb);
+
+ dev_priv->implicit_fb = vfb;
++out_unlock:
++ mutex_unlock(&dev_priv->global_kms_state_mutex);
+ }
+
+ /**
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+index 0ea22fd112c9..b74eae2b8594 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+@@ -285,14 +285,17 @@ static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
+ }
+
+ /* Only one active implicit frame-buffer at a time. */
++ mutex_lock(&dev_priv->global_kms_state_mutex);
+ if (sou->base.is_implicit &&
+ dev_priv->implicit_fb && vfb &&
+ !(dev_priv->num_implicit == 1 &&
+ sou->base.active_implicit) &&
+ dev_priv->implicit_fb != vfb) {
++ mutex_unlock(&dev_priv->global_kms_state_mutex);
+ DRM_ERROR("Multiple implicit framebuffers not supported.\n");
+ return -EINVAL;
+ }
++ mutex_unlock(&dev_priv->global_kms_state_mutex);
+
+ /* since they always map one to one these are safe */
+ connector = &sou->base.connector;
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+index b949102ad864..9ca818fb034c 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+@@ -553,12 +553,15 @@ static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
+ }
+
+ /* Only one active implicit frame-buffer at a time. */
++ mutex_lock(&dev_priv->global_kms_state_mutex);
+ if (!turning_off && stdu->base.is_implicit && dev_priv->implicit_fb &&
+ !(dev_priv->num_implicit == 1 && stdu->base.active_implicit)
+ && dev_priv->implicit_fb != vfb) {
++ mutex_unlock(&dev_priv->global_kms_state_mutex);
+ DRM_ERROR("Multiple implicit framebuffers not supported.\n");
+ return -EINVAL;
+ }
++ mutex_unlock(&dev_priv->global_kms_state_mutex);
+
+ /* Since they always map one to one these are safe */
+ connector = &stdu->base.connector;
+diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
+index abb98c77bad2..99dcacf05b99 100644
+--- a/drivers/gpu/ipu-v3/ipu-common.c
++++ b/drivers/gpu/ipu-v3/ipu-common.c
+@@ -997,7 +997,7 @@ struct ipu_platform_reg {
+ };
+
+ /* These must be in the order of the corresponding device tree port nodes */
+-static const struct ipu_platform_reg client_reg[] = {
++static struct ipu_platform_reg client_reg[] = {
+ {
+ .pdata = {
+ .csi = 0,
+@@ -1048,7 +1048,7 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
+ mutex_unlock(&ipu_client_id_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(client_reg); i++) {
+- const struct ipu_platform_reg *reg = &client_reg[i];
++ struct ipu_platform_reg *reg = &client_reg[i];
+ struct platform_device *pdev;
+ struct device_node *of_node;
+
+@@ -1070,6 +1070,7 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
+
+ pdev->dev.parent = dev;
+
++ reg->pdata.of_node = of_node;
+ ret = platform_device_add_data(pdev, &reg->pdata,
+ sizeof(reg->pdata));
+ if (!ret)
+diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
+index 6c99ee7bafa3..ee396ff167d9 100644
+--- a/drivers/hwmon/ads7828.c
++++ b/drivers/hwmon/ads7828.c
+@@ -120,6 +120,7 @@ static int ads7828_probe(struct i2c_client *client,
+ unsigned int vref_mv = ADS7828_INT_VREF_MV;
+ bool diff_input = false;
+ bool ext_vref = false;
++ unsigned int regval;
+
+ data = devm_kzalloc(dev, sizeof(struct ads7828_data), GFP_KERNEL);
+ if (!data)
+@@ -154,6 +155,15 @@ static int ads7828_probe(struct i2c_client *client,
+ if (!diff_input)
+ data->cmd_byte |= ADS7828_CMD_SD_SE;
+
++ /*
++ * Datasheet specifies internal reference voltage is disabled by
++ * default. The internal reference voltage needs to be enabled and
++ * voltage needs to settle before getting valid ADC data. So perform a
++ * dummy read to enable the internal reference voltage.
++ */
++ if (!ext_vref)
++ regmap_read(data->regmap, data->cmd_byte, &regval);
++
+ hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
+ data,
+ ads7828_groups);
+diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
+index 1142a93dd90b..ca62a6e11846 100644
+--- a/drivers/input/joystick/xpad.c
++++ b/drivers/input/joystick/xpad.c
+@@ -457,6 +457,10 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d
+ static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev,
+ u16 cmd, unsigned char *data)
+ {
++ /* valid pad data */
++ if (data[0] != 0x00)
++ return;
++
+ /* digital pad */
+ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) {
+ /* dpad as buttons (left, right, up, down) */
+@@ -756,6 +760,7 @@ static bool xpad_prepare_next_out_packet(struct usb_xpad *xpad)
+ if (packet) {
+ memcpy(xpad->odata, packet->data, packet->len);
+ xpad->irq_out->transfer_buffer_length = packet->len;
++ packet->pending = false;
+ return true;
+ }
+
+@@ -797,7 +802,6 @@ static void xpad_irq_out(struct urb *urb)
+ switch (status) {
+ case 0:
+ /* success */
+- xpad->out_packets[xpad->last_out_packet].pending = false;
+ xpad->irq_out_active = xpad_prepare_next_out_packet(xpad);
+ break;
+
+diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
+index abe1a927b332..65ebbd111702 100644
+--- a/drivers/input/misc/uinput.c
++++ b/drivers/input/misc/uinput.c
+@@ -981,9 +981,15 @@ static long uinput_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+ }
+
+ #ifdef CONFIG_COMPAT
++
++#define UI_SET_PHYS_COMPAT _IOW(UINPUT_IOCTL_BASE, 108, compat_uptr_t)
++
+ static long uinput_compat_ioctl(struct file *file,
+ unsigned int cmd, unsigned long arg)
+ {
++ if (cmd == UI_SET_PHYS_COMPAT)
++ cmd = UI_SET_PHYS;
++
+ return uinput_ioctl_handler(file, cmd, arg, compat_ptr(arg));
+ }
+ #endif
+diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+index 019644ff627d..bacecbd68a6d 100644
+--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
++++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+@@ -280,7 +280,8 @@ static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user
+ static int put_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up)
+ {
+ if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_create_buffers32)) ||
+- copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, format)))
++ copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, format)) ||
++ copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved)))
+ return -EFAULT;
+ return __put_v4l2_format32(&kp->format, &up->format);
+ }
+diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c
+index 1bbbe877ba7e..807a3e3ec29a 100644
+--- a/drivers/mfd/intel-lpss.c
++++ b/drivers/mfd/intel-lpss.c
+@@ -34,6 +34,7 @@
+ #define LPSS_DEV_SIZE 0x200
+ #define LPSS_PRIV_OFFSET 0x200
+ #define LPSS_PRIV_SIZE 0x100
++#define LPSS_PRIV_REG_COUNT (LPSS_PRIV_SIZE / 4)
+ #define LPSS_IDMA64_OFFSET 0x800
+ #define LPSS_IDMA64_SIZE 0x800
+
+@@ -76,6 +77,7 @@ struct intel_lpss {
+ struct mfd_cell *cell;
+ struct device *dev;
+ void __iomem *priv;
++ u32 priv_ctx[LPSS_PRIV_REG_COUNT];
+ int devid;
+ u32 caps;
+ u32 active_ltr;
+@@ -493,6 +495,16 @@ EXPORT_SYMBOL_GPL(intel_lpss_prepare);
+
+ int intel_lpss_suspend(struct device *dev)
+ {
++ struct intel_lpss *lpss = dev_get_drvdata(dev);
++ unsigned int i;
++
++ /* Save device context */
++ for (i = 0; i < LPSS_PRIV_REG_COUNT; i++)
++ lpss->priv_ctx[i] = readl(lpss->priv + i * 4);
++
++ /* Put the device into reset state */
++ writel(0, lpss->priv + LPSS_PRIV_RESETS);
++
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(intel_lpss_suspend);
+@@ -500,8 +512,13 @@ EXPORT_SYMBOL_GPL(intel_lpss_suspend);
+ int intel_lpss_resume(struct device *dev)
+ {
+ struct intel_lpss *lpss = dev_get_drvdata(dev);
++ unsigned int i;
+
+- intel_lpss_init_dev(lpss);
++ intel_lpss_deassert_reset(lpss);
++
++ /* Restore device context */
++ for (i = 0; i < LPSS_PRIV_REG_COUNT; i++)
++ writel(lpss->priv_ctx[i], lpss->priv + i * 4);
+
+ return 0;
+ }
+diff --git a/drivers/mfd/intel_quark_i2c_gpio.c b/drivers/mfd/intel_quark_i2c_gpio.c
+index bdc5e27222c0..7450f5d8770c 100644
+--- a/drivers/mfd/intel_quark_i2c_gpio.c
++++ b/drivers/mfd/intel_quark_i2c_gpio.c
+@@ -139,6 +139,7 @@ static int intel_quark_register_i2c_clk(struct intel_quark_mfd *quark_mfd)
+ INTEL_QUARK_I2C_CONTROLLER_CLK);
+
+ if (!quark_mfd->i2c_clk_lookup) {
++ clk_unregister(quark_mfd->i2c_clk);
+ dev_err(&pdev->dev, "Fixed clk register failed\n");
+ return -ENOMEM;
+ }
+@@ -150,7 +151,7 @@ static void intel_quark_unregister_i2c_clk(struct pci_dev *pdev)
+ {
+ struct intel_quark_mfd *quark_mfd = dev_get_drvdata(&pdev->dev);
+
+- if (!quark_mfd->i2c_clk || !quark_mfd->i2c_clk_lookup)
++ if (!quark_mfd->i2c_clk_lookup)
+ return;
+
+ clkdev_drop(quark_mfd->i2c_clk_lookup);
+@@ -246,25 +247,33 @@ static int intel_quark_mfd_probe(struct pci_dev *pdev,
+ quark_mfd = devm_kzalloc(&pdev->dev, sizeof(*quark_mfd), GFP_KERNEL);
+ if (!quark_mfd)
+ return -ENOMEM;
++
+ quark_mfd->pdev = pdev;
++ dev_set_drvdata(&pdev->dev, quark_mfd);
+
+ ret = intel_quark_register_i2c_clk(quark_mfd);
+ if (ret)
+ return ret;
+
+- dev_set_drvdata(&pdev->dev, quark_mfd);
+-
+ ret = intel_quark_i2c_setup(pdev, &intel_quark_mfd_cells[1]);
+ if (ret)
+- return ret;
++ goto err_unregister_i2c_clk;
+
+ ret = intel_quark_gpio_setup(pdev, &intel_quark_mfd_cells[0]);
+ if (ret)
+- return ret;
++ goto err_unregister_i2c_clk;
+
+- return mfd_add_devices(&pdev->dev, 0, intel_quark_mfd_cells,
+- ARRAY_SIZE(intel_quark_mfd_cells), NULL, 0,
+- NULL);
++ ret = mfd_add_devices(&pdev->dev, 0, intel_quark_mfd_cells,
++ ARRAY_SIZE(intel_quark_mfd_cells), NULL, 0,
++ NULL);
++ if (ret)
++ goto err_unregister_i2c_clk;
++
++ return 0;
++
++err_unregister_i2c_clk:
++ intel_quark_unregister_i2c_clk(pdev);
++ return ret;
+ }
+
+ static void intel_quark_mfd_remove(struct pci_dev *pdev)
+diff --git a/drivers/mfd/intel_soc_pmic_core.c b/drivers/mfd/intel_soc_pmic_core.c
+index d9e15cf7c6c8..12d6ebb4ae5d 100644
+--- a/drivers/mfd/intel_soc_pmic_core.c
++++ b/drivers/mfd/intel_soc_pmic_core.c
+@@ -35,6 +35,7 @@ static struct gpiod_lookup_table panel_gpio_table = {
+ .table = {
+ /* Panel EN/DISABLE */
+ GPIO_LOOKUP("gpio_crystalcove", 94, "panel", GPIO_ACTIVE_HIGH),
++ { },
+ },
+ };
+
+diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
+index c84c2d30ef1f..fb793612c7bd 100644
+--- a/drivers/net/wireless/ath/ath10k/core.c
++++ b/drivers/net/wireless/ath/ath10k/core.c
+@@ -1723,6 +1723,10 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode)
+ goto err_hif_stop;
+ }
+
++ ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1;
++
++ INIT_LIST_HEAD(&ar->arvifs);
++
+ /* we don't care about HTT in UTF mode */
+ if (mode == ATH10K_FIRMWARE_MODE_NORMAL) {
+ status = ath10k_htt_setup(&ar->htt);
+@@ -1736,10 +1740,6 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode)
+ if (status)
+ goto err_hif_stop;
+
+- ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1;
+-
+- INIT_LIST_HEAD(&ar->arvifs);
+-
+ return 0;
+
+ err_hif_stop:
+diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
+index 076d29b53ddf..0f834646e6a7 100644
+--- a/drivers/net/wireless/ath/ath10k/debug.c
++++ b/drivers/net/wireless/ath/ath10k/debug.c
+@@ -2019,7 +2019,12 @@ static ssize_t ath10k_write_pktlog_filter(struct file *file,
+ goto out;
+ }
+
+- if (filter && (filter != ar->debug.pktlog_filter)) {
++ if (filter == ar->debug.pktlog_filter) {
++ ret = count;
++ goto out;
++ }
++
++ if (filter) {
+ ret = ath10k_wmi_pdev_pktlog_enable(ar, filter);
+ if (ret) {
+ ath10k_warn(ar, "failed to enable pktlog filter %x: %d\n",
+diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
+index 78999c9de23b..e11160ab44bb 100644
+--- a/drivers/net/wireless/ath/ath10k/mac.c
++++ b/drivers/net/wireless/ath/ath10k/mac.c
+@@ -4489,7 +4489,10 @@ static int ath10k_add_interface(struct ieee80211_hw *hw,
+ goto err_vdev_delete;
+ }
+
+- if (ar->cfg_tx_chainmask) {
++ /* Configuring number of spatial stream for monitor interface is causing
++ * target assert in qca9888 and qca6174.
++ */
++ if (ar->cfg_tx_chainmask && (vif->type != NL80211_IFTYPE_MONITOR)) {
+ u16 nss = get_nss_from_chainmask(ar->cfg_tx_chainmask);
+
+ vdev_param = ar->wmi.vdev_param->nss;
+@@ -6450,7 +6453,13 @@ ath10k_mac_update_rx_channel(struct ath10k *ar,
+ def = &vifs[0].new_ctx->def;
+
+ ar->rx_channel = def->chan;
+- } else if (ctx && ath10k_mac_num_chanctxs(ar) == 0) {
++ } else if ((ctx && ath10k_mac_num_chanctxs(ar) == 0) ||
++ (ctx && (ar->state == ATH10K_STATE_RESTARTED))) {
++ /* During driver restart due to firmware assert, since mac80211
++ * already has valid channel context for given radio, channel
++ * context iteration return num_chanctx > 0. So fix rx_channel
++ * when restart is in progress.
++ */
+ ar->rx_channel = ctx->def.chan;
+ } else {
+ ar->rx_channel = NULL;
+diff --git a/drivers/net/wireless/ath/ath5k/led.c b/drivers/net/wireless/ath/ath5k/led.c
+index 803030fd17d3..6a2a16856763 100644
+--- a/drivers/net/wireless/ath/ath5k/led.c
++++ b/drivers/net/wireless/ath/ath5k/led.c
+@@ -77,7 +77,7 @@ static const struct pci_device_id ath5k_led_devices[] = {
+ /* HP Compaq CQ60-206US (ddreggors@jumptv.com) */
+ { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137a), ATH_LED(3, 1) },
+ /* HP Compaq C700 (nitrousnrg@gmail.com) */
+- { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 1) },
++ { ATH_SDEVICE(PCI_VENDOR_ID_HP, 0x0137b), ATH_LED(3, 0) },
+ /* LiteOn AR5BXB63 (magooz@salug.it) */
+ { ATH_SDEVICE(PCI_VENDOR_ID_ATHEROS, 0x3067), ATH_LED(3, 0) },
+ /* IBM-specific AR5212 (all others) */
+diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
+index 1c226d63bb03..deded2a9a860 100644
+--- a/drivers/net/wireless/ath/ath9k/init.c
++++ b/drivers/net/wireless/ath/ath9k/init.c
+@@ -49,6 +49,10 @@ int ath9k_led_blink;
+ module_param_named(blink, ath9k_led_blink, int, 0444);
+ MODULE_PARM_DESC(blink, "Enable LED blink on activity");
+
++static int ath9k_led_active_high = -1;
++module_param_named(led_active_high, ath9k_led_active_high, int, 0444);
++MODULE_PARM_DESC(led_active_high, "Invert LED polarity");
++
+ static int ath9k_btcoex_enable;
+ module_param_named(btcoex_enable, ath9k_btcoex_enable, int, 0444);
+ MODULE_PARM_DESC(btcoex_enable, "Enable wifi-BT coexistence");
+@@ -600,6 +604,9 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc,
+ if (ret)
+ return ret;
+
++ if (ath9k_led_active_high != -1)
++ ah->config.led_active_high = ath9k_led_active_high == 1;
++
+ /*
+ * Enable WLAN/BT RX Antenna diversity only when:
+ *
+diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c
+index e6fef1be9977..7cdaf40c3057 100644
+--- a/drivers/net/wireless/ath/ath9k/pci.c
++++ b/drivers/net/wireless/ath/ath9k/pci.c
+@@ -28,6 +28,16 @@ static const struct pci_device_id ath_pci_id_table[] = {
+ { PCI_VDEVICE(ATHEROS, 0x0024) }, /* PCI-E */
+ { PCI_VDEVICE(ATHEROS, 0x0027) }, /* PCI */
+ { PCI_VDEVICE(ATHEROS, 0x0029) }, /* PCI */
++
++#ifdef CONFIG_ATH9K_PCOEM
++ /* Mini PCI AR9220 MB92 cards: Compex WLM200NX, Wistron DNMA-92 */
++ { PCI_DEVICE_SUB(PCI_VENDOR_ID_ATHEROS,
++ 0x0029,
++ PCI_VENDOR_ID_ATHEROS,
++ 0x2096),
++ .driver_data = ATH9K_PCI_LED_ACT_HI },
++#endif
++
+ { PCI_VDEVICE(ATHEROS, 0x002A) }, /* PCI-E */
+
+ #ifdef CONFIG_ATH9K_PCOEM
+diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
+index 0517a4f2d3f2..7a40d8dffa36 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/base.c
++++ b/drivers/net/wireless/realtek/rtlwifi/base.c
+@@ -1660,9 +1660,9 @@ void rtl_watchdog_wq_callback(void *data)
+ if (((rtlpriv->link_info.num_rx_inperiod +
+ rtlpriv->link_info.num_tx_inperiod) > 8) ||
+ (rtlpriv->link_info.num_rx_inperiod > 2))
+- rtl_lps_enter(hw);
+- else
+ rtl_lps_leave(hw);
++ else
++ rtl_lps_enter(hw);
+ }
+
+ rtlpriv->link_info.num_rx_inperiod = 0;
+diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c
+index c43ab59a690a..77cbd10e807d 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c
++++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c
+@@ -1203,7 +1203,6 @@ static void btc8723b2ant_set_ant_path(struct btc_coexist *btcoexist,
+
+ /* Force GNT_BT to low */
+ btcoexist->btc_write_1byte_bitmask(btcoexist, 0x765, 0x18, 0x0);
+- btcoexist->btc_write_2byte(btcoexist, 0x948, 0x0);
+
+ if (board_info->btdm_ant_pos == BTC_ANTENNA_AT_MAIN_PORT) {
+ /* tell firmware "no antenna inverse" */
+@@ -1211,19 +1210,25 @@ static void btc8723b2ant_set_ant_path(struct btc_coexist *btcoexist,
+ h2c_parameter[1] = 1; /* ext switch type */
+ btcoexist->btc_fill_h2c(btcoexist, 0x65, 2,
+ h2c_parameter);
++ btcoexist->btc_write_2byte(btcoexist, 0x948, 0x0);
+ } else {
+ /* tell firmware "antenna inverse" */
+ h2c_parameter[0] = 1;
+ h2c_parameter[1] = 1; /* ext switch type */
+ btcoexist->btc_fill_h2c(btcoexist, 0x65, 2,
+ h2c_parameter);
++ btcoexist->btc_write_2byte(btcoexist, 0x948, 0x280);
+ }
+ }
+
+ /* ext switch setting */
+ if (use_ext_switch) {
+ /* fixed internal switch S1->WiFi, S0->BT */
+- btcoexist->btc_write_2byte(btcoexist, 0x948, 0x0);
++ if (board_info->btdm_ant_pos == BTC_ANTENNA_AT_MAIN_PORT)
++ btcoexist->btc_write_2byte(btcoexist, 0x948, 0x0);
++ else
++ btcoexist->btc_write_2byte(btcoexist, 0x948, 0x280);
++
+ switch (antpos_type) {
+ case BTC_ANT_WIFI_AT_MAIN:
+ /* ext switch main at wifi */
+diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+index b2791c893417..babd1490f20c 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
++++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+@@ -965,13 +965,38 @@ void exhalbtc_set_chip_type(u8 chip_type)
+ }
+ }
+
+-void exhalbtc_set_ant_num(u8 type, u8 ant_num)
++void exhalbtc_set_ant_num(struct rtl_priv *rtlpriv, u8 type, u8 ant_num)
+ {
+ if (BT_COEX_ANT_TYPE_PG == type) {
+ gl_bt_coexist.board_info.pg_ant_num = ant_num;
+ gl_bt_coexist.board_info.btdm_ant_num = ant_num;
++ /* The antenna position:
++ * Main (default) or Aux for pgAntNum=2 && btdmAntNum =1.
++ * The antenna position should be determined by
++ * auto-detect mechanism.
++ * The following is assumed to main,
++ * and those must be modified
++ * if y auto-detect mechanism is ready
++ */
++ if ((gl_bt_coexist.board_info.pg_ant_num == 2) &&
++ (gl_bt_coexist.board_info.btdm_ant_num == 1))
++ gl_bt_coexist.board_info.btdm_ant_pos =
++ BTC_ANTENNA_AT_MAIN_PORT;
++ else
++ gl_bt_coexist.board_info.btdm_ant_pos =
++ BTC_ANTENNA_AT_MAIN_PORT;
+ } else if (BT_COEX_ANT_TYPE_ANTDIV == type) {
+ gl_bt_coexist.board_info.btdm_ant_num = ant_num;
++ gl_bt_coexist.board_info.btdm_ant_pos =
++ BTC_ANTENNA_AT_MAIN_PORT;
++ } else if (type == BT_COEX_ANT_TYPE_DETECTED) {
++ gl_bt_coexist.board_info.btdm_ant_num = ant_num;
++ if (rtlpriv->cfg->mod_params->ant_sel == 1)
++ gl_bt_coexist.board_info.btdm_ant_pos =
++ BTC_ANTENNA_AT_AUX_PORT;
++ else
++ gl_bt_coexist.board_info.btdm_ant_pos =
++ BTC_ANTENNA_AT_MAIN_PORT;
+ }
+ }
+
+diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+index 0a903ea179ef..f41ca57dd8a7 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
++++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+@@ -535,7 +535,7 @@ void exhalbtc_set_bt_patch_version(u16 bt_hci_version, u16 bt_patch_version);
+ void exhalbtc_update_min_bt_rssi(char bt_rssi);
+ void exhalbtc_set_bt_exist(bool bt_exist);
+ void exhalbtc_set_chip_type(u8 chip_type);
+-void exhalbtc_set_ant_num(u8 type, u8 ant_num);
++void exhalbtc_set_ant_num(struct rtl_priv *rtlpriv, u8 type, u8 ant_num);
+ void exhalbtc_display_bt_coex_info(struct btc_coexist *btcoexist);
+ void exhalbtc_signal_compensation(struct btc_coexist *btcoexist,
+ u8 *rssi_wifi, u8 *rssi_bt);
+diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c
+index b9b0cb7af8ea..d3fd9211b3a4 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c
++++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c
+@@ -72,7 +72,10 @@ void rtl_btc_init_hal_vars(struct rtl_priv *rtlpriv)
+ __func__, bt_type);
+ exhalbtc_set_chip_type(bt_type);
+
+- exhalbtc_set_ant_num(BT_COEX_ANT_TYPE_PG, ant_num);
++ if (rtlpriv->cfg->mod_params->ant_sel == 1)
++ exhalbtc_set_ant_num(rtlpriv, BT_COEX_ANT_TYPE_DETECTED, 1);
++ else
++ exhalbtc_set_ant_num(rtlpriv, BT_COEX_ANT_TYPE_PG, ant_num);
+ }
+
+ void rtl_btc_init_hw_config(struct rtl_priv *rtlpriv)
+diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
+index 283d608b9973..e5037d13b772 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
++++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
+@@ -1573,7 +1573,7 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw)
+ true,
+ HW_DESC_TXBUFF_ADDR),
+ skb->len, PCI_DMA_TODEVICE);
+- kfree_skb(skb);
++ dev_kfree_skb_irq(skb);
+ ring->idx = (ring->idx + 1) % ring->entries;
+ }
+ ring->idx = 0;
+diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
+index c983d2fe147f..5a3df9198ddf 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
+@@ -2684,6 +2684,7 @@ void rtl8723be_read_bt_coexist_info_from_hwpg(struct ieee80211_hw *hw,
+ bool auto_load_fail, u8 *hwinfo)
+ {
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
++ struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params;
+ u8 value;
+ u32 tmpu_32;
+
+@@ -2702,6 +2703,10 @@ void rtl8723be_read_bt_coexist_info_from_hwpg(struct ieee80211_hw *hw,
+ rtlpriv->btcoexist.btc_info.ant_num = ANT_X2;
+ }
+
++ /* override ant_num / ant_path */
++ if (mod_params->ant_sel)
++ rtlpriv->btcoexist.btc_info.ant_num =
++ (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1);
+ }
+
+ void rtl8723be_bt_reg_init(struct ieee80211_hw *hw)
+diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
+index a78eaeda0008..2101793438ed 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
+@@ -273,6 +273,7 @@ static struct rtl_mod_params rtl8723be_mod_params = {
+ .msi_support = false,
+ .disable_watchdog = false,
+ .debug = DBG_EMERG,
++ .ant_sel = 0,
+ };
+
+ static struct rtl_hal_cfg rtl8723be_hal_cfg = {
+@@ -394,6 +395,7 @@ module_param_named(fwlps, rtl8723be_mod_params.fwctrl_lps, bool, 0444);
+ module_param_named(msi, rtl8723be_mod_params.msi_support, bool, 0444);
+ module_param_named(disable_watchdog, rtl8723be_mod_params.disable_watchdog,
+ bool, 0444);
++module_param_named(ant_sel, rtl8723be_mod_params.ant_sel, int, 0444);
+ MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n");
+ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n");
+ MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n");
+@@ -402,6 +404,7 @@ MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 0)\n");
+ MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)");
+ MODULE_PARM_DESC(disable_watchdog,
+ "Set to 1 to disable the watchdog (default 0)\n");
++MODULE_PARM_DESC(ant_sel, "Set to 1 or 2 to force antenna number (default 0)\n");
+
+ static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume);
+
+diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
+index 554d81420f19..93bd7fcd2b61 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
++++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
+@@ -2246,6 +2246,9 @@ struct rtl_mod_params {
+
+ /* default 0: 1 means do not disable interrupts */
+ bool int_clear;
++
++ /* select antenna */
++ int ant_sel;
+ };
+
+ struct rtl_hal_usbint_cfg {
+diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
+index 8004f67c57ec..bf8405fb4ace 100644
+--- a/drivers/pci/probe.c
++++ b/drivers/pci/probe.c
+@@ -179,9 +179,6 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
+ u16 orig_cmd;
+ struct pci_bus_region region, inverted_region;
+
+- if (dev->non_compliant_bars)
+- return 0;
+-
+ mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
+
+ /* No printks while decoding is disabled! */
+@@ -322,6 +319,9 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
+ {
+ unsigned int pos, reg;
+
++ if (dev->non_compliant_bars)
++ return;
++
+ for (pos = 0; pos < howmany; pos++) {
+ struct resource *res = &dev->resource[pos];
+ reg = PCI_BASE_ADDRESS_0 + (pos << 2);
+diff --git a/drivers/pinctrl/samsung/pinctrl-exynos5440.c b/drivers/pinctrl/samsung/pinctrl-exynos5440.c
+index 00ab63abf1d9..dbbdf652c34a 100644
+--- a/drivers/pinctrl/samsung/pinctrl-exynos5440.c
++++ b/drivers/pinctrl/samsung/pinctrl-exynos5440.c
+@@ -107,6 +107,7 @@ struct exynos5440_pmx_func {
+ * @nr_groups: number of pin groups available.
+ * @pmx_functions: list of pin functions parsed from device tree.
+ * @nr_functions: number of pin functions available.
++ * @range: gpio range to register with pinctrl
+ */
+ struct exynos5440_pinctrl_priv_data {
+ void __iomem *reg_base;
+@@ -117,6 +118,7 @@ struct exynos5440_pinctrl_priv_data {
+ unsigned int nr_groups;
+ const struct exynos5440_pmx_func *pmx_functions;
+ unsigned int nr_functions;
++ struct pinctrl_gpio_range range;
+ };
+
+ /**
+@@ -742,7 +744,6 @@ static int exynos5440_pinctrl_register(struct platform_device *pdev,
+ struct pinctrl_desc *ctrldesc;
+ struct pinctrl_dev *pctl_dev;
+ struct pinctrl_pin_desc *pindesc, *pdesc;
+- struct pinctrl_gpio_range grange;
+ char *pin_names;
+ int pin, ret;
+
+@@ -794,12 +795,12 @@ static int exynos5440_pinctrl_register(struct platform_device *pdev,
+ return PTR_ERR(pctl_dev);
+ }
+
+- grange.name = "exynos5440-pctrl-gpio-range";
+- grange.id = 0;
+- grange.base = 0;
+- grange.npins = EXYNOS5440_MAX_PINS;
+- grange.gc = priv->gc;
+- pinctrl_add_gpio_range(pctl_dev, &grange);
++ priv->range.name = "exynos5440-pctrl-gpio-range";
++ priv->range.id = 0;
++ priv->range.base = 0;
++ priv->range.npins = EXYNOS5440_MAX_PINS;
++ priv->range.gc = priv->gc;
++ pinctrl_add_gpio_range(pctl_dev, &priv->range);
+ return 0;
+ }
+
+diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
+index e0b764284773..fd0e4e37f4e1 100644
+--- a/drivers/regulator/core.c
++++ b/drivers/regulator/core.c
+@@ -3840,6 +3840,11 @@ static void rdev_init_debugfs(struct regulator_dev *rdev)
+ &rdev->bypass_count);
+ }
+
++static int regulator_register_resolve_supply(struct device *dev, void *data)
++{
++ return regulator_resolve_supply(dev_to_rdev(dev));
++}
++
+ /**
+ * regulator_register - register regulator
+ * @regulator_desc: regulator to register
+@@ -3986,8 +3991,11 @@ regulator_register(const struct regulator_desc *regulator_desc,
+ }
+
+ rdev_init_debugfs(rdev);
+-out:
+- mutex_unlock(&regulator_list_mutex);
++
++ /* try to resolve regulators supply since a new one was registered */
++ class_for_each_device(&regulator_class, NULL, NULL,
++ regulator_register_resolve_supply);
+ kfree(config);
+ return rdev;
+
+@@ -3998,15 +4006,16 @@ scrub:
+ regulator_ena_gpio_free(rdev);
+ device_unregister(&rdev->dev);
+ /* device core frees rdev */
+- rdev = ERR_PTR(ret);
+ goto out;
+
+ wash:
+ regulator_ena_gpio_free(rdev);
+ clean:
+ kfree(rdev);
+- rdev = ERR_PTR(ret);
+- goto out;
++out:
++ mutex_unlock(&regulator_list_mutex);
++ kfree(config);
++ return ERR_PTR(ret);
+ }
+ EXPORT_SYMBOL_GPL(regulator_register);
+
+diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
+index efa493cf1bc6..edc2643df4d6 100644
+--- a/drivers/scsi/aacraid/aacraid.h
++++ b/drivers/scsi/aacraid/aacraid.h
+@@ -29,6 +29,7 @@ enum {
+ #define AAC_INT_MODE_MSI (1<<1)
+ #define AAC_INT_MODE_AIF (1<<2)
+ #define AAC_INT_MODE_SYNC (1<<3)
++#define AAC_INT_MODE_MSIX (1<<16)
+
+ #define AAC_INT_ENABLE_TYPE1_INTX 0xfffffffb
+ #define AAC_INT_ENABLE_TYPE1_MSIX 0xfffffffa
+diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
+index 2b4e75380ae6..87397deffb1f 100644
+--- a/drivers/scsi/aacraid/comminit.c
++++ b/drivers/scsi/aacraid/comminit.c
+@@ -37,6 +37,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/slab.h>
+ #include <linux/blkdev.h>
++#include <linux/delay.h>
+ #include <linux/completion.h>
+ #include <linux/mm.h>
+ #include <scsi/scsi_host.h>
+@@ -47,6 +48,20 @@ struct aac_common aac_config = {
+ .irq_mod = 1
+ };
+
++static inline int aac_is_msix_mode(struct aac_dev *dev)
++{
++ u32 status;
++
++ status = src_readl(dev, MUnit.OMR);
++ return (status & AAC_INT_MODE_MSIX);
++}
++
++static inline void aac_change_to_intx(struct aac_dev *dev)
++{
++ aac_src_access_devreg(dev, AAC_DISABLE_MSIX);
++ aac_src_access_devreg(dev, AAC_ENABLE_INTX);
++}
++
+ static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long commsize, unsigned long commalign)
+ {
+ unsigned char *base;
+@@ -427,6 +442,15 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
+ dev->comm_interface = AAC_COMM_PRODUCER;
+ dev->raw_io_interface = dev->raw_io_64 = 0;
+
++
++ /*
++ * Enable INTX mode, if not done already Enabled
++ */
++ if (aac_is_msix_mode(dev)) {
++ aac_change_to_intx(dev);
++ dev_info(&dev->pdev->dev, "Changed firmware to INTX mode");
++ }
++
+ if ((!aac_adapter_sync_cmd(dev, GET_ADAPTER_PROPERTIES,
+ 0, 0, 0, 0, 0, 0,
+ status+0, status+1, status+2, status+3, NULL)) &&
+diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
+index 511bbc575062..bb7988d53216 100644
+--- a/drivers/scsi/aacraid/commsup.c
++++ b/drivers/scsi/aacraid/commsup.c
+@@ -637,10 +637,10 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
+ }
+ return -EFAULT;
+ }
+- /* We used to udelay() here but that absorbed
+- * a CPU when a timeout occured. Not very
+- * useful. */
+- cpu_relax();
++ /*
++ * Allow other processes / CPUS to use core
++ */
++ schedule();
+ }
+ } else if (down_interruptible(&fibptr->event_wait)) {
+ /* Do nothing ... satisfy
+@@ -1996,6 +1996,10 @@ int aac_command_thread(void *data)
+ if (difference <= 0)
+ difference = 1;
+ set_current_state(TASK_INTERRUPTIBLE);
++
++ if (kthread_should_stop())
++ break;
++
+ schedule_timeout(difference);
+
+ if (kthread_should_stop())
+diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
+index ff6caab8cc8b..79a1cec1a51f 100644
+--- a/drivers/scsi/aacraid/linit.c
++++ b/drivers/scsi/aacraid/linit.c
+@@ -1431,8 +1431,8 @@ static int aac_acquire_resources(struct aac_dev *dev)
+ /* After EEH recovery or suspend resume, max_msix count
+ * may change, therfore updating in init as well.
+ */
+- aac_adapter_start(dev);
+ dev->init->Sa_MSIXVectors = cpu_to_le32(dev->max_msix);
++ aac_adapter_start(dev);
+ }
+ return 0;
+
+diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
+index 4fb3581d4614..3fa65338d3f5 100644
+--- a/drivers/scsi/lpfc/lpfc_mem.c
++++ b/drivers/scsi/lpfc/lpfc_mem.c
+@@ -231,13 +231,15 @@ lpfc_mem_free(struct lpfc_hba *phba)
+ if (phba->lpfc_hbq_pool)
+ pci_pool_destroy(phba->lpfc_hbq_pool);
+ phba->lpfc_hbq_pool = NULL;
+- mempool_destroy(phba->rrq_pool);
++
++ if (phba->rrq_pool)
++ mempool_destroy(phba->rrq_pool);
+ phba->rrq_pool = NULL;
+
+ /* Free NLP memory pool */
+ mempool_destroy(phba->nlp_mem_pool);
+ phba->nlp_mem_pool = NULL;
+- if (phba->sli_rev == LPFC_SLI_REV4) {
++ if (phba->sli_rev == LPFC_SLI_REV4 && phba->active_rrq_pool) {
+ mempool_destroy(phba->active_rrq_pool);
+ phba->active_rrq_pool = NULL;
+ }
+diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
+index dc9119e1b458..0401955b3b04 100644
+--- a/drivers/staging/rdma/hfi1/qp.c
++++ b/drivers/staging/rdma/hfi1/qp.c
+@@ -512,6 +512,7 @@ static void iowait_wakeup(struct iowait *wait, int reason)
+ static void iowait_sdma_drained(struct iowait *wait)
+ {
+ struct rvt_qp *qp = iowait_to_qp(wait);
++ unsigned long flags;
+
+ /*
+ * This happens when the send engine notes
+@@ -519,12 +520,12 @@ static void iowait_sdma_drained(struct iowait *wait)
+ * do the flush work until that QP's
+ * sdma work has finished.
+ */
+- spin_lock(&qp->s_lock);
++ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_flags & RVT_S_WAIT_DMA) {
+ qp->s_flags &= ~RVT_S_WAIT_DMA;
+ hfi1_schedule_send(qp);
+ }
+- spin_unlock(&qp->s_lock);
++ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+
+ /**
+diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
+index 1a7dc3c590b1..481eb2989a1e 100644
+--- a/drivers/tty/serial/ucc_uart.c
++++ b/drivers/tty/serial/ucc_uart.c
+@@ -1478,6 +1478,9 @@ static const struct of_device_id ucc_uart_match[] = {
+ .type = "serial",
+ .compatible = "ucc_uart",
+ },
++ {
++ .compatible = "fsl,t1040-ucc-uart",
++ },
+ {},
+ };
+ MODULE_DEVICE_TABLE(of, ucc_uart_match);
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index cb7138c97c69..71d49a95f8c0 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -487,7 +487,8 @@ static void eoi_pirq(struct irq_data *data)
+ if (!VALID_EVTCHN(evtchn))
+ return;
+
+- if (unlikely(irqd_is_setaffinity_pending(data))) {
++ if (unlikely(irqd_is_setaffinity_pending(data)) &&
++ likely(!irqd_irq_disabled(data))) {
+ int masked = test_and_set_mask(evtchn);
+
+ clear_evtchn(evtchn);
+@@ -1370,7 +1371,8 @@ static void ack_dynirq(struct irq_data *data)
+ if (!VALID_EVTCHN(evtchn))
+ return;
+
+- if (unlikely(irqd_is_setaffinity_pending(data))) {
++ if (unlikely(irqd_is_setaffinity_pending(data)) &&
++ likely(!irqd_irq_disabled(data))) {
+ int masked = test_and_set_mask(evtchn);
+
+ clear_evtchn(evtchn);
+diff --git a/fs/affs/super.c b/fs/affs/super.c
+index 2a6713b6b9f4..d6384863192c 100644
+--- a/fs/affs/super.c
++++ b/fs/affs/super.c
+@@ -528,7 +528,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
+ char *prefix = NULL;
+
+ new_opts = kstrdup(data, GFP_KERNEL);
+- if (!new_opts)
++ if (data && !new_opts)
+ return -ENOMEM;
+
+ pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
+@@ -546,7 +546,8 @@ affs_remount(struct super_block *sb, int *flags, char *data)
+ }
+
+ flush_delayed_work(&sbi->sb_work);
+- replace_mount_options(sb, new_opts);
++ if (new_opts)
++ replace_mount_options(sb, new_opts);
+
+ sbi->s_flags = mount_flags;
+ sbi->s_mode = mode;
+diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
+index 237b877d316d..3da4cf8d18b6 100644
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -1150,25 +1150,20 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
+ unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
+ ext4_group_t block_group;
+ int bit;
+- struct buffer_head *bitmap_bh;
++ struct buffer_head *bitmap_bh = NULL;
+ struct inode *inode = NULL;
+- long err = -EIO;
++ int err = -EFSCORRUPTED;
+
+- /* Error cases - e2fsck has already cleaned up for us */
+- if (ino > max_ino) {
+- ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino);
+- err = -EFSCORRUPTED;
+- goto error;
+- }
++ if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
++ goto bad_orphan;
+
+ block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
+ bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
+ bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
+ if (IS_ERR(bitmap_bh)) {
+- err = PTR_ERR(bitmap_bh);
+- ext4_warning(sb, "inode bitmap error %ld for orphan %lu",
+- ino, err);
+- goto error;
++ ext4_error(sb, "inode bitmap error %ld for orphan %lu",
++ ino, PTR_ERR(bitmap_bh));
++ return (struct inode *) bitmap_bh;
+ }
+
+ /* Having the inode bit set should be a 100% indicator that this
+@@ -1179,15 +1174,21 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
+ goto bad_orphan;
+
+ inode = ext4_iget(sb, ino);
+- if (IS_ERR(inode))
+- goto iget_failed;
++ if (IS_ERR(inode)) {
++ err = PTR_ERR(inode);
++ ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
++ ino, err);
++ return inode;
++ }
+
+ /*
+- * If the orphans has i_nlinks > 0 then it should be able to be
+- * truncated, otherwise it won't be removed from the orphan list
+- * during processing and an infinite loop will result.
++ * If the orphans has i_nlinks > 0 then it should be able to
++ * be truncated, otherwise it won't be removed from the orphan
++ * list during processing and an infinite loop will result.
++ * Similarly, it must not be a bad inode.
+ */
+- if (inode->i_nlink && !ext4_can_truncate(inode))
++ if ((inode->i_nlink && !ext4_can_truncate(inode)) ||
++ is_bad_inode(inode))
+ goto bad_orphan;
+
+ if (NEXT_ORPHAN(inode) > max_ino)
+@@ -1195,29 +1196,25 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
+ brelse(bitmap_bh);
+ return inode;
+
+-iget_failed:
+- err = PTR_ERR(inode);
+- inode = NULL;
+ bad_orphan:
+- ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
+- printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
+- bit, (unsigned long long)bitmap_bh->b_blocknr,
+- ext4_test_bit(bit, bitmap_bh->b_data));
+- printk(KERN_WARNING "inode=%p\n", inode);
++ ext4_error(sb, "bad orphan inode %lu", ino);
++ if (bitmap_bh)
++ printk(KERN_ERR "ext4_test_bit(bit=%d, block=%llu) = %d\n",
++ bit, (unsigned long long)bitmap_bh->b_blocknr,
++ ext4_test_bit(bit, bitmap_bh->b_data));
+ if (inode) {
+- printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
++ printk(KERN_ERR "is_bad_inode(inode)=%d\n",
+ is_bad_inode(inode));
+- printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
++ printk(KERN_ERR "NEXT_ORPHAN(inode)=%u\n",
+ NEXT_ORPHAN(inode));
+- printk(KERN_WARNING "max_ino=%lu\n", max_ino);
+- printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
++ printk(KERN_ERR "max_ino=%lu\n", max_ino);
++ printk(KERN_ERR "i_nlink=%u\n", inode->i_nlink);
+ /* Avoid freeing blocks if we got a bad deleted inode */
+ if (inode->i_nlink == 0)
+ inode->i_blocks = 0;
+ iput(inode);
+ }
+ brelse(bitmap_bh);
+-error:
+ return ERR_PTR(err);
+ }
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 981a1fc30eaa..250c2df04a92 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -684,6 +684,21 @@ out_sem:
+ ret = check_block_validity(inode, map);
+ if (ret != 0)
+ return ret;
++
++ /*
++ * Inodes with freshly allocated blocks where contents will be
++ * visible after transaction commit must be on transaction's
++ * ordered data list.
++ */
++ if (map->m_flags & EXT4_MAP_NEW &&
++ !(map->m_flags & EXT4_MAP_UNWRITTEN) &&
++ !(flags & EXT4_GET_BLOCKS_ZERO) &&
++ !IS_NOQUOTA(inode) &&
++ ext4_should_order_data(inode)) {
++ ret = ext4_jbd2_file_inode(handle, inode);
++ if (ret)
++ return ret;
++ }
+ }
+ return retval;
+ }
+@@ -1289,15 +1304,6 @@ static int ext4_write_end(struct file *file,
+ int i_size_changed = 0;
+
+ trace_ext4_write_end(inode, pos, len, copied);
+- if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) {
+- ret = ext4_jbd2_file_inode(handle, inode);
+- if (ret) {
+- unlock_page(page);
+- put_page(page);
+- goto errout;
+- }
+- }
+-
+ if (ext4_has_inline_data(inode)) {
+ ret = ext4_write_inline_data_end(inode, pos, len,
+ copied, page);
+diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
+index eae5917c534e..0acf8cacb2be 100644
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -365,7 +365,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+ struct dquot *transfer_to[MAXQUOTAS] = { };
+
+ transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+- if (transfer_to[PRJQUOTA]) {
++ if (!IS_ERR(transfer_to[PRJQUOTA])) {
+ err = __dquot_transfer(inode, transfer_to);
+ dqput(transfer_to[PRJQUOTA]);
+ if (err)
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index eeeade76012e..9d26fa2188f6 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -1266,6 +1266,7 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
+ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
+ {
+ int order = 1;
++ int bb_incr = 1 << (e4b->bd_blkbits - 1);
+ void *bb;
+
+ BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
+@@ -1278,7 +1279,8 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
+ /* this block is part of buddy of order 'order' */
+ return order;
+ }
+- bb += 1 << (e4b->bd_blkbits - order);
++ bb += bb_incr;
++ bb_incr >>= 1;
+ order++;
+ }
+ return 0;
+@@ -2583,7 +2585,7 @@ int ext4_mb_init(struct super_block *sb)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned i, j;
+- unsigned offset;
++ unsigned offset, offset_incr;
+ unsigned max;
+ int ret;
+
+@@ -2612,11 +2614,13 @@ int ext4_mb_init(struct super_block *sb)
+
+ i = 1;
+ offset = 0;
++ offset_incr = 1 << (sb->s_blocksize_bits - 1);
+ max = sb->s_blocksize << 2;
+ do {
+ sbi->s_mb_offsets[i] = offset;
+ sbi->s_mb_maxs[i] = max;
+- offset += 1 << (sb->s_blocksize_bits - i);
++ offset += offset_incr;
++ offset_incr = offset_incr >> 1;
+ max = max >> 1;
+ i++;
+ } while (i <= sb->s_blocksize_bits + 1);
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 48e4b8907826..fdd151f91522 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -2828,7 +2828,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
+ * list entries can cause panics at unmount time.
+ */
+ mutex_lock(&sbi->s_orphan_lock);
+- list_del(&EXT4_I(inode)->i_orphan);
++ list_del_init(&EXT4_I(inode)->i_orphan);
+ mutex_unlock(&sbi->s_orphan_lock);
+ }
+ }
+diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
+index 5dafb9cef12e..1ca31c8e27ed 100644
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -1480,7 +1480,8 @@ restart:
+ if (pos + len <= MAX_INLINE_DATA) {
+ read_inline_data(page, ipage);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+- set_inline_node(ipage);
++ if (inode->i_nlink)
++ set_inline_node(ipage);
+ } else {
+ err = f2fs_convert_inline_page(&dn, page);
+ if (err)
+diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
+index 4aaed890048f..776dccbc306d 100644
+--- a/fs/nfs/pnfs_nfs.c
++++ b/fs/nfs/pnfs_nfs.c
+@@ -246,6 +246,23 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,
+
+ }
+
++/* Helper function for pnfs_generic_commit_pagelist to catch an empty
++ * page list. This can happen when two commits race. */
++static bool
++pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
++ struct nfs_commit_data *data,
++ struct nfs_commit_info *cinfo)
++{
++ if (list_empty(pages)) {
++ if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
++ wake_up_atomic_t(&cinfo->mds->rpcs_out);
++ nfs_commitdata_release(data);
++ return true;
++ }
++
++ return false;
++}
++
+ /* This follows nfs_commit_list pretty closely */
+ int
+ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
+@@ -280,6 +297,11 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
+ list_for_each_entry_safe(data, tmp, &list, pages) {
+ list_del_init(&data->pages);
+ if (data->ds_commit_index < 0) {
++ /* another commit raced with us */
++ if (pnfs_generic_commit_cancel_empty_pagelist(mds_pages,
++ data, cinfo))
++ continue;
++
+ nfs_init_commit(data, mds_pages, NULL, cinfo);
+ nfs_initiate_commit(NFS_CLIENT(inode), data,
+ NFS_PROTO(data->inode),
+@@ -288,6 +310,12 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
+ LIST_HEAD(pages);
+
+ pnfs_fetch_commit_bucket_list(&pages, data, cinfo);
++
++ /* another commit raced with us */
++ if (pnfs_generic_commit_cancel_empty_pagelist(&pages,
++ data, cinfo))
++ continue;
++
+ nfs_init_commit(data, &pages, data->lseg, cinfo);
+ initiate_commit(data, how);
+ }
+diff --git a/fs/nfs/write.c b/fs/nfs/write.c
+index 5f4fd53e5764..f5e613395fc2 100644
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -1709,6 +1709,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
+ {
+ struct nfs_commit_data *data;
+
++ /* another commit raced with us */
++ if (list_empty(head))
++ return 0;
++
+ data = nfs_commitdata_alloc();
+
+ if (!data)
+diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
+index ee3aaa0a5317..ca0d3eb44925 100644
+--- a/fs/xfs/xfs_fsops.c
++++ b/fs/xfs/xfs_fsops.c
+@@ -243,8 +243,8 @@ xfs_growfs_data_private(
+ agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
+ agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
+ agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
+- agf->agf_flfirst = 0;
+- agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
++ agf->agf_flfirst = cpu_to_be32(1);
++ agf->agf_fllast = 0;
+ agf->agf_flcount = 0;
+ tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
+ agf->agf_freeblks = cpu_to_be32(tmpsize);
+diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
+index 96f606deee31..3cbc9031731b 100644
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -3205,13 +3205,14 @@ xfs_iflush_cluster(
+ * We need to check under the i_flags_lock for a valid inode
+ * here. Skip it if it is not valid or the wrong inode.
+ */
+- spin_lock(&ip->i_flags_lock);
+- if (!ip->i_ino ||
++ spin_lock(&iq->i_flags_lock);
++ if (!iq->i_ino ||
++ __xfs_iflags_test(iq, XFS_ISTALE) ||
+ (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
+- spin_unlock(&ip->i_flags_lock);
++ spin_unlock(&iq->i_flags_lock);
+ continue;
+ }
+- spin_unlock(&ip->i_flags_lock);
++ spin_unlock(&iq->i_flags_lock);
+
+ /*
+ * Do an un-protected check to see if the inode is dirty and
+@@ -3327,7 +3328,7 @@ xfs_iflush(
+ struct xfs_buf **bpp)
+ {
+ struct xfs_mount *mp = ip->i_mount;
+- struct xfs_buf *bp;
++ struct xfs_buf *bp = NULL;
+ struct xfs_dinode *dip;
+ int error;
+
+@@ -3369,14 +3370,22 @@ xfs_iflush(
+ }
+
+ /*
+- * Get the buffer containing the on-disk inode.
++ * Get the buffer containing the on-disk inode. We are doing a try-lock
++ * operation here, so we may get an EAGAIN error. In that case, we
++ * simply want to return with the inode still dirty.
++ *
++ * If we get any other error, we effectively have a corruption situation
++ * and we cannot flush the inode, so we treat it the same as failing
++ * xfs_iflush_int().
+ */
+ error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
+ 0);
+- if (error || !bp) {
++ if (error == -EAGAIN) {
+ xfs_ifunlock(ip);
+ return error;
+ }
++ if (error)
++ goto corrupt_out;
+
+ /*
+ * First flush out the inode that xfs_iflush was called with.
+@@ -3404,7 +3413,8 @@ xfs_iflush(
+ return 0;
+
+ corrupt_out:
+- xfs_buf_relse(bp);
++ if (bp)
++ xfs_buf_relse(bp);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ cluster_corrupt_out:
+ error = -EFSCORRUPTED;
+diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
+index 187e14b696c2..cedf601ca0fe 100644
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -928,7 +928,7 @@ xfs_fs_alloc_inode(
+
+ /*
+ * Now that the generic code is guaranteed not to be accessing
+- * the linux inode, we can reclaim the inode.
++ * the linux inode, we can inactivate and reclaim the inode.
+ */
+ STATIC void
+ xfs_fs_destroy_inode(
+@@ -938,9 +938,14 @@ xfs_fs_destroy_inode(
+
+ trace_xfs_destroy_inode(ip);
+
+- XFS_STATS_INC(ip->i_mount, vn_reclaim);
++ ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
++ XFS_STATS_INC(ip->i_mount, vn_rele);
++ XFS_STATS_INC(ip->i_mount, vn_remove);
++
++ xfs_inactive(ip);
+
+ ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
++ XFS_STATS_INC(ip->i_mount, vn_reclaim);
+
+ /*
+ * We should never get here with one of the reclaim flags already set.
+@@ -987,24 +992,6 @@ xfs_fs_inode_init_once(
+ "xfsino", ip->i_ino);
+ }
+
+-STATIC void
+-xfs_fs_evict_inode(
+- struct inode *inode)
+-{
+- xfs_inode_t *ip = XFS_I(inode);
+-
+- ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+-
+- trace_xfs_evict_inode(ip);
+-
+- truncate_inode_pages_final(&inode->i_data);
+- clear_inode(inode);
+- XFS_STATS_INC(ip->i_mount, vn_rele);
+- XFS_STATS_INC(ip->i_mount, vn_remove);
+-
+- xfs_inactive(ip);
+-}
+-
+ /*
+ * We do an unlocked check for XFS_IDONTCACHE here because we are already
+ * serialised against cache hits here via the inode->i_lock and igrab() in
+@@ -1276,6 +1263,16 @@ xfs_fs_remount(
+ return -EINVAL;
+ }
+
++ if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
++ xfs_sb_has_ro_compat_feature(sbp,
++ XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
++ xfs_warn(mp,
++"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
++ (sbp->sb_features_ro_compat &
++ XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
++ return -EINVAL;
++ }
++
+ mp->m_flags &= ~XFS_MOUNT_RDONLY;
+
+ /*
+@@ -1663,7 +1660,6 @@ xfs_fs_free_cached_objects(
+ static const struct super_operations xfs_super_operations = {
+ .alloc_inode = xfs_fs_alloc_inode,
+ .destroy_inode = xfs_fs_destroy_inode,
+- .evict_inode = xfs_fs_evict_inode,
+ .drop_inode = xfs_fs_drop_inode,
+ .put_super = xfs_fs_put_super,
+ .sync_fs = xfs_fs_sync_fs,
+diff --git a/include/drm/drm_dp_dual_mode_helper.h b/include/drm/drm_dp_dual_mode_helper.h
+new file mode 100644
+index 000000000000..e8a9dfd0e055
+--- /dev/null
++++ b/include/drm/drm_dp_dual_mode_helper.h
+@@ -0,0 +1,92 @@
++/*
++ * Copyright © 2016 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef DRM_DP_DUAL_MODE_HELPER_H
++#define DRM_DP_DUAL_MODE_HELPER_H
++
++#include <linux/types.h>
++
++/*
++ * Optional for type 1 DVI adaptors
++ * Mandatory for type 1 HDMI and type 2 adaptors
++ */
++#define DP_DUAL_MODE_HDMI_ID 0x00 /* 00-0f */
++#define DP_DUAL_MODE_HDMI_ID_LEN 16
++/*
++ * Optional for type 1 adaptors
++ * Mandatory for type 2 adaptors
++ */
++#define DP_DUAL_MODE_ADAPTOR_ID 0x10
++#define DP_DUAL_MODE_REV_MASK 0x07
++#define DP_DUAL_MODE_REV_TYPE2 0x00
++#define DP_DUAL_MODE_TYPE_MASK 0xf0
++#define DP_DUAL_MODE_TYPE_TYPE2 0xa0
++#define DP_DUAL_MODE_IEEE_OUI 0x11 /* 11-13*/
++#define DP_DUAL_IEEE_OUI_LEN 3
++#define DP_DUAL_DEVICE_ID 0x14 /* 14-19 */
++#define DP_DUAL_DEVICE_ID_LEN 6
++#define DP_DUAL_MODE_HARDWARE_REV 0x1a
++#define DP_DUAL_MODE_FIRMWARE_MAJOR_REV 0x1b
++#define DP_DUAL_MODE_FIRMWARE_MINOR_REV 0x1c
++#define DP_DUAL_MODE_MAX_TMDS_CLOCK 0x1d
++#define DP_DUAL_MODE_I2C_SPEED_CAP 0x1e
++#define DP_DUAL_MODE_TMDS_OEN 0x20
++#define DP_DUAL_MODE_TMDS_DISABLE 0x01
++#define DP_DUAL_MODE_HDMI_PIN_CTRL 0x21
++#define DP_DUAL_MODE_CEC_ENABLE 0x01
++#define DP_DUAL_MODE_I2C_SPEED_CTRL 0x22
++
++struct i2c_adapter;
++
++ssize_t drm_dp_dual_mode_read(struct i2c_adapter *adapter,
++ u8 offset, void *buffer, size_t size);
++ssize_t drm_dp_dual_mode_write(struct i2c_adapter *adapter,
++ u8 offset, const void *buffer, size_t size);
++
++/**
++ * enum drm_dp_dual_mode_type - Type of the DP dual mode adaptor
++ * @DRM_DP_DUAL_MODE_NONE: No DP dual mode adaptor
++ * @DRM_DP_DUAL_MODE_UNKNOWN: Could be either none or type 1 DVI adaptor
++ * @DRM_DP_DUAL_MODE_TYPE1_DVI: Type 1 DVI adaptor
++ * @DRM_DP_DUAL_MODE_TYPE1_HDMI: Type 1 HDMI adaptor
++ * @DRM_DP_DUAL_MODE_TYPE2_DVI: Type 2 DVI adaptor
++ * @DRM_DP_DUAL_MODE_TYPE2_HDMI: Type 2 HDMI adaptor
++ */
++enum drm_dp_dual_mode_type {
++ DRM_DP_DUAL_MODE_NONE,
++ DRM_DP_DUAL_MODE_UNKNOWN,
++ DRM_DP_DUAL_MODE_TYPE1_DVI,
++ DRM_DP_DUAL_MODE_TYPE1_HDMI,
++ DRM_DP_DUAL_MODE_TYPE2_DVI,
++ DRM_DP_DUAL_MODE_TYPE2_HDMI,
++};
++
++enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(struct i2c_adapter *adapter);
++int drm_dp_dual_mode_max_tmds_clock(enum drm_dp_dual_mode_type type,
++ struct i2c_adapter *adapter);
++int drm_dp_dual_mode_get_tmds_output(enum drm_dp_dual_mode_type type,
++ struct i2c_adapter *adapter, bool *enabled);
++int drm_dp_dual_mode_set_tmds_output(enum drm_dp_dual_mode_type type,
++ struct i2c_adapter *adapter, bool enable);
++const char *drm_dp_get_dual_mode_type_name(enum drm_dp_dual_mode_type type);
++
++#endif
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 8f468e0d2534..02f7f31e540f 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1782,7 +1782,7 @@ extern void free_highmem_page(struct page *page);
+ extern void adjust_managed_page_count(struct page *page, long count);
+ extern void mem_init_print_info(const char *str);
+
+-extern void reserve_bootmem_region(unsigned long start, unsigned long end);
++extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end);
+
+ /* Free the reserved page into the buddy system, so it gets managed. */
+ static inline void __free_reserved_page(struct page *page)
+diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
+index ad66589f2ae6..3a2a79401789 100644
+--- a/include/video/imx-ipu-v3.h
++++ b/include/video/imx-ipu-v3.h
+@@ -16,6 +16,7 @@
+ #include <linux/videodev2.h>
+ #include <linux/bitmap.h>
+ #include <linux/fb.h>
++#include <linux/of.h>
+ #include <media/v4l2-mediabus.h>
+ #include <video/videomode.h>
+
+@@ -345,6 +346,7 @@ struct ipu_client_platformdata {
+ int dc;
+ int dp;
+ int dma[2];
++ struct device_node *of_node;
+ };
+
+ #endif /* __DRM_IPU_H__ */
+diff --git a/kernel/exit.c b/kernel/exit.c
+index fd90195667e1..79c7e38a203b 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -918,17 +918,28 @@ static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
+ task_pid_type(p, wo->wo_type) == wo->wo_pid;
+ }
+
+-static int eligible_child(struct wait_opts *wo, struct task_struct *p)
++static int
++eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p)
+ {
+ if (!eligible_pid(wo, p))
+ return 0;
+- /* Wait for all children (clone and not) if __WALL is set;
+- * otherwise, wait for clone children *only* if __WCLONE is
+- * set; otherwise, wait for non-clone children *only*. (Note:
+- * A "clone" child here is one that reports to its parent
+- * using a signal other than SIGCHLD.) */
+- if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
+- && !(wo->wo_flags & __WALL))
++
++ /*
++ * Wait for all children (clone and not) if __WALL is set or
++ * if it is traced by us.
++ */
++ if (ptrace || (wo->wo_flags & __WALL))
++ return 1;
++
++ /*
++ * Otherwise, wait for clone children *only* if __WCLONE is set;
++ * otherwise, wait for non-clone children *only*.
++ *
++ * Note: a "clone" child here is one that reports to its parent
++ * using a signal other than SIGCHLD, or a non-leader thread which
++ * we can only see if it is traced by us.
++ */
++ if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
+ return 0;
+
+ return 1;
+@@ -1300,7 +1311,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
+ if (unlikely(exit_state == EXIT_DEAD))
+ return 0;
+
+- ret = eligible_child(wo, p);
++ ret = eligible_child(wo, ptrace, p);
+ if (!ret)
+ return ret;
+
+diff --git a/lib/dma-debug.c b/lib/dma-debug.c
+index 4a1515f4b452..51a76af25c66 100644
+--- a/lib/dma-debug.c
++++ b/lib/dma-debug.c
+@@ -657,9 +657,9 @@ static struct dma_debug_entry *dma_entry_alloc(void)
+ spin_lock_irqsave(&free_entries_lock, flags);
+
+ if (list_empty(&free_entries)) {
+- pr_err("DMA-API: debugging out of memory - disabling\n");
+ global_disable = true;
+ spin_unlock_irqrestore(&free_entries_lock, flags);
++ pr_err("DMA-API: debugging out of memory - disabling\n");
+ return NULL;
+ }
+
+diff --git a/mm/compaction.c b/mm/compaction.c
+index 8fa254043801..f8e925eb479b 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1742,7 +1742,7 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
+ struct zone *zone;
+ enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;
+
+- for (zoneid = 0; zoneid < classzone_idx; zoneid++) {
++ for (zoneid = 0; zoneid <= classzone_idx; zoneid++) {
+ zone = &pgdat->node_zones[zoneid];
+
+ if (!populated_zone(zone))
+@@ -1777,7 +1777,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
+ cc.classzone_idx);
+ count_vm_event(KCOMPACTD_WAKE);
+
+- for (zoneid = 0; zoneid < cc.classzone_idx; zoneid++) {
++ for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) {
+ int status;
+
+ zone = &pgdat->node_zones[zoneid];
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index c1069efcc4d7..898fe3f909f9 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -1003,7 +1003,7 @@ static inline void init_reserved_page(unsigned long pfn)
+ * marks the pages PageReserved. The remaining valid pages are later
+ * sent to the buddy page allocator.
+ */
+-void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
++void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
+ {
+ unsigned long start_pfn = PFN_DOWN(start);
+ unsigned long end_pfn = PFN_UP(end);
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 307b555024ef..3ebf9c4c2f4d 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1098,6 +1098,8 @@ void page_move_anon_rmap(struct page *page,
+
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_VMA(!anon_vma, vma);
++ if (IS_ENABLED(CONFIG_DEBUG_VM) && PageTransHuge(page))
++ address &= HPAGE_PMD_MASK;
+ VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
+
+ anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
+index d9bcbe6e7d65..91df28a100f9 100644
+--- a/net/batman-adv/bat_v_ogm.c
++++ b/net/batman-adv/bat_v_ogm.c
+@@ -529,8 +529,10 @@ static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
+ goto out;
+ }
+
+- if (router)
++ if (router) {
+ batadv_neigh_node_put(router);
++ router = NULL;
++ }
+
+ /* Update routes, and check if the OGM is from the best next hop */
+ batadv_v_ogm_orig_update(bat_priv, orig_node, neigh_node, ogm2,
+diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
+index 8a4cc2f7f0db..e45cb9155039 100644
+--- a/net/bluetooth/6lowpan.c
++++ b/net/bluetooth/6lowpan.c
+@@ -431,15 +431,18 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev,
+ bdaddr_t *peer_addr, u8 *peer_addr_type)
+ {
+ struct in6_addr ipv6_daddr;
++ struct ipv6hdr *hdr;
+ struct lowpan_dev *dev;
+ struct lowpan_peer *peer;
+ bdaddr_t addr, *any = BDADDR_ANY;
+ u8 *daddr = any->b;
+ int err, status = 0;
+
++ hdr = ipv6_hdr(skb);
++
+ dev = lowpan_dev(netdev);
+
+- memcpy(&ipv6_daddr, &lowpan_cb(skb)->addr, sizeof(ipv6_daddr));
++ memcpy(&ipv6_daddr, &hdr->daddr, sizeof(ipv6_daddr));
+
+ if (ipv6_addr_is_multicast(&ipv6_daddr)) {
+ lowpan_cb(skb)->chan = NULL;
+@@ -489,15 +492,9 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev,
+ unsigned short type, const void *_daddr,
+ const void *_saddr, unsigned int len)
+ {
+- struct ipv6hdr *hdr;
+-
+ if (type != ETH_P_IPV6)
+ return -EINVAL;
+
+- hdr = ipv6_hdr(skb);
+-
+- memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, sizeof(struct in6_addr));
+-
+ return 0;
+ }
+
+diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
+index 1095be9c80ab..4605dc73def6 100644
+--- a/net/sunrpc/auth_gss/svcauth_gss.c
++++ b/net/sunrpc/auth_gss/svcauth_gss.c
+@@ -857,8 +857,8 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
+ goto out;
+ if (svc_getnl(&buf->head[0]) != seq)
+ goto out;
+- /* trim off the mic at the end before returning */
+- xdr_buf_trim(buf, mic.len + 4);
++ /* trim off the mic and padding at the end before returning */
++ xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
+ stat = 0;
+ out:
+ kfree(mic.data);
+diff --git a/scripts/package/Makefile b/scripts/package/Makefile
+index c2c7389bfbab..71b4a8af9d4d 100644
+--- a/scripts/package/Makefile
++++ b/scripts/package/Makefile
+@@ -52,7 +52,7 @@ rpm-pkg rpm: FORCE
+ $(call cmd,src_tar,$(KERNELPATH),kernel.spec)
+ $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
+ mv -f $(objtree)/.tmp_version $(objtree)/.version
+- rpmbuild --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz
++ rpmbuild $(RPMOPTS) --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz
+ rm $(KERNELPATH).tar.gz kernel.spec
+
+ # binrpm-pkg
+@@ -63,7 +63,7 @@ binrpm-pkg: FORCE
+ $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
+ mv -f $(objtree)/.tmp_version $(objtree)/.version
+
+- rpmbuild --define "_builddir $(objtree)" --target \
++ rpmbuild $(RPMOPTS) --define "_builddir $(objtree)" --target \
+ $(UTS_MACHINE) -bb $(objtree)/binkernel.spec
+ rm binkernel.spec
+
+diff --git a/sound/soc/codecs/ak4642.c b/sound/soc/codecs/ak4642.c
+index cda27c22812a..eb8fe212e163 100644
+--- a/sound/soc/codecs/ak4642.c
++++ b/sound/soc/codecs/ak4642.c
+@@ -560,6 +560,7 @@ static const struct regmap_config ak4642_regmap = {
+ .max_register = FIL1_3,
+ .reg_defaults = ak4642_reg,
+ .num_reg_defaults = NUM_AK4642_REG_DEFAULTS,
++ .cache_type = REGCACHE_RBTREE,
+ };
+
+ static const struct regmap_config ak4643_regmap = {
+@@ -568,6 +569,7 @@ static const struct regmap_config ak4643_regmap = {
+ .max_register = SPK_MS,
+ .reg_defaults = ak4643_reg,
+ .num_reg_defaults = ARRAY_SIZE(ak4643_reg),
++ .cache_type = REGCACHE_RBTREE,
+ };
+
+ static const struct regmap_config ak4648_regmap = {
+@@ -576,6 +578,7 @@ static const struct regmap_config ak4648_regmap = {
+ .max_register = EQ_FBEQE,
+ .reg_defaults = ak4648_reg,
+ .num_reg_defaults = ARRAY_SIZE(ak4648_reg),
++ .cache_type = REGCACHE_RBTREE,
+ };
+
+ static const struct ak4642_drvdata ak4642_drvdata = {
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-06-24 20:41 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-06-24 20:41 UTC (permalink / raw)
To: gentoo-commits
commit: a36656eef665f797c9827b7866e5c9894656770c
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Fri Jun 24 20:41:39 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Fri Jun 24 20:41:39 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=a36656ee
Linux patch 4.6.3
0000_README | 4 +
1002_linux-4.6.3.patch | 4713 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 4717 insertions(+)
diff --git a/0000_README b/0000_README
index 61071b1..8feba45 100644
--- a/0000_README
+++ b/0000_README
@@ -51,6 +51,10 @@ Patch: 1001_linux-4.6.2.patch
From: http://www.kernel.org
Desc: Linux 4.6.2
+Patch: 1002_linux-4.6.3.patch
+From: http://www.kernel.org
+Desc: Linux 4.6.3
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1002_linux-4.6.3.patch b/1002_linux-4.6.3.patch
new file mode 100644
index 0000000..e81ef72
--- /dev/null
+++ b/1002_linux-4.6.3.patch
@@ -0,0 +1,4713 @@
+diff --git a/Makefile b/Makefile
+index 93068c2d0656..c62b531d5a85 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 6
+-SUBLEVEL = 2
++SUBLEVEL = 3
+ EXTRAVERSION =
+ NAME = Charred Weasel
+
+diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
+index ef9119f7462e..4d9375814b53 100644
+--- a/arch/arm/kernel/ptrace.c
++++ b/arch/arm/kernel/ptrace.c
+@@ -733,8 +733,8 @@ static int vfp_set(struct task_struct *target,
+ if (ret)
+ return ret;
+
+- vfp_flush_hwstate(thread);
+ thread->vfpstate.hard = new_vfp;
++ vfp_flush_hwstate(thread);
+
+ return 0;
+ }
+diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
+index 24ed037f09fd..83d48a599f69 100644
+--- a/arch/arm64/include/asm/elf.h
++++ b/arch/arm64/include/asm/elf.h
+@@ -160,14 +160,14 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+ #define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12))
+ #endif
+
+-#ifdef CONFIG_COMPAT
+-
+ #ifdef __AARCH64EB__
+ #define COMPAT_ELF_PLATFORM ("v8b")
+ #else
+ #define COMPAT_ELF_PLATFORM ("v8l")
+ #endif
+
++#ifdef CONFIG_COMPAT
++
+ #define COMPAT_ELF_ET_DYN_BASE (2 * TASK_SIZE_32 / 3)
+
+ /* AArch32 registers. */
+diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
+index f0c3fb7ec8cf..2d2d7cb04a5d 100644
+--- a/arch/arm64/kernel/cpuinfo.c
++++ b/arch/arm64/kernel/cpuinfo.c
+@@ -22,6 +22,8 @@
+
+ #include <linux/bitops.h>
+ #include <linux/bug.h>
++#include <linux/compat.h>
++#include <linux/elf.h>
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/personality.h>
+@@ -104,6 +106,7 @@ static const char *const compat_hwcap2_str[] = {
+ static int c_show(struct seq_file *m, void *v)
+ {
+ int i, j;
++ bool compat = personality(current->personality) == PER_LINUX32;
+
+ for_each_online_cpu(i) {
+ struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
+@@ -115,6 +118,9 @@ static int c_show(struct seq_file *m, void *v)
+ * "processor". Give glibc what it expects.
+ */
+ seq_printf(m, "processor\t: %d\n", i);
++ if (compat)
++ seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n",
++ MIDR_REVISION(midr), COMPAT_ELF_PLATFORM);
+
+ seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+ loops_per_jiffy / (500000UL/HZ),
+@@ -127,7 +133,7 @@ static int c_show(struct seq_file *m, void *v)
+ * software which does already (at least for 32-bit).
+ */
+ seq_puts(m, "Features\t:");
+- if (personality(current->personality) == PER_LINUX32) {
++ if (compat) {
+ #ifdef CONFIG_COMPAT
+ for (j = 0; compat_hwcap_str[j]; j++)
+ if (compat_elf_hwcap & (1 << j))
+diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
+index fff7cd42b3a3..3129df9d3a73 100644
+--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
++++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
+@@ -190,12 +190,11 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
+ if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
+ continue;
+
+- if (cpu_if->vgic_elrsr & (1 << i)) {
++ if (cpu_if->vgic_elrsr & (1 << i))
+ cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
+- continue;
+- }
++ else
++ cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
+
+- cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
+ __gic_v3_set_lr(0, i);
+ }
+
+diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
+index 3ae4a28c4aed..10b79e9e87d1 100644
+--- a/arch/arm64/mm/fault.c
++++ b/arch/arm64/mm/fault.c
+@@ -109,7 +109,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
+ * PTE_RDONLY is cleared by default in the asm below, so set it in
+ * back if necessary (read-only or clean PTE).
+ */
+- if (!pte_write(entry) || !dirty)
++ if (!pte_write(entry) || !pte_sw_dirty(entry))
+ pte_val(entry) |= PTE_RDONLY;
+
+ /*
+diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
+index d7c0acb35ec2..8d49614d600d 100644
+--- a/arch/parisc/kernel/unaligned.c
++++ b/arch/parisc/kernel/unaligned.c
+@@ -666,7 +666,7 @@ void handle_unaligned(struct pt_regs *regs)
+ break;
+ }
+
+- if (modify && R1(regs->iir))
++ if (ret == 0 && modify && R1(regs->iir))
+ regs->gr[R1(regs->iir)] = newbase;
+
+
+@@ -677,6 +677,14 @@ void handle_unaligned(struct pt_regs *regs)
+
+ if (ret)
+ {
++ /*
++ * The unaligned handler failed.
++ * If we were called by __get_user() or __put_user() jump
++ * to it's exception fixup handler instead of crashing.
++ */
++ if (!user_mode(regs) && fixup_exception(regs))
++ return;
++
+ printk(KERN_CRIT "Unaligned handler failed, ret = %d\n", ret);
+ die_if_kernel("Unaligned data reference", regs, 28);
+
+diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
+index f5f4c66bbbc9..166d8631747f 100644
+--- a/arch/powerpc/include/asm/reg.h
++++ b/arch/powerpc/include/asm/reg.h
+@@ -715,7 +715,7 @@
+ #define MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */
+ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */
+ #define SPRN_MMCR1 798
+-#define SPRN_MMCR2 769
++#define SPRN_MMCR2 785
+ #define SPRN_MMCRA 0x312
+ #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
+ #define MMCRA_SDAR_DCACHE_MISS 0x40000000UL
+@@ -752,13 +752,13 @@
+ #define SPRN_PMC6 792
+ #define SPRN_PMC7 793
+ #define SPRN_PMC8 794
+-#define SPRN_SIAR 780
+-#define SPRN_SDAR 781
+ #define SPRN_SIER 784
+ #define SIER_SIPR 0x2000000 /* Sampled MSR_PR */
+ #define SIER_SIHV 0x1000000 /* Sampled MSR_HV */
+ #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */
+ #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */
++#define SPRN_SIAR 796
++#define SPRN_SDAR 797
+ #define SPRN_TACR 888
+ #define SPRN_TCSCR 889
+ #define SPRN_CSIGR 890
+diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
+index da5192590c44..ccd2037c797f 100644
+--- a/arch/powerpc/kernel/prom_init.c
++++ b/arch/powerpc/kernel/prom_init.c
+@@ -656,6 +656,7 @@ unsigned char ibm_architecture_vec[] = {
+ W(0xffff0000), W(0x003e0000), /* POWER6 */
+ W(0xffff0000), W(0x003f0000), /* POWER7 */
+ W(0xffff0000), W(0x004b0000), /* POWER8E */
++ W(0xffff0000), W(0x004c0000), /* POWER8NVL */
+ W(0xffff0000), W(0x004d0000), /* POWER8 */
+ W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */
+ W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */
+diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
+index 7635b1c6b5da..f4acba25fa5e 100644
+--- a/arch/powerpc/mm/hash_utils_64.c
++++ b/arch/powerpc/mm/hash_utils_64.c
+@@ -159,6 +159,19 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
+ },
+ };
+
++/*
++ * 'R' and 'C' update notes:
++ * - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
++ * create writeable HPTEs without C set, because the hcall H_PROTECT
++ * that we use in that case will not update C
++ * - The above is however not a problem, because we also don't do that
++ * fancy "no flush" variant of eviction and we use H_REMOVE which will
++ * do the right thing and thus we don't have the race I described earlier
++ *
++ * - Under bare metal, we do have the race, so we need R and C set
++ * - We make sure R is always set and never lost
++ * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping
++ */
+ unsigned long htab_convert_pte_flags(unsigned long pteflags)
+ {
+ unsigned long rflags = 0;
+@@ -180,9 +193,14 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
+ rflags |= 0x1;
+ }
+ /*
+- * Always add "C" bit for perf. Memory coherence is always enabled
++ * We can't allow hardware to update hpte bits. Hence always
++ * set 'R' bit and set 'C' if it is a write fault
++ * Memory coherence is always enabled
+ */
+- rflags |= HPTE_R_C | HPTE_R_M;
++ rflags |= HPTE_R_R | HPTE_R_M;
++
++ if (pteflags & _PAGE_DIRTY)
++ rflags |= HPTE_R_C;
+ /*
+ * Add in WIG bits
+ */
+diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
+index ac3ffd97e059..405baaf96864 100644
+--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
++++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
+@@ -615,29 +615,50 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
+ {
+ int config_addr;
+ int ret;
++ /* Waiting 0.2s maximum before skipping configuration */
++ int max_wait = 200;
+
+ /* Figure out the PE address */
+ config_addr = pe->config_addr;
+ if (pe->addr)
+ config_addr = pe->addr;
+
+- /* Use new configure-pe function, if supported */
+- if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
+- ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+- config_addr, BUID_HI(pe->phb->buid),
+- BUID_LO(pe->phb->buid));
+- } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
+- ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
+- config_addr, BUID_HI(pe->phb->buid),
+- BUID_LO(pe->phb->buid));
+- } else {
+- return -EFAULT;
+- }
++ while (max_wait > 0) {
++ /* Use new configure-pe function, if supported */
++ if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
++ ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
++ config_addr, BUID_HI(pe->phb->buid),
++ BUID_LO(pe->phb->buid));
++ } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
++ ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
++ config_addr, BUID_HI(pe->phb->buid),
++ BUID_LO(pe->phb->buid));
++ } else {
++ return -EFAULT;
++ }
+
+- if (ret)
+- pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
+- __func__, pe->phb->global_number, pe->addr, ret);
++ if (!ret)
++ return ret;
++
++ /*
++ * If RTAS returns a delay value that's above 100ms, cut it
++ * down to 100ms in case firmware made a mistake. For more
++ * on how these delay values work see rtas_busy_delay_time
++ */
++ if (ret > RTAS_EXTENDED_DELAY_MIN+2 &&
++ ret <= RTAS_EXTENDED_DELAY_MAX)
++ ret = RTAS_EXTENDED_DELAY_MIN+2;
++
++ max_wait -= rtas_busy_delay_time(ret);
++
++ if (max_wait < 0)
++ break;
++
++ rtas_busy_delay(ret);
++ }
+
++ pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
++ __func__, pe->phb->global_number, pe->addr, ret);
+ return ret;
+ }
+
+diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
+index f010c93a88b1..fda605dbc1b4 100644
+--- a/arch/s390/net/bpf_jit.h
++++ b/arch/s390/net/bpf_jit.h
+@@ -37,7 +37,7 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
+ * | | |
+ * +---------------+ |
+ * | 8 byte skbp | |
+- * R15+170 -> +---------------+ |
++ * R15+176 -> +---------------+ |
+ * | 8 byte hlen | |
+ * R15+168 -> +---------------+ |
+ * | 4 byte align | |
+@@ -58,7 +58,7 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
+ #define STK_OFF (STK_SPACE - STK_160_UNUSED)
+ #define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */
+ #define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */
+-#define STK_OFF_SKBP 170 /* Offset of SKB pointer on stack */
++#define STK_OFF_SKBP 176 /* Offset of SKB pointer on stack */
+
+ #define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
+ #define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
+diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
+index 3c0bfc1f2694..2662fcc67f8d 100644
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -45,7 +45,7 @@ struct bpf_jit {
+ int labels[1]; /* Labels for local jumps */
+ };
+
+-#define BPF_SIZE_MAX 0x7ffff /* Max size for program (20 bit signed displ) */
++#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */
+
+ #define SEEN_SKB 1 /* skb access */
+ #define SEEN_MEM 2 /* use mem[] for temporary storage */
+@@ -446,7 +446,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
+ emit_load_skb_data_hlen(jit);
+ if (jit->seen & SEEN_SKB_CHANGE)
+ /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
+- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15,
++ EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
+ STK_OFF_SKBP);
+ }
+
+diff --git a/arch/sparc/include/asm/head_64.h b/arch/sparc/include/asm/head_64.h
+index 10e9dabc4c41..f0700cfeedd7 100644
+--- a/arch/sparc/include/asm/head_64.h
++++ b/arch/sparc/include/asm/head_64.h
+@@ -15,6 +15,10 @@
+
+ #define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ)
+
++#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
++#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
++#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
++
+ #define __CHEETAH_ID 0x003e0014
+ #define __JALAPENO_ID 0x003e0016
+ #define __SERRANO_ID 0x003e0022
+diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
+index f089cfa249f3..5a189bf3c8ac 100644
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -375,7 +375,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot)
+ #define pgprot_noncached pgprot_noncached
+
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+-static inline pte_t pte_mkhuge(pte_t pte)
++static inline unsigned long __pte_huge_mask(void)
+ {
+ unsigned long mask;
+
+@@ -390,8 +390,19 @@ static inline pte_t pte_mkhuge(pte_t pte)
+ : "=r" (mask)
+ : "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V));
+
+- return __pte(pte_val(pte) | mask);
++ return mask;
++}
++
++static inline pte_t pte_mkhuge(pte_t pte)
++{
++ return __pte(pte_val(pte) | __pte_huge_mask());
++}
++
++static inline bool is_hugetlb_pte(pte_t pte)
++{
++ return !!(pte_val(pte) & __pte_huge_mask());
+ }
++
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ static inline pmd_t pmd_mkhuge(pmd_t pmd)
+ {
+@@ -403,6 +414,11 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
+ return __pmd(pte_val(pte));
+ }
+ #endif
++#else
++static inline bool is_hugetlb_pte(pte_t pte)
++{
++ return false;
++}
+ #endif
+
+ static inline pte_t pte_mkdirty(pte_t pte)
+@@ -858,6 +874,19 @@ static inline unsigned long pud_pfn(pud_t pud)
+ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+ pte_t *ptep, pte_t orig, int fullmm);
+
++static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
++ pte_t *ptep, pte_t orig, int fullmm)
++{
++ /* It is more efficient to let flush_tlb_kernel_range()
++ * handle init_mm tlb flushes.
++ *
++ * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
++ * and SUN4V pte layout, so this inline test is fine.
++ */
++ if (likely(mm != &init_mm) && pte_accessible(mm, orig))
++ tlb_batch_add(mm, vaddr, ptep, orig, fullmm);
++}
++
+ #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr,
+@@ -874,15 +903,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t orig = *ptep;
+
+ *ptep = pte;
+-
+- /* It is more efficient to let flush_tlb_kernel_range()
+- * handle init_mm tlb flushes.
+- *
+- * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
+- * and SUN4V pte layout, so this inline test is fine.
+- */
+- if (likely(mm != &init_mm) && pte_accessible(mm, orig))
+- tlb_batch_add(mm, addr, ptep, orig, fullmm);
++ maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm);
+ }
+
+ #define set_pte_at(mm,addr,ptep,pte) \
+diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h
+index dea1cfa2122b..a8e192e90700 100644
+--- a/arch/sparc/include/asm/tlbflush_64.h
++++ b/arch/sparc/include/asm/tlbflush_64.h
+@@ -8,6 +8,7 @@
+ #define TLB_BATCH_NR 192
+
+ struct tlb_batch {
++ bool huge;
+ struct mm_struct *mm;
+ unsigned long tlb_nr;
+ unsigned long active;
+@@ -16,7 +17,7 @@ struct tlb_batch {
+
+ void flush_tsb_kernel_range(unsigned long start, unsigned long end);
+ void flush_tsb_user(struct tlb_batch *tb);
+-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr);
++void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge);
+
+ /* TLB flush operations. */
+
+diff --git a/arch/sparc/include/asm/ttable.h b/arch/sparc/include/asm/ttable.h
+index 71b5a67522ab..781b9f1dbdc2 100644
+--- a/arch/sparc/include/asm/ttable.h
++++ b/arch/sparc/include/asm/ttable.h
+@@ -589,8 +589,8 @@ user_rtt_fill_64bit: \
+ restored; \
+ nop; nop; nop; nop; nop; nop; \
+ nop; nop; nop; nop; nop; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_dax; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_mna; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
+@@ -652,8 +652,8 @@ user_rtt_fill_32bit: \
+ restored; \
+ nop; nop; nop; nop; nop; \
+ nop; nop; nop; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
+- ba,a,pt %xcc, user_rtt_fill_fixup; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_dax; \
++ ba,a,pt %xcc, user_rtt_fill_fixup_mna; \
+ ba,a,pt %xcc, user_rtt_fill_fixup;
+
+
+diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
+index 7cf9c6ea3f1f..fdb13327fded 100644
+--- a/arch/sparc/kernel/Makefile
++++ b/arch/sparc/kernel/Makefile
+@@ -21,6 +21,7 @@ CFLAGS_REMOVE_perf_event.o := -pg
+ CFLAGS_REMOVE_pcr.o := -pg
+ endif
+
++obj-$(CONFIG_SPARC64) += urtt_fill.o
+ obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o
+ obj-$(CONFIG_SPARC32) += etrap_32.o
+ obj-$(CONFIG_SPARC32) += rtrap_32.o
+diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
+index d08bdaffdbfc..216948ca4382 100644
+--- a/arch/sparc/kernel/rtrap_64.S
++++ b/arch/sparc/kernel/rtrap_64.S
+@@ -14,10 +14,6 @@
+ #include <asm/visasm.h>
+ #include <asm/processor.h>
+
+-#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
+-#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
+-#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
+-
+ #ifdef CONFIG_CONTEXT_TRACKING
+ # define SCHEDULE_USER schedule_user
+ #else
+@@ -242,52 +238,17 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
+ wrpr %g1, %cwp
+ ba,a,pt %xcc, user_rtt_fill_64bit
+
+-user_rtt_fill_fixup:
+- rdpr %cwp, %g1
+- add %g1, 1, %g1
+- wrpr %g1, 0x0, %cwp
+-
+- rdpr %wstate, %g2
+- sll %g2, 3, %g2
+- wrpr %g2, 0x0, %wstate
+-
+- /* We know %canrestore and %otherwin are both zero. */
+-
+- sethi %hi(sparc64_kern_pri_context), %g2
+- ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2
+- mov PRIMARY_CONTEXT, %g1
+-
+-661: stxa %g2, [%g1] ASI_DMMU
+- .section .sun4v_1insn_patch, "ax"
+- .word 661b
+- stxa %g2, [%g1] ASI_MMU
+- .previous
+-
+- sethi %hi(KERNBASE), %g1
+- flush %g1
++user_rtt_fill_fixup_dax:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ mov 1, %g3
+
+- or %g4, FAULT_CODE_WINFIXUP, %g4
+- stb %g4, [%g6 + TI_FAULT_CODE]
+- stx %g5, [%g6 + TI_FAULT_ADDR]
++user_rtt_fill_fixup_mna:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ mov 2, %g3
+
+- mov %g6, %l1
+- wrpr %g0, 0x0, %tl
+-
+-661: nop
+- .section .sun4v_1insn_patch, "ax"
+- .word 661b
+- SET_GL(0)
+- .previous
+-
+- wrpr %g0, RTRAP_PSTATE, %pstate
+-
+- mov %l1, %g6
+- ldx [%g6 + TI_TASK], %g4
+- LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
+- call do_sparc64_fault
+- add %sp, PTREGS_OFF, %o0
+- ba,pt %xcc, rtrap
+- nop
++user_rtt_fill_fixup:
++ ba,pt %xcc, user_rtt_fill_fixup_common
++ clr %g3
+
+ user_rtt_pre_restore:
+ add %g1, 1, %g1
+diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
+index 3c25241fa5cb..ebd0bfe25a72 100644
+--- a/arch/sparc/kernel/signal32.c
++++ b/arch/sparc/kernel/signal32.c
+@@ -138,12 +138,24 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
+ return 0;
+ }
+
++/* Checks if the fp is valid. We always build signal frames which are
++ * 16-byte aligned, therefore we can always enforce that the restore
++ * frame has that property as well.
++ */
++static bool invalid_frame_pointer(void __user *fp, int fplen)
++{
++ if ((((unsigned long) fp) & 15) ||
++ ((unsigned long)fp) > 0x100000000ULL - fplen)
++ return true;
++ return false;
++}
++
+ void do_sigreturn32(struct pt_regs *regs)
+ {
+ struct signal_frame32 __user *sf;
+ compat_uptr_t fpu_save;
+ compat_uptr_t rwin_save;
+- unsigned int psr;
++ unsigned int psr, ufp;
+ unsigned int pc, npc;
+ sigset_t set;
+ compat_sigset_t seta;
+@@ -158,11 +170,16 @@ void do_sigreturn32(struct pt_regs *regs)
+ sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 3))
++ if (invalid_frame_pointer(sf, sizeof(*sf)))
++ goto segv;
++
++ if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
+ goto segv;
+
+- if (get_user(pc, &sf->info.si_regs.pc) ||
++ if (__get_user(pc, &sf->info.si_regs.pc) ||
+ __get_user(npc, &sf->info.si_regs.npc))
+ goto segv;
+
+@@ -227,7 +244,7 @@ segv:
+ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
+ {
+ struct rt_signal_frame32 __user *sf;
+- unsigned int psr, pc, npc;
++ unsigned int psr, pc, npc, ufp;
+ compat_uptr_t fpu_save;
+ compat_uptr_t rwin_save;
+ sigset_t set;
+@@ -242,11 +259,16 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
+ sf = (struct rt_signal_frame32 __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 3))
++ if (invalid_frame_pointer(sf, sizeof(*sf)))
+ goto segv;
+
+- if (get_user(pc, &sf->regs.pc) ||
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
++ goto segv;
++
++ if (__get_user(pc, &sf->regs.pc) ||
+ __get_user(npc, &sf->regs.npc))
+ goto segv;
+
+@@ -307,14 +329,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static int invalid_frame_pointer(void __user *fp, int fplen)
+-{
+- if ((((unsigned long) fp) & 7) || ((unsigned long)fp) > 0x100000000ULL - fplen)
+- return 1;
+- return 0;
+-}
+-
+ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp;
+diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
+index 52aa5e4ce5e7..c3c12efe0bc0 100644
+--- a/arch/sparc/kernel/signal_32.c
++++ b/arch/sparc/kernel/signal_32.c
+@@ -60,10 +60,22 @@ struct rt_signal_frame {
+ #define SF_ALIGNEDSZ (((sizeof(struct signal_frame) + 7) & (~7)))
+ #define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame) + 7) & (~7)))
+
++/* Checks if the fp is valid. We always build signal frames which are
++ * 16-byte aligned, therefore we can always enforce that the restore
++ * frame has that property as well.
++ */
++static inline bool invalid_frame_pointer(void __user *fp, int fplen)
++{
++ if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen))
++ return true;
++
++ return false;
++}
++
+ asmlinkage void do_sigreturn(struct pt_regs *regs)
+ {
++ unsigned long up_psr, pc, npc, ufp;
+ struct signal_frame __user *sf;
+- unsigned long up_psr, pc, npc;
+ sigset_t set;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+@@ -77,10 +89,13 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
+ sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
++ if (!invalid_frame_pointer(sf, sizeof(*sf)))
++ goto segv_and_exit;
++
++ if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
+ goto segv_and_exit;
+
+- if (((unsigned long) sf) & 3)
++ if (ufp & 0x7)
+ goto segv_and_exit;
+
+ err = __get_user(pc, &sf->info.si_regs.pc);
+@@ -127,7 +142,7 @@ segv_and_exit:
+ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
+ {
+ struct rt_signal_frame __user *sf;
+- unsigned int psr, pc, npc;
++ unsigned int psr, pc, npc, ufp;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+ sigset_t set;
+@@ -135,8 +150,13 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
+
+ synchronize_user_stack();
+ sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
+- if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
+- (((unsigned long) sf) & 0x03))
++ if (!invalid_frame_pointer(sf, sizeof(*sf)))
++ goto segv;
++
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
++ goto segv;
++
++ if (ufp & 0x7)
+ goto segv;
+
+ err = __get_user(pc, &sf->regs.pc);
+@@ -178,15 +198,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static inline int invalid_frame_pointer(void __user *fp, int fplen)
+-{
+- if ((((unsigned long) fp) & 7) || !__access_ok((unsigned long)fp, fplen))
+- return 1;
+-
+- return 0;
+-}
+-
+ static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp = regs->u_regs[UREG_FP];
+diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
+index 39aaec173f66..5ee930c48f4c 100644
+--- a/arch/sparc/kernel/signal_64.c
++++ b/arch/sparc/kernel/signal_64.c
+@@ -234,6 +234,17 @@ do_sigsegv:
+ goto out;
+ }
+
++/* Checks if the fp is valid. We always build rt signal frames which
++ * are 16-byte aligned, therefore we can always enforce that the
++ * restore frame has that property as well.
++ */
++static bool invalid_frame_pointer(void __user *fp)
++{
++ if (((unsigned long) fp) & 15)
++ return true;
++ return false;
++}
++
+ struct rt_signal_frame {
+ struct sparc_stackf ss;
+ siginfo_t info;
+@@ -246,8 +257,8 @@ struct rt_signal_frame {
+
+ void do_rt_sigreturn(struct pt_regs *regs)
+ {
++ unsigned long tpc, tnpc, tstate, ufp;
+ struct rt_signal_frame __user *sf;
+- unsigned long tpc, tnpc, tstate;
+ __siginfo_fpu_t __user *fpu_save;
+ __siginfo_rwin_t __user *rwin_save;
+ sigset_t set;
+@@ -261,10 +272,16 @@ void do_rt_sigreturn(struct pt_regs *regs)
+ (regs->u_regs [UREG_FP] + STACK_BIAS);
+
+ /* 1. Make sure we are not getting garbage from the user */
+- if (((unsigned long) sf) & 3)
++ if (invalid_frame_pointer(sf))
++ goto segv;
++
++ if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
+ goto segv;
+
+- err = get_user(tpc, &sf->regs.tpc);
++ if ((ufp + STACK_BIAS) & 0x7)
++ goto segv;
++
++ err = __get_user(tpc, &sf->regs.tpc);
+ err |= __get_user(tnpc, &sf->regs.tnpc);
+ if (test_thread_flag(TIF_32BIT)) {
+ tpc &= 0xffffffff;
+@@ -308,14 +325,6 @@ segv:
+ force_sig(SIGSEGV, current);
+ }
+
+-/* Checks if the fp is valid */
+-static int invalid_frame_pointer(void __user *fp)
+-{
+- if (((unsigned long) fp) & 15)
+- return 1;
+- return 0;
+-}
+-
+ static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+ {
+ unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
+diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c
+index 0f6eebe71e6c..e5fe8cef9a69 100644
+--- a/arch/sparc/kernel/sigutil_32.c
++++ b/arch/sparc/kernel/sigutil_32.c
+@@ -48,6 +48,10 @@ int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+ int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+ {
+ int err;
++
++ if (((unsigned long) fpu) & 3)
++ return -EFAULT;
++
+ #ifdef CONFIG_SMP
+ if (test_tsk_thread_flag(current, TIF_USEDFPU))
+ regs->psr &= ~PSR_EF;
+@@ -97,7 +101,10 @@ int restore_rwin_state(__siginfo_rwin_t __user *rp)
+ struct thread_info *t = current_thread_info();
+ int i, wsaved, err;
+
+- __get_user(wsaved, &rp->wsaved);
++ if (((unsigned long) rp) & 3)
++ return -EFAULT;
++
++ get_user(wsaved, &rp->wsaved);
+ if (wsaved > NSWINS)
+ return -EFAULT;
+
+diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c
+index 387834a9c56a..36aadcbeac69 100644
+--- a/arch/sparc/kernel/sigutil_64.c
++++ b/arch/sparc/kernel/sigutil_64.c
+@@ -37,7 +37,10 @@ int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+ unsigned long fprs;
+ int err;
+
+- err = __get_user(fprs, &fpu->si_fprs);
++ if (((unsigned long) fpu) & 7)
++ return -EFAULT;
++
++ err = get_user(fprs, &fpu->si_fprs);
+ fprs_write(0);
+ regs->tstate &= ~TSTATE_PEF;
+ if (fprs & FPRS_DL)
+@@ -72,7 +75,10 @@ int restore_rwin_state(__siginfo_rwin_t __user *rp)
+ struct thread_info *t = current_thread_info();
+ int i, wsaved, err;
+
+- __get_user(wsaved, &rp->wsaved);
++ if (((unsigned long) rp) & 7)
++ return -EFAULT;
++
++ get_user(wsaved, &rp->wsaved);
+ if (wsaved > NSWINS)
+ return -EFAULT;
+
+diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S
+new file mode 100644
+index 000000000000..5604a2b051d4
+--- /dev/null
++++ b/arch/sparc/kernel/urtt_fill.S
+@@ -0,0 +1,98 @@
++#include <asm/thread_info.h>
++#include <asm/trap_block.h>
++#include <asm/spitfire.h>
++#include <asm/ptrace.h>
++#include <asm/head.h>
++
++ .text
++ .align 8
++ .globl user_rtt_fill_fixup_common
++user_rtt_fill_fixup_common:
++ rdpr %cwp, %g1
++ add %g1, 1, %g1
++ wrpr %g1, 0x0, %cwp
++
++ rdpr %wstate, %g2
++ sll %g2, 3, %g2
++ wrpr %g2, 0x0, %wstate
++
++ /* We know %canrestore and %otherwin are both zero. */
++
++ sethi %hi(sparc64_kern_pri_context), %g2
++ ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2
++ mov PRIMARY_CONTEXT, %g1
++
++661: stxa %g2, [%g1] ASI_DMMU
++ .section .sun4v_1insn_patch, "ax"
++ .word 661b
++ stxa %g2, [%g1] ASI_MMU
++ .previous
++
++ sethi %hi(KERNBASE), %g1
++ flush %g1
++
++ mov %g4, %l4
++ mov %g5, %l5
++ brnz,pn %g3, 1f
++ mov %g3, %l3
++
++ or %g4, FAULT_CODE_WINFIXUP, %g4
++ stb %g4, [%g6 + TI_FAULT_CODE]
++ stx %g5, [%g6 + TI_FAULT_ADDR]
++1:
++ mov %g6, %l1
++ wrpr %g0, 0x0, %tl
++
++661: nop
++ .section .sun4v_1insn_patch, "ax"
++ .word 661b
++ SET_GL(0)
++ .previous
++
++ wrpr %g0, RTRAP_PSTATE, %pstate
++
++ mov %l1, %g6
++ ldx [%g6 + TI_TASK], %g4
++ LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
++
++ brnz,pn %l3, 1f
++ nop
++
++ call do_sparc64_fault
++ add %sp, PTREGS_OFF, %o0
++ ba,pt %xcc, rtrap
++ nop
++
++1: cmp %g3, 2
++ bne,pn %xcc, 2f
++ nop
++
++ sethi %hi(tlb_type), %g1
++ lduw [%g1 + %lo(tlb_type)], %g1
++ cmp %g1, 3
++ bne,pt %icc, 1f
++ add %sp, PTREGS_OFF, %o0
++ mov %l4, %o2
++ call sun4v_do_mna
++ mov %l5, %o1
++ ba,a,pt %xcc, rtrap
++1: mov %l4, %o1
++ mov %l5, %o2
++ call mem_address_unaligned
++ nop
++ ba,a,pt %xcc, rtrap
++
++2: sethi %hi(tlb_type), %g1
++ mov %l4, %o1
++ lduw [%g1 + %lo(tlb_type)], %g1
++ mov %l5, %o2
++ cmp %g1, 3
++ bne,pt %icc, 1f
++ add %sp, PTREGS_OFF, %o0
++ call sun4v_data_access_exception
++ nop
++ ba,a,pt %xcc, rtrap
++
++1: call spitfire_data_access_exception
++ nop
++ ba,a,pt %xcc, rtrap
+diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
+index 4977800e9770..ba52e6466a82 100644
+--- a/arch/sparc/mm/hugetlbpage.c
++++ b/arch/sparc/mm/hugetlbpage.c
+@@ -176,17 +176,31 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry)
+ {
+ int i;
++ pte_t orig[2];
++ unsigned long nptes;
+
+ if (!pte_present(*ptep) && pte_present(entry))
+ mm->context.huge_pte_count++;
+
+ addr &= HPAGE_MASK;
+- for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+- set_pte_at(mm, addr, ptep, entry);
++
++ nptes = 1 << HUGETLB_PAGE_ORDER;
++ orig[0] = *ptep;
++ orig[1] = *(ptep + nptes / 2);
++ for (i = 0; i < nptes; i++) {
++ *ptep = entry;
+ ptep++;
+ addr += PAGE_SIZE;
+ pte_val(entry) += PAGE_SIZE;
+ }
++
++ /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
++ addr -= REAL_HPAGE_SIZE;
++ ptep -= nptes / 2;
++ maybe_tlb_batch_add(mm, addr, ptep, orig[1], 0);
++ addr -= REAL_HPAGE_SIZE;
++ ptep -= nptes / 2;
++ maybe_tlb_batch_add(mm, addr, ptep, orig[0], 0);
+ }
+
+ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+@@ -194,19 +208,28 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ {
+ pte_t entry;
+ int i;
++ unsigned long nptes;
+
+ entry = *ptep;
+ if (pte_present(entry))
+ mm->context.huge_pte_count--;
+
+ addr &= HPAGE_MASK;
+-
+- for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+- pte_clear(mm, addr, ptep);
++ nptes = 1 << HUGETLB_PAGE_ORDER;
++ for (i = 0; i < nptes; i++) {
++ *ptep = __pte(0UL);
+ addr += PAGE_SIZE;
+ ptep++;
+ }
+
++ /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
++ addr -= REAL_HPAGE_SIZE;
++ ptep -= nptes / 2;
++ maybe_tlb_batch_add(mm, addr, ptep, entry, 0);
++ addr -= REAL_HPAGE_SIZE;
++ ptep -= nptes / 2;
++ maybe_tlb_batch_add(mm, addr, ptep, entry, 0);
++
+ return entry;
+ }
+
+diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
+index 09e838801e39..14bb0d5ed3c6 100644
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -324,18 +324,6 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde
+ tsb_insert(tsb, tag, tte);
+ }
+
+-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+-static inline bool is_hugetlb_pte(pte_t pte)
+-{
+- if ((tlb_type == hypervisor &&
+- (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
+- (tlb_type != hypervisor &&
+- (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U))
+- return true;
+- return false;
+-}
+-#endif
+-
+ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+ {
+ struct mm_struct *mm;
+@@ -2836,9 +2824,10 @@ void hugetlb_setup(struct pt_regs *regs)
+ * the Data-TLB for huge pages.
+ */
+ if (tlb_type == cheetah_plus) {
++ bool need_context_reload = false;
+ unsigned long ctx;
+
+- spin_lock(&ctx_alloc_lock);
++ spin_lock_irq(&ctx_alloc_lock);
+ ctx = mm->context.sparc64_ctx_val;
+ ctx &= ~CTX_PGSZ_MASK;
+ ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
+@@ -2857,9 +2846,12 @@ void hugetlb_setup(struct pt_regs *regs)
+ * also executing in this address space.
+ */
+ mm->context.sparc64_ctx_val = ctx;
+- on_each_cpu(context_reload, mm, 0);
++ need_context_reload = true;
+ }
+- spin_unlock(&ctx_alloc_lock);
++ spin_unlock_irq(&ctx_alloc_lock);
++
++ if (need_context_reload)
++ on_each_cpu(context_reload, mm, 0);
+ }
+ }
+ #endif
+diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
+index 9df2190c097e..f81cd9736700 100644
+--- a/arch/sparc/mm/tlb.c
++++ b/arch/sparc/mm/tlb.c
+@@ -67,7 +67,7 @@ void arch_leave_lazy_mmu_mode(void)
+ }
+
+ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
+- bool exec)
++ bool exec, bool huge)
+ {
+ struct tlb_batch *tb = &get_cpu_var(tlb_batch);
+ unsigned long nr;
+@@ -84,13 +84,21 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
+ }
+
+ if (!tb->active) {
+- flush_tsb_user_page(mm, vaddr);
++ flush_tsb_user_page(mm, vaddr, huge);
+ global_flush_tlb_page(mm, vaddr);
+ goto out;
+ }
+
+- if (nr == 0)
++ if (nr == 0) {
+ tb->mm = mm;
++ tb->huge = huge;
++ }
++
++ if (tb->huge != huge) {
++ flush_tlb_pending();
++ tb->huge = huge;
++ nr = 0;
++ }
+
+ tb->vaddrs[nr] = vaddr;
+ tb->tlb_nr = ++nr;
+@@ -104,6 +112,8 @@ out:
+ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+ pte_t *ptep, pte_t orig, int fullmm)
+ {
++ bool huge = is_hugetlb_pte(orig);
++
+ if (tlb_type != hypervisor &&
+ pte_dirty(orig)) {
+ unsigned long paddr, pfn = pte_pfn(orig);
+@@ -129,7 +139,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+
+ no_cache_flush:
+ if (!fullmm)
+- tlb_batch_add_one(mm, vaddr, pte_exec(orig));
++ tlb_batch_add_one(mm, vaddr, pte_exec(orig), huge);
+ }
+
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+@@ -145,7 +155,7 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
+ if (pte_val(*pte) & _PAGE_VALID) {
+ bool exec = pte_exec(*pte);
+
+- tlb_batch_add_one(mm, vaddr, exec);
++ tlb_batch_add_one(mm, vaddr, exec, false);
+ }
+ pte++;
+ vaddr += PAGE_SIZE;
+@@ -185,8 +195,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pte_t orig_pte = __pte(pmd_val(orig));
+ bool exec = pte_exec(orig_pte);
+
+- tlb_batch_add_one(mm, addr, exec);
+- tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec);
++ tlb_batch_add_one(mm, addr, exec, true);
++ tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec,
++ true);
+ } else {
+ tlb_batch_pmd_scan(mm, addr, orig);
+ }
+diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
+index a06576683c38..a0604a493a36 100644
+--- a/arch/sparc/mm/tsb.c
++++ b/arch/sparc/mm/tsb.c
+@@ -76,14 +76,15 @@ void flush_tsb_user(struct tlb_batch *tb)
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+- base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+- nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+- if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+- base = __pa(base);
+- __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
+-
++ if (!tb->huge) {
++ base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
++ nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
++ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
++ base = __pa(base);
++ __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
++ }
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+- if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
++ if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+ nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+@@ -94,20 +95,21 @@ void flush_tsb_user(struct tlb_batch *tb)
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+ }
+
+-void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr)
++void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge)
+ {
+ unsigned long nentries, base, flags;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+- base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+- nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+- if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+- base = __pa(base);
+- __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
+-
++ if (!huge) {
++ base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
++ nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
++ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
++ base = __pa(base);
++ __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
++ }
+ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+- if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
++ if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+ base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+ nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+ if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index 06cbe25861f1..87bd6b6bf5bd 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -95,6 +95,12 @@ static inline void cond_local_irq_disable(struct pt_regs *regs)
+ local_irq_disable();
+ }
+
++/*
++ * In IST context, we explicitly disable preemption. This serves two
++ * purposes: it makes it much less likely that we would accidentally
++ * schedule in IST context and it will force a warning if we somehow
++ * manage to schedule by accident.
++ */
+ void ist_enter(struct pt_regs *regs)
+ {
+ if (user_mode(regs)) {
+@@ -109,13 +115,7 @@ void ist_enter(struct pt_regs *regs)
+ rcu_nmi_enter();
+ }
+
+- /*
+- * We are atomic because we're on the IST stack; or we're on
+- * x86_32, in which case we still shouldn't schedule; or we're
+- * on x86_64 and entered from user mode, in which case we're
+- * still atomic unless ist_begin_non_atomic is called.
+- */
+- preempt_count_add(HARDIRQ_OFFSET);
++ preempt_disable();
+
+ /* This code is a bit fragile. Test it. */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
+@@ -123,7 +123,7 @@ void ist_enter(struct pt_regs *regs)
+
+ void ist_exit(struct pt_regs *regs)
+ {
+- preempt_count_sub(HARDIRQ_OFFSET);
++ preempt_enable_no_resched();
+
+ if (!user_mode(regs))
+ rcu_nmi_exit();
+@@ -154,7 +154,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
+ BUG_ON((unsigned long)(current_top_of_stack() -
+ current_stack_pointer()) >= THREAD_SIZE);
+
+- preempt_count_sub(HARDIRQ_OFFSET);
++ preempt_enable_no_resched();
+ }
+
+ /**
+@@ -164,7 +164,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
+ */
+ void ist_end_non_atomic(void)
+ {
+- preempt_count_add(HARDIRQ_OFFSET);
++ preempt_disable();
+ }
+
+ static nokprobe_inline int
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 9b7798c7b210..6b9701babaa1 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3032,6 +3032,11 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
+ if (dbgregs->flags)
+ return -EINVAL;
+
++ if (dbgregs->dr6 & ~0xffffffffull)
++ return -EINVAL;
++ if (dbgregs->dr7 & ~0xffffffffull)
++ return -EINVAL;
++
+ memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+ kvm_update_dr0123(vcpu);
+ vcpu->arch.dr6 = dbgregs->dr6;
+diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
+index 91a7e047a765..477cbf39e0fb 100644
+--- a/crypto/asymmetric_keys/Kconfig
++++ b/crypto/asymmetric_keys/Kconfig
+@@ -13,6 +13,7 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
+ tristate "Asymmetric public-key crypto algorithm subtype"
+ select MPILIB
+ select CRYPTO_HASH_INFO
++ select CRYPTO_AKCIPHER
+ help
+ This option provides support for asymmetric public key type handling.
+ If signature generation and/or verification are to be used,
+diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+index 52c7395cb8d8..0d0d4529ee36 100644
+--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
++++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+@@ -122,6 +122,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
+ struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+ struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+ unsigned int unit;
++ u32 unit_size;
+ int ret;
+
+ if (!ctx->u.aes.key_len)
+@@ -133,11 +134,17 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
+ if (!req->info)
+ return -EINVAL;
+
+- for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++)
+- if (!(req->nbytes & (unit_size_map[unit].size - 1)))
+- break;
++ unit_size = CCP_XTS_AES_UNIT_SIZE__LAST;
++ if (req->nbytes <= unit_size_map[0].size) {
++ for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) {
++ if (!(req->nbytes & (unit_size_map[unit].size - 1))) {
++ unit_size = unit_size_map[unit].value;
++ break;
++ }
++ }
++ }
+
+- if ((unit_size_map[unit].value == CCP_XTS_AES_UNIT_SIZE__LAST) ||
++ if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) ||
+ (ctx->u.aes.key_len != AES_KEYSIZE_128)) {
+ /* Use the fallback to process the request for any
+ * unsupported unit sizes or key sizes
+@@ -158,7 +165,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
+ rctx->cmd.engine = CCP_ENGINE_XTS_AES_128;
+ rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT
+ : CCP_AES_ACTION_DECRYPT;
+- rctx->cmd.u.xts.unit_size = unit_size_map[unit].value;
++ rctx->cmd.u.xts.unit_size = unit_size;
+ rctx->cmd.u.xts.key = &ctx->u.aes.key_sg;
+ rctx->cmd.u.xts.key_len = ctx->u.aes.key_len;
+ rctx->cmd.u.xts.iv = &rctx->iv_sg;
+diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
+index 2fd38d598f3d..3c5e83263fe0 100644
+--- a/drivers/gpio/gpio-bcm-kona.c
++++ b/drivers/gpio/gpio-bcm-kona.c
+@@ -546,11 +546,11 @@ static void bcm_kona_gpio_reset(struct bcm_kona_gpio *kona_gpio)
+ /* disable interrupts and clear status */
+ for (i = 0; i < kona_gpio->num_bank; i++) {
+ /* Unlock the entire bank first */
+- bcm_kona_gpio_write_lock_regs(kona_gpio, i, UNLOCK_CODE);
++ bcm_kona_gpio_write_lock_regs(reg_base, i, UNLOCK_CODE);
+ writel(0xffffffff, reg_base + GPIO_INT_MASK(i));
+ writel(0xffffffff, reg_base + GPIO_INT_STATUS(i));
+ /* Now re-lock the bank */
+- bcm_kona_gpio_write_lock_regs(kona_gpio, i, LOCK_CODE);
++ bcm_kona_gpio_write_lock_regs(reg_base, i, LOCK_CODE);
+ }
+ }
+
+diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c
+index 66d3d247d76d..e72794e463aa 100644
+--- a/drivers/gpio/gpio-zynq.c
++++ b/drivers/gpio/gpio-zynq.c
+@@ -709,11 +709,17 @@ static int zynq_gpio_probe(struct platform_device *pdev)
+ dev_err(&pdev->dev, "input clock not found.\n");
+ return PTR_ERR(gpio->clk);
+ }
++ ret = clk_prepare_enable(gpio->clk);
++ if (ret) {
++ dev_err(&pdev->dev, "Unable to enable clock.\n");
++ return ret;
++ }
+
++ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+ ret = pm_runtime_get_sync(&pdev->dev);
+ if (ret < 0)
+- return ret;
++ goto err_pm_dis;
+
+ /* report a bug if gpio chip registration fails */
+ ret = gpiochip_add_data(chip, gpio);
+@@ -745,6 +751,9 @@ err_rm_gpiochip:
+ gpiochip_remove(chip);
+ err_pm_put:
+ pm_runtime_put(&pdev->dev);
++err_pm_dis:
++ pm_runtime_disable(&pdev->dev);
++ clk_disable_unprepare(gpio->clk);
+
+ return ret;
+ }
+diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
+index b747c76fd2b1..cf3e71243d6d 100644
+--- a/drivers/gpio/gpiolib.c
++++ b/drivers/gpio/gpiolib.c
+@@ -438,7 +438,6 @@ static void gpiodevice_release(struct device *dev)
+ {
+ struct gpio_device *gdev = dev_get_drvdata(dev);
+
+- cdev_del(&gdev->chrdev);
+ list_del(&gdev->list);
+ ida_simple_remove(&gpio_ida, gdev->id);
+ kfree(gdev->label);
+@@ -471,7 +470,6 @@ static int gpiochip_setup_dev(struct gpio_device *gdev)
+
+ /* From this point, the .release() function cleans up gpio_device */
+ gdev->dev.release = gpiodevice_release;
+- get_device(&gdev->dev);
+ pr_debug("%s: registered GPIOs %d to %d on device: %s (%s)\n",
+ __func__, gdev->base, gdev->base + gdev->ngpio - 1,
+ dev_name(&gdev->dev), gdev->chip->label ? : "generic");
+@@ -742,6 +740,8 @@ void gpiochip_remove(struct gpio_chip *chip)
+ * be removed, else it will be dangling until the last user is
+ * gone.
+ */
++ cdev_del(&gdev->chrdev);
++ device_del(&gdev->dev);
+ put_device(&gdev->dev);
+ }
+ EXPORT_SYMBOL_GPL(gpiochip_remove);
+@@ -841,7 +841,7 @@ struct gpio_chip *gpiochip_find(void *data,
+
+ spin_lock_irqsave(&gpio_lock, flags);
+ list_for_each_entry(gdev, &gpio_devices, list)
+- if (match(gdev->chip, data))
++ if (gdev->chip && match(gdev->chip, data))
+ break;
+
+ /* No match? */
+@@ -1339,10 +1339,13 @@ done:
+ /*
+ * This descriptor validation needs to be inserted verbatim into each
+ * function taking a descriptor, so we need to use a preprocessor
+- * macro to avoid endless duplication.
++ * macro to avoid endless duplication. If the desc is NULL it is an
++ * optional GPIO and calls should just bail out.
+ */
+ #define VALIDATE_DESC(desc) do { \
+- if (!desc || !desc->gdev) { \
++ if (!desc) \
++ return 0; \
++ if (!desc->gdev) { \
+ pr_warn("%s: invalid GPIO\n", __func__); \
+ return -EINVAL; \
+ } \
+@@ -1353,7 +1356,9 @@ done:
+ } } while (0)
+
+ #define VALIDATE_DESC_VOID(desc) do { \
+- if (!desc || !desc->gdev) { \
++ if (!desc) \
++ return; \
++ if (!desc->gdev) { \
+ pr_warn("%s: invalid GPIO\n", __func__); \
+ return; \
+ } \
+@@ -2001,7 +2006,14 @@ int gpiod_to_irq(const struct gpio_desc *desc)
+ struct gpio_chip *chip;
+ int offset;
+
+- VALIDATE_DESC(desc);
++ /*
++ * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics
++ * requires this function to not return zero on an invalid descriptor
++ * but rather a negative error number.
++ */
++ if (!desc || !desc->gdev || !desc->gdev->chip)
++ return -EINVAL;
++
+ chip = desc->gdev->chip;
+ offset = gpio_chip_hwgpio(desc);
+ return chip->to_irq ? chip->to_irq(chip, offset) : -ENXIO;
+diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
+index e08f962288d9..f30de8053545 100644
+--- a/drivers/gpu/drm/drm_crtc.c
++++ b/drivers/gpu/drm/drm_crtc.c
+@@ -3434,6 +3434,24 @@ int drm_mode_addfb2(struct drm_device *dev,
+ return 0;
+ }
+
++struct drm_mode_rmfb_work {
++ struct work_struct work;
++ struct list_head fbs;
++};
++
++static void drm_mode_rmfb_work_fn(struct work_struct *w)
++{
++ struct drm_mode_rmfb_work *arg = container_of(w, typeof(*arg), work);
++
++ while (!list_empty(&arg->fbs)) {
++ struct drm_framebuffer *fb =
++ list_first_entry(&arg->fbs, typeof(*fb), filp_head);
++
++ list_del_init(&fb->filp_head);
++ drm_framebuffer_remove(fb);
++ }
++}
++
+ /**
+ * drm_mode_rmfb - remove an FB from the configuration
+ * @dev: drm device for the ioctl
+@@ -3474,7 +3492,25 @@ int drm_mode_rmfb(struct drm_device *dev,
+ mutex_unlock(&dev->mode_config.fb_lock);
+ mutex_unlock(&file_priv->fbs_lock);
+
+- drm_framebuffer_unreference(fb);
++ /*
++ * we now own the reference that was stored in the fbs list
++ *
++ * drm_framebuffer_remove may fail with -EINTR on pending signals,
++ * so run this in a separate stack as there's no way to correctly
++ * handle this after the fb is already removed from the lookup table.
++ */
++ if (atomic_read(&fb->refcount.refcount) > 1) {
++ struct drm_mode_rmfb_work arg;
++
++ INIT_WORK_ONSTACK(&arg.work, drm_mode_rmfb_work_fn);
++ INIT_LIST_HEAD(&arg.fbs);
++ list_add_tail(&fb->filp_head, &arg.fbs);
++
++ schedule_work(&arg.work);
++ flush_work(&arg.work);
++ destroy_work_on_stack(&arg.work);
++ } else
++ drm_framebuffer_unreference(fb);
+
+ return 0;
+
+@@ -3627,7 +3663,6 @@ out_err1:
+ return ret;
+ }
+
+-
+ /**
+ * drm_fb_release - remove and free the FBs on this file
+ * @priv: drm file for the ioctl
+@@ -3642,6 +3677,9 @@ out_err1:
+ void drm_fb_release(struct drm_file *priv)
+ {
+ struct drm_framebuffer *fb, *tfb;
++ struct drm_mode_rmfb_work arg;
++
++ INIT_LIST_HEAD(&arg.fbs);
+
+ /*
+ * When the file gets released that means no one else can access the fb
+@@ -3654,10 +3692,22 @@ void drm_fb_release(struct drm_file *priv)
+ * at it any more.
+ */
+ list_for_each_entry_safe(fb, tfb, &priv->fbs, filp_head) {
+- list_del_init(&fb->filp_head);
++ if (atomic_read(&fb->refcount.refcount) > 1) {
++ list_move_tail(&fb->filp_head, &arg.fbs);
++ } else {
++ list_del_init(&fb->filp_head);
+
+- /* This drops the fpriv->fbs reference. */
+- drm_framebuffer_unreference(fb);
++ /* This drops the fpriv->fbs reference. */
++ drm_framebuffer_unreference(fb);
++ }
++ }
++
++ if (!list_empty(&arg.fbs)) {
++ INIT_WORK_ONSTACK(&arg.work, drm_mode_rmfb_work_fn);
++
++ schedule_work(&arg.work);
++ flush_work(&arg.work);
++ destroy_work_on_stack(&arg.work);
+ }
+ }
+
+diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
+index 1c212205d0e7..d1a46ef5ab3f 100644
+--- a/drivers/gpu/drm/i915/i915_irq.c
++++ b/drivers/gpu/drm/i915/i915_irq.c
+@@ -1829,7 +1829,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
+ /* IRQs are synced during runtime_suspend, we don't require a wakeref */
+ disable_rpm_wakeref_asserts(dev_priv);
+
+- do {
++ for (;;) {
+ master_ctl = I915_READ(GEN8_MASTER_IRQ) & ~GEN8_MASTER_IRQ_CONTROL;
+ iir = I915_READ(VLV_IIR);
+
+@@ -1857,7 +1857,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
+
+ I915_WRITE(GEN8_MASTER_IRQ, DE_MASTER_IRQ_CONTROL);
+ POSTING_READ(GEN8_MASTER_IRQ);
+- } while (0);
++ }
+
+ enable_rpm_wakeref_asserts(dev_priv);
+
+diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
+index 8fc93c5f6abc..d02c4240b7df 100644
+--- a/drivers/net/ethernet/atheros/alx/alx.h
++++ b/drivers/net/ethernet/atheros/alx/alx.h
+@@ -96,6 +96,10 @@ struct alx_priv {
+ unsigned int rx_ringsz;
+ unsigned int rxbuf_size;
+
++ struct page *rx_page;
++ unsigned int rx_page_offset;
++ unsigned int rx_frag_size;
++
+ struct napi_struct napi;
+ struct alx_tx_queue txq;
+ struct alx_rx_queue rxq;
+diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
+index 55b118e876fd..8611811fbf88 100644
+--- a/drivers/net/ethernet/atheros/alx/main.c
++++ b/drivers/net/ethernet/atheros/alx/main.c
+@@ -70,6 +70,35 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry)
+ }
+ }
+
++static struct sk_buff *alx_alloc_skb(struct alx_priv *alx, gfp_t gfp)
++{
++ struct sk_buff *skb;
++ struct page *page;
++
++ if (alx->rx_frag_size > PAGE_SIZE)
++ return __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp);
++
++ page = alx->rx_page;
++ if (!page) {
++ alx->rx_page = page = alloc_page(gfp);
++ if (unlikely(!page))
++ return NULL;
++ alx->rx_page_offset = 0;
++ }
++
++ skb = build_skb(page_address(page) + alx->rx_page_offset,
++ alx->rx_frag_size);
++ if (likely(skb)) {
++ alx->rx_page_offset += alx->rx_frag_size;
++ if (alx->rx_page_offset >= PAGE_SIZE)
++ alx->rx_page = NULL;
++ else
++ get_page(page);
++ }
++ return skb;
++}
++
++
+ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
+ {
+ struct alx_rx_queue *rxq = &alx->rxq;
+@@ -86,7 +115,7 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
+ while (!cur_buf->skb && next != rxq->read_idx) {
+ struct alx_rfd *rfd = &rxq->rfd[cur];
+
+- skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp);
++ skb = alx_alloc_skb(alx, gfp);
+ if (!skb)
+ break;
+ dma = dma_map_single(&alx->hw.pdev->dev,
+@@ -124,6 +153,7 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
+ alx_write_mem16(&alx->hw, ALX_RFD_PIDX, cur);
+ }
+
++
+ return count;
+ }
+
+@@ -592,6 +622,11 @@ static void alx_free_rings(struct alx_priv *alx)
+ kfree(alx->txq.bufs);
+ kfree(alx->rxq.bufs);
+
++ if (alx->rx_page) {
++ put_page(alx->rx_page);
++ alx->rx_page = NULL;
++ }
++
+ dma_free_coherent(&alx->hw.pdev->dev,
+ alx->descmem.size,
+ alx->descmem.virt,
+@@ -646,6 +681,7 @@ static int alx_request_irq(struct alx_priv *alx)
+ alx->dev->name, alx);
+ if (!err)
+ goto out;
++
+ /* fall back to legacy interrupt */
+ pci_disable_msi(alx->hw.pdev);
+ }
+@@ -689,6 +725,7 @@ static int alx_init_sw(struct alx_priv *alx)
+ struct pci_dev *pdev = alx->hw.pdev;
+ struct alx_hw *hw = &alx->hw;
+ int err;
++ unsigned int head_size;
+
+ err = alx_identify_hw(alx);
+ if (err) {
+@@ -704,7 +741,12 @@ static int alx_init_sw(struct alx_priv *alx)
+
+ hw->smb_timer = 400;
+ hw->mtu = alx->dev->mtu;
++
+ alx->rxbuf_size = ALX_MAX_FRAME_LEN(hw->mtu);
++ head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) +
++ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
++ alx->rx_frag_size = roundup_pow_of_two(head_size);
++
+ alx->tx_ringsz = 256;
+ alx->rx_ringsz = 512;
+ hw->imt = 200;
+@@ -806,6 +848,7 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
+ {
+ struct alx_priv *alx = netdev_priv(netdev);
+ int max_frame = ALX_MAX_FRAME_LEN(mtu);
++ unsigned int head_size;
+
+ if ((max_frame < ALX_MIN_FRAME_SIZE) ||
+ (max_frame > ALX_MAX_FRAME_SIZE))
+@@ -817,6 +860,9 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
+ netdev->mtu = mtu;
+ alx->hw.mtu = mtu;
+ alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE);
++ head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) +
++ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
++ alx->rx_frag_size = roundup_pow_of_two(head_size);
+ netdev_update_features(netdev);
+ if (netif_running(netdev))
+ alx_reinit(alx);
+diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
+index 085f9125cf42..06f031715b57 100644
+--- a/drivers/net/ethernet/ezchip/nps_enet.c
++++ b/drivers/net/ethernet/ezchip/nps_enet.c
+@@ -205,8 +205,10 @@ static int nps_enet_poll(struct napi_struct *napi, int budget)
+ * re-adding ourselves to the poll list.
+ */
+
+- if (priv->tx_skb && !tx_ctrl_ct)
++ if (priv->tx_skb && !tx_ctrl_ct) {
++ nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE, 0);
+ napi_reschedule(napi);
++ }
+ }
+
+ return work_done;
+diff --git a/drivers/net/ethernet/marvell/mvneta_bm.c b/drivers/net/ethernet/marvell/mvneta_bm.c
+index 01fccec632ec..466939f8f0cf 100644
+--- a/drivers/net/ethernet/marvell/mvneta_bm.c
++++ b/drivers/net/ethernet/marvell/mvneta_bm.c
+@@ -189,6 +189,7 @@ struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ hwbm_pool->construct = mvneta_bm_construct;
+ hwbm_pool->priv = new_pool;
++ spin_lock_init(&hwbm_pool->lock);
+
+ /* Create new pool */
+ err = mvneta_bm_pool_create(priv, new_pool);
+diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
+index 0e758bcb26b0..1ca796316173 100644
+--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
++++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
+@@ -2727,7 +2727,7 @@ static int ofdpa_port_obj_fib4_add(struct rocker_port *rocker_port,
+
+ return ofdpa_port_fib_ipv4(ofdpa_port, trans,
+ htonl(fib4->dst), fib4->dst_len,
+- &fib4->fi, fib4->tb_id, 0);
++ fib4->fi, fib4->tb_id, 0);
+ }
+
+ static int ofdpa_port_obj_fib4_del(struct rocker_port *rocker_port,
+@@ -2737,7 +2737,7 @@ static int ofdpa_port_obj_fib4_del(struct rocker_port *rocker_port,
+
+ return ofdpa_port_fib_ipv4(ofdpa_port, NULL,
+ htonl(fib4->dst), fib4->dst_len,
+- &fib4->fi, fib4->tb_id,
++ fib4->fi, fib4->tb_id,
+ OFDPA_OP_FLAG_REMOVE);
+ }
+
+diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
+index 1681084cc96f..1f309127457d 100644
+--- a/drivers/net/ethernet/sfc/ef10.c
++++ b/drivers/net/ethernet/sfc/ef10.c
+@@ -619,6 +619,17 @@ fail:
+ return rc;
+ }
+
++static void efx_ef10_forget_old_piobufs(struct efx_nic *efx)
++{
++ struct efx_channel *channel;
++ struct efx_tx_queue *tx_queue;
++
++ /* All our existing PIO buffers went away */
++ efx_for_each_channel(channel, efx)
++ efx_for_each_channel_tx_queue(tx_queue, channel)
++ tx_queue->piobuf = NULL;
++}
++
+ #else /* !EFX_USE_PIO */
+
+ static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n)
+@@ -635,6 +646,10 @@ static void efx_ef10_free_piobufs(struct efx_nic *efx)
+ {
+ }
+
++static void efx_ef10_forget_old_piobufs(struct efx_nic *efx)
++{
++}
++
+ #endif /* EFX_USE_PIO */
+
+ static void efx_ef10_remove(struct efx_nic *efx)
+@@ -1018,6 +1033,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx)
+ nic_data->must_realloc_vis = true;
+ nic_data->must_restore_filters = true;
+ nic_data->must_restore_piobufs = true;
++ efx_ef10_forget_old_piobufs(efx);
+ nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+
+ /* Driver-created vswitches and vports must be re-created */
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+index 06704ca6f9ca..8683a21690b5 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+@@ -209,7 +209,7 @@ int stmmac_mdio_register(struct net_device *ndev)
+ return -ENOMEM;
+
+ if (mdio_bus_data->irqs)
+- memcpy(new_bus->irq, mdio_bus_data, sizeof(new_bus->irq));
++ memcpy(new_bus->irq, mdio_bus_data->irqs, sizeof(new_bus->irq));
+
+ #ifdef CONFIG_OF
+ if (priv->device->of_node)
+diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
+index 7b0a644122eb..9fcb4898fb68 100644
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -336,15 +336,15 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+
+ /* Need Geneve and inner Ethernet header to be present */
+ if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
+- goto error;
++ goto drop;
+
+ /* Return packets with reserved bits set */
+ geneveh = geneve_hdr(skb);
+ if (unlikely(geneveh->ver != GENEVE_VER))
+- goto error;
++ goto drop;
+
+ if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
+- goto error;
++ goto drop;
+
+ gs = rcu_dereference_sk_user_data(sk);
+ if (!gs)
+@@ -367,10 +367,6 @@ drop:
+ /* Consume bad packet */
+ kfree_skb(skb);
+ return 0;
+-
+-error:
+- /* Let the UDP layer deal with the skb */
+- return 1;
+ }
+
+ static struct socket *geneve_create_sock(struct net *net, bool ipv6,
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 92eaab95ae2b..9e803bbcc0b6 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1645,7 +1645,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ if (tb_sa[MACSEC_SA_ATTR_ACTIVE])
+ rx_sa->active = !!nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]);
+
+- nla_memcpy(rx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEY], MACSEC_KEYID_LEN);
++ nla_memcpy(rx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEYID], MACSEC_KEYID_LEN);
+ rx_sa->sc = rx_sc;
+ rcu_assign_pointer(rx_sc->sa[assoc_num], rx_sa);
+
+@@ -1784,7 +1784,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+ return -ENOMEM;
+ }
+
+- nla_memcpy(tx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEY], MACSEC_KEYID_LEN);
++ nla_memcpy(tx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEYID], MACSEC_KEYID_LEN);
+
+ spin_lock_bh(&tx_sa->lock);
+ tx_sa->next_pn = nla_get_u32(tb_sa[MACSEC_SA_ATTR_PN]);
+diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
+index a0f64cba86ba..2ace126533cd 100644
+--- a/drivers/net/team/team.c
++++ b/drivers/net/team/team.c
+@@ -990,7 +990,7 @@ static void team_port_disable(struct team *team,
+ #define TEAM_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
+ NETIF_F_RXCSUM | NETIF_F_ALL_TSO)
+
+-static void __team_compute_features(struct team *team)
++static void ___team_compute_features(struct team *team)
+ {
+ struct team_port *port;
+ u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL;
+@@ -1021,15 +1021,20 @@ static void __team_compute_features(struct team *team)
+ team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ if (dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
+ team->dev->priv_flags |= IFF_XMIT_DST_RELEASE;
++}
+
++static void __team_compute_features(struct team *team)
++{
++ ___team_compute_features(team);
+ netdev_change_features(team->dev);
+ }
+
+ static void team_compute_features(struct team *team)
+ {
+ mutex_lock(&team->lock);
+- __team_compute_features(team);
++ ___team_compute_features(team);
+ mutex_unlock(&team->lock);
++ netdev_change_features(team->dev);
+ }
+
+ static int team_port_enter(struct team *team, struct team_port *port)
+diff --git a/drivers/net/tun.c b/drivers/net/tun.c
+index 2c9e45f50edb..dda490542624 100644
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -568,11 +568,13 @@ static void tun_detach_all(struct net_device *dev)
+ for (i = 0; i < n; i++) {
+ tfile = rtnl_dereference(tun->tfiles[i]);
+ BUG_ON(!tfile);
++ tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
+ tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+ RCU_INIT_POINTER(tfile->tun, NULL);
+ --tun->numqueues;
+ }
+ list_for_each_entry(tfile, &tun->disabled, next) {
++ tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
+ tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+ RCU_INIT_POINTER(tfile->tun, NULL);
+ }
+@@ -628,6 +630,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
+ goto out;
+ }
+ tfile->queue_index = tun->numqueues;
++ tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
+ rcu_assign_pointer(tfile->tun, tun);
+ rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
+ tun->numqueues++;
+@@ -1425,9 +1428,6 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
+ if (!iov_iter_count(to))
+ return 0;
+
+- if (tun->dev->reg_state != NETREG_REGISTERED)
+- return -EIO;
+-
+ /* Read frames from queue */
+ skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0,
+ &peeked, &off, &err);
+diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
+index 8ac261ab7d7d..7e29b55015d0 100644
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -1262,7 +1262,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
+
+ /* Need Vxlan and inner Ethernet header to be present */
+ if (!pskb_may_pull(skb, VXLAN_HLEN))
+- return 1;
++ goto drop;
+
+ unparsed = *vxlan_hdr(skb);
+ /* VNI flag always required to be set */
+@@ -1271,7 +1271,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
+ ntohl(vxlan_hdr(skb)->vx_flags),
+ ntohl(vxlan_hdr(skb)->vx_vni));
+ /* Return non vxlan pkt */
+- return 1;
++ goto drop;
+ }
+ unparsed.vx_flags &= ~VXLAN_HF_VNI;
+ unparsed.vx_vni &= ~VXLAN_VNI_MASK;
+@@ -2959,6 +2959,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
+ if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
+ conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
+
++ if (tb[IFLA_MTU])
++ conf.mtu = nla_get_u32(tb[IFLA_MTU]);
++
+ err = vxlan_dev_configure(src_net, dev, &conf);
+ switch (err) {
+ case -ENODEV:
+diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
+index f70090897fdf..0e537fdc1d08 100644
+--- a/drivers/perf/arm_pmu.c
++++ b/drivers/perf/arm_pmu.c
+@@ -987,9 +987,6 @@ int arm_pmu_device_probe(struct platform_device *pdev,
+
+ armpmu_init(pmu);
+
+- if (!__oprofile_cpu_pmu)
+- __oprofile_cpu_pmu = pmu;
+-
+ pmu->plat_device = pdev;
+
+ if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
+@@ -1025,6 +1022,9 @@ int arm_pmu_device_probe(struct platform_device *pdev,
+ if (ret)
+ goto out_destroy;
+
++ if (!__oprofile_cpu_pmu)
++ __oprofile_cpu_pmu = pmu;
++
+ pr_info("enabled with %s PMU driver, %d counters available\n",
+ pmu->name, pmu->num_events);
+
+diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+index 6ab8c3ccdeea..fba2dd99ee95 100644
+--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
++++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+@@ -1256,9 +1256,10 @@ static void mtk_eint_irq_handler(struct irq_desc *desc)
+ const struct mtk_desc_pin *pin;
+
+ chained_irq_enter(chip, desc);
+- for (eint_num = 0; eint_num < pctl->devdata->ap_num; eint_num += 32) {
++ for (eint_num = 0;
++ eint_num < pctl->devdata->ap_num;
++ eint_num += 32, reg += 4) {
+ status = readl(reg);
+- reg += 4;
+ while (status) {
+ offset = __ffs(status);
+ index = eint_num + offset;
+diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
+index 3408578b08d6..ff41c310c900 100644
+--- a/drivers/scsi/scsi_devinfo.c
++++ b/drivers/scsi/scsi_devinfo.c
+@@ -230,6 +230,7 @@ static struct {
+ {"PIONEER", "CD-ROM DRM-624X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
+ {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC},
+ {"Promise", "", NULL, BLIST_SPARSELUN},
++ {"QEMU", "QEMU CD-ROM", NULL, BLIST_SKIP_VPD_PAGES},
+ {"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024},
+ {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024},
+ {"QUANTUM", "XP34301", "1071", BLIST_NOTQ},
+diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
+index 8106515d1df8..f704d02645f1 100644
+--- a/drivers/scsi/scsi_lib.c
++++ b/drivers/scsi/scsi_lib.c
+@@ -911,9 +911,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
+ }
+
+ /*
+- * If we finished all bytes in the request we are done now.
++ * special case: failed zero length commands always need to
++ * drop down into the retry code. Otherwise, if we finished
++ * all bytes in the request we are done now.
+ */
+- if (!scsi_end_request(req, error, good_bytes, 0))
++ if (!(blk_rq_bytes(req) == 0 && error) &&
++ !scsi_end_request(req, error, good_bytes, 0))
+ return;
+
+ /*
+diff --git a/fs/dcache.c b/fs/dcache.c
+index d5ecc6e477da..44008e3fafc4 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -1619,7 +1619,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
+ struct dentry *dentry = __d_alloc(parent->d_sb, name);
+ if (!dentry)
+ return NULL;
+-
++ dentry->d_flags |= DCACHE_RCUACCESS;
+ spin_lock(&parent->d_lock);
+ /*
+ * don't need child lock because it is not subject
+@@ -2338,7 +2338,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
+ {
+ BUG_ON(!d_unhashed(entry));
+ hlist_bl_lock(b);
+- entry->d_flags |= DCACHE_RCUACCESS;
+ hlist_bl_add_head_rcu(&entry->d_hash, b);
+ hlist_bl_unlock(b);
+ }
+@@ -2637,6 +2636,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
+ /* ... and switch them in the tree */
+ if (IS_ROOT(dentry)) {
+ /* splicing a tree */
++ dentry->d_flags |= DCACHE_RCUACCESS;
+ dentry->d_parent = target->d_parent;
+ target->d_parent = target;
+ list_del_init(&target->d_child);
+diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
+index 866bb18efefe..e818f5ac7a26 100644
+--- a/fs/ecryptfs/kthread.c
++++ b/fs/ecryptfs/kthread.c
+@@ -25,6 +25,7 @@
+ #include <linux/slab.h>
+ #include <linux/wait.h>
+ #include <linux/mount.h>
++#include <linux/file.h>
+ #include "ecryptfs_kernel.h"
+
+ struct ecryptfs_open_req {
+@@ -147,7 +148,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
+ flags |= IS_RDONLY(d_inode(lower_dentry)) ? O_RDONLY : O_RDWR;
+ (*lower_file) = dentry_open(&req.path, flags, cred);
+ if (!IS_ERR(*lower_file))
+- goto out;
++ goto have_file;
+ if ((flags & O_ACCMODE) == O_RDONLY) {
+ rc = PTR_ERR((*lower_file));
+ goto out;
+@@ -165,8 +166,16 @@ int ecryptfs_privileged_open(struct file **lower_file,
+ mutex_unlock(&ecryptfs_kthread_ctl.mux);
+ wake_up(&ecryptfs_kthread_ctl.wait);
+ wait_for_completion(&req.done);
+- if (IS_ERR(*lower_file))
++ if (IS_ERR(*lower_file)) {
+ rc = PTR_ERR(*lower_file);
++ goto out;
++ }
++have_file:
++ if ((*lower_file)->f_op->mmap == NULL) {
++ fput(*lower_file);
++ *lower_file = NULL;
++ rc = -EMEDIUMTYPE;
++ }
+ out:
+ return rc;
+ }
+diff --git a/fs/proc/root.c b/fs/proc/root.c
+index 361ab4ee42fc..ec649c92d270 100644
+--- a/fs/proc/root.c
++++ b/fs/proc/root.c
+@@ -121,6 +121,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
+ if (IS_ERR(sb))
+ return ERR_CAST(sb);
+
++ /*
++ * procfs isn't actually a stacking filesystem; however, there is
++ * too much magic going on inside it to permit stacking things on
++ * top of it
++ */
++ sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
++
+ if (!proc_parse_options(options, ns)) {
+ deactivate_locked_super(sb);
+ return ERR_PTR(-EINVAL);
+diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
+index d5d798b35c1f..e98425058f20 100644
+--- a/include/linux/irqchip/arm-gic-v3.h
++++ b/include/linux/irqchip/arm-gic-v3.h
+@@ -301,7 +301,7 @@
+ #define ICC_SGI1R_AFFINITY_1_SHIFT 16
+ #define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
+ #define ICC_SGI1R_SGI_ID_SHIFT 24
+-#define ICC_SGI1R_SGI_ID_MASK (0xff << ICC_SGI1R_SGI_ID_SHIFT)
++#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT)
+ #define ICC_SGI1R_AFFINITY_2_SHIFT 32
+ #define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT)
+ #define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
+diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
+index 80a305b85323..4dd9306c9d56 100644
+--- a/include/linux/netfilter/x_tables.h
++++ b/include/linux/netfilter/x_tables.h
+@@ -242,11 +242,18 @@ void xt_unregister_match(struct xt_match *target);
+ int xt_register_matches(struct xt_match *match, unsigned int n);
+ void xt_unregister_matches(struct xt_match *match, unsigned int n);
+
++int xt_check_entry_offsets(const void *base, const char *elems,
++ unsigned int target_offset,
++ unsigned int next_offset);
++
+ int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
+ bool inv_proto);
+ int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
+ bool inv_proto);
+
++void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
++ struct xt_counters_info *info, bool compat);
++
+ struct xt_table *xt_register_table(struct net *net,
+ const struct xt_table *table,
+ struct xt_table_info *bootstrap,
+@@ -480,7 +487,7 @@ void xt_compat_init_offsets(u_int8_t af, unsigned int number);
+ int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
+
+ int xt_compat_match_offset(const struct xt_match *match);
+-int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
++void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+ unsigned int *size);
+ int xt_compat_match_to_user(const struct xt_entry_match *m,
+ void __user **dstptr, unsigned int *size);
+@@ -490,6 +497,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+ unsigned int *size);
+ int xt_compat_target_to_user(const struct xt_entry_target *t,
+ void __user **dstptr, unsigned int *size);
++int xt_compat_check_entry_offsets(const void *base, const char *elems,
++ unsigned int target_offset,
++ unsigned int next_offset);
+
+ #endif /* CONFIG_COMPAT */
+ #endif /* _X_TABLES_H */
+diff --git a/include/net/switchdev.h b/include/net/switchdev.h
+index 51d77b2ce2b2..985619a59323 100644
+--- a/include/net/switchdev.h
++++ b/include/net/switchdev.h
+@@ -97,7 +97,7 @@ struct switchdev_obj_ipv4_fib {
+ struct switchdev_obj obj;
+ u32 dst;
+ int dst_len;
+- struct fib_info fi;
++ struct fib_info *fi;
+ u8 tos;
+ u8 type;
+ u32 nlflags;
+diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
+index d5e38c73377c..e4f048ee7043 100644
+--- a/include/uapi/linux/libc-compat.h
++++ b/include/uapi/linux/libc-compat.h
+@@ -52,7 +52,7 @@
+ #if defined(__GLIBC__)
+
+ /* Coordinate with glibc net/if.h header. */
+-#if defined(_NET_IF_H)
++#if defined(_NET_IF_H) && defined(__USE_MISC)
+
+ /* GLIBC headers included first so don't define anything
+ * that would already be defined. */
+diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
+index 8f94ca1860cf..b2aefa2d123a 100644
+--- a/kernel/bpf/inode.c
++++ b/kernel/bpf/inode.c
+@@ -378,7 +378,7 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent)
+ static struct dentry *bpf_mount(struct file_system_type *type, int flags,
+ const char *dev_name, void *data)
+ {
+- return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super);
++ return mount_nodev(type, flags, data, bpf_fill_super);
+ }
+
+ static struct file_system_type bpf_fs_type = {
+@@ -386,7 +386,6 @@ static struct file_system_type bpf_fs_type = {
+ .name = "bpf",
+ .mount = bpf_mount,
+ .kill_sb = kill_litter_super,
+- .fs_flags = FS_USERNS_MOUNT,
+ };
+
+ MODULE_ALIAS_FS("bpf");
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index d1f7149f8704..11546a6ed5df 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3047,7 +3047,8 @@ static noinline void __schedule_bug(struct task_struct *prev)
+ static inline void schedule_debug(struct task_struct *prev)
+ {
+ #ifdef CONFIG_SCHED_STACK_END_CHECK
+- BUG_ON(task_stack_end_corrupted(prev));
++ if (task_stack_end_corrupted(prev))
++ panic("corrupted stack end detected inside scheduler\n");
+ #endif
+
+ if (unlikely(in_atomic_preempt_off())) {
+diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
+index 3e4ffb3ace5f..d0289414cdeb 100644
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -194,7 +194,7 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
+ if (unlikely(index >= array->map.max_entries))
+ return -E2BIG;
+
+- file = (struct file *)array->ptrs[index];
++ file = READ_ONCE(array->ptrs[index]);
+ if (unlikely(!file))
+ return -ENOENT;
+
+@@ -238,7 +238,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
+ if (unlikely(index >= array->map.max_entries))
+ return -E2BIG;
+
+- file = (struct file *)array->ptrs[index];
++ file = READ_ONCE(array->ptrs[index]);
+ if (unlikely(!file))
+ return -ENOENT;
+
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index fe787f5c41bd..a2e79b83920f 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2877,6 +2877,7 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
+ * ordering is imposed by list_lru_node->lock taken by
+ * memcg_drain_all_list_lrus().
+ */
++ rcu_read_lock(); /* can be called from css_free w/o cgroup_mutex */
+ css_for_each_descendant_pre(css, &memcg->css) {
+ child = mem_cgroup_from_css(css);
+ BUG_ON(child->kmemcg_id != kmemcg_id);
+@@ -2884,6 +2885,8 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
+ if (!memcg->use_hierarchy)
+ break;
+ }
++ rcu_read_unlock();
++
+ memcg_drain_all_list_lrus(kmemcg_id, parent->kmemcg_id);
+
+ memcg_free_cache_id(kmemcg_id);
+diff --git a/mm/swap_state.c b/mm/swap_state.c
+index 366ce3518703..1155a6831a3d 100644
+--- a/mm/swap_state.c
++++ b/mm/swap_state.c
+@@ -252,7 +252,10 @@ static inline void free_swap_cache(struct page *page)
+ void free_page_and_swap_cache(struct page *page)
+ {
+ free_swap_cache(page);
+- put_page(page);
++ if (is_huge_zero_page(page))
++ put_huge_zero_page();
++ else
++ put_page(page);
+ }
+
+ /*
+diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
+index dcea4f4c62b3..c18080ad4085 100644
+--- a/net/bridge/br_fdb.c
++++ b/net/bridge/br_fdb.c
+@@ -279,6 +279,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
+ * change from under us.
+ */
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
++ if (!br_vlan_should_use(v))
++ continue;
+ f = __br_fdb_get(br, br->dev->dev_addr, v->vid);
+ if (f && f->is_local && !f->dst)
+ fdb_delete_local(br, NULL, f);
+diff --git a/net/core/hwbm.c b/net/core/hwbm.c
+index 941c28486896..2cab489ae62e 100644
+--- a/net/core/hwbm.c
++++ b/net/core/hwbm.c
+@@ -55,18 +55,21 @@ int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
+ spin_lock_irqsave(&bm_pool->lock, flags);
+ if (bm_pool->buf_num == bm_pool->size) {
+ pr_warn("pool already filled\n");
++ spin_unlock_irqrestore(&bm_pool->lock, flags);
+ return bm_pool->buf_num;
+ }
+
+ if (buf_num + bm_pool->buf_num > bm_pool->size) {
+ pr_warn("cannot allocate %d buffers for pool\n",
+ buf_num);
++ spin_unlock_irqrestore(&bm_pool->lock, flags);
+ return 0;
+ }
+
+ if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) {
+ pr_warn("Adding %d buffers to the %d current buffers will overflow\n",
+ buf_num, bm_pool->buf_num);
++ spin_unlock_irqrestore(&bm_pool->lock, flags);
+ return 0;
+ }
+
+diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
+index 9e481992dbae..7ad0e567cf10 100644
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -1660,6 +1660,14 @@ static __net_init int inet_init_net(struct net *net)
+ */
+ net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1);
+ net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0);
++
++ /* Default values for sysctl-controlled parameters.
++ * We set them here, in case sysctl is not compiled.
++ */
++ net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
++ net->ipv4.sysctl_ip_dynaddr = 0;
++ net->ipv4.sysctl_ip_early_demux = 1;
++
+ return 0;
+ }
+
+diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
+index 4133b0f513af..85d60c69bfe3 100644
+--- a/net/ipv4/netfilter/arp_tables.c
++++ b/net/ipv4/netfilter/arp_tables.c
+@@ -367,6 +367,18 @@ static inline bool unconditional(const struct arpt_entry *e)
+ memcmp(&e->arp, &uncond, sizeof(uncond)) == 0;
+ }
+
++static bool find_jump_target(const struct xt_table_info *t,
++ const struct arpt_entry *target)
++{
++ struct arpt_entry *iter;
++
++ xt_entry_foreach(iter, t->entries, t->size) {
++ if (iter == target)
++ return true;
++ }
++ return false;
++}
++
+ /* Figures out from what hook each rule can be called: returns 0 if
+ * there are loops. Puts hook bitmask in comefrom.
+ */
+@@ -439,6 +451,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
+ size = e->next_offset;
+ e = (struct arpt_entry *)
+ (entry0 + pos + size);
++ if (pos + size >= newinfo->size)
++ return 0;
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+@@ -458,9 +472,15 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
+ /* This a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
++ e = (struct arpt_entry *)
++ (entry0 + newpos);
++ if (!find_jump_target(newinfo, e))
++ return 0;
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
++ if (newpos >= newinfo->size)
++ return 0;
+ }
+ e = (struct arpt_entry *)
+ (entry0 + newpos);
+@@ -474,23 +494,6 @@ next:
+ return 1;
+ }
+
+-static inline int check_entry(const struct arpt_entry *e)
+-{
+- const struct xt_entry_target *t;
+-
+- if (!arp_checkentry(&e->arp))
+- return -EINVAL;
+-
+- if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
+- return -EINVAL;
+-
+- t = arpt_get_target_c(e);
+- if (e->target_offset + t->u.target_size > e->next_offset)
+- return -EINVAL;
+-
+- return 0;
+-}
+-
+ static inline int check_target(struct arpt_entry *e, const char *name)
+ {
+ struct xt_entry_target *t = arpt_get_target(e);
+@@ -586,7 +589,11 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
+ return -EINVAL;
+ }
+
+- err = check_entry(e);
++ if (!arp_checkentry(&e->arp))
++ return -EINVAL;
++
++ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
++ e->next_offset);
+ if (err)
+ return err;
+
+@@ -691,10 +698,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
+ }
+ }
+
+- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) {
+- duprintf("Looping hook\n");
++ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
+ return -ELOOP;
+- }
+
+ /* Finally, each sanity check must pass */
+ i = 0;
+@@ -1126,55 +1131,17 @@ static int do_add_counters(struct net *net, const void __user *user,
+ unsigned int i;
+ struct xt_counters_info tmp;
+ struct xt_counters *paddc;
+- unsigned int num_counters;
+- const char *name;
+- int size;
+- void *ptmp;
+ struct xt_table *t;
+ const struct xt_table_info *private;
+ int ret = 0;
+ struct arpt_entry *iter;
+ unsigned int addend;
+-#ifdef CONFIG_COMPAT
+- struct compat_xt_counters_info compat_tmp;
+
+- if (compat) {
+- ptmp = &compat_tmp;
+- size = sizeof(struct compat_xt_counters_info);
+- } else
+-#endif
+- {
+- ptmp = &tmp;
+- size = sizeof(struct xt_counters_info);
+- }
++ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
++ if (IS_ERR(paddc))
++ return PTR_ERR(paddc);
+
+- if (copy_from_user(ptmp, user, size) != 0)
+- return -EFAULT;
+-
+-#ifdef CONFIG_COMPAT
+- if (compat) {
+- num_counters = compat_tmp.num_counters;
+- name = compat_tmp.name;
+- } else
+-#endif
+- {
+- num_counters = tmp.num_counters;
+- name = tmp.name;
+- }
+-
+- if (len != size + num_counters * sizeof(struct xt_counters))
+- return -EINVAL;
+-
+- paddc = vmalloc(len - size);
+- if (!paddc)
+- return -ENOMEM;
+-
+- if (copy_from_user(paddc, user + size, len - size) != 0) {
+- ret = -EFAULT;
+- goto free;
+- }
+-
+- t = xt_find_table_lock(net, NFPROTO_ARP, name);
++ t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
+ if (IS_ERR_OR_NULL(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free;
+@@ -1182,7 +1149,7 @@ static int do_add_counters(struct net *net, const void __user *user,
+
+ local_bh_disable();
+ private = t->private;
+- if (private->number != num_counters) {
++ if (private->number != tmp.num_counters) {
+ ret = -EINVAL;
+ goto unlock_up_free;
+ }
+@@ -1209,6 +1176,18 @@ static int do_add_counters(struct net *net, const void __user *user,
+ }
+
+ #ifdef CONFIG_COMPAT
++struct compat_arpt_replace {
++ char name[XT_TABLE_MAXNAMELEN];
++ u32 valid_hooks;
++ u32 num_entries;
++ u32 size;
++ u32 hook_entry[NF_ARP_NUMHOOKS];
++ u32 underflow[NF_ARP_NUMHOOKS];
++ u32 num_counters;
++ compat_uptr_t counters;
++ struct compat_arpt_entry entries[0];
++};
++
+ static inline void compat_release_entry(struct compat_arpt_entry *e)
+ {
+ struct xt_entry_target *t;
+@@ -1217,20 +1196,17 @@ static inline void compat_release_entry(struct compat_arpt_entry *e)
+ module_put(t->u.kernel.target->me);
+ }
+
+-static inline int
++static int
+ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
+ struct xt_table_info *newinfo,
+ unsigned int *size,
+ const unsigned char *base,
+- const unsigned char *limit,
+- const unsigned int *hook_entries,
+- const unsigned int *underflows,
+- const char *name)
++ const unsigned char *limit)
+ {
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ unsigned int entry_offset;
+- int ret, off, h;
++ int ret, off;
+
+ duprintf("check_compat_entry_size_and_hooks %p\n", e);
+ if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
+@@ -1247,8 +1223,11 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
+ return -EINVAL;
+ }
+
+- /* For purposes of check_entry casting the compat entry is fine */
+- ret = check_entry((struct arpt_entry *)e);
++ if (!arp_checkentry(&e->arp))
++ return -EINVAL;
++
++ ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset,
++ e->next_offset);
+ if (ret)
+ return ret;
+
+@@ -1272,17 +1251,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
+ if (ret)
+ goto release_target;
+
+- /* Check hooks & underflows */
+- for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+- if ((unsigned char *)e - base == hook_entries[h])
+- newinfo->hook_entry[h] = hook_entries[h];
+- if ((unsigned char *)e - base == underflows[h])
+- newinfo->underflow[h] = underflows[h];
+- }
+-
+- /* Clear counters and comefrom */
+- memset(&e->counters, 0, sizeof(e->counters));
+- e->comefrom = 0;
+ return 0;
+
+ release_target:
+@@ -1291,18 +1259,17 @@ out:
+ return ret;
+ }
+
+-static int
++static void
+ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
+- unsigned int *size, const char *name,
++ unsigned int *size,
+ struct xt_table_info *newinfo, unsigned char *base)
+ {
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ struct arpt_entry *de;
+ unsigned int origsize;
+- int ret, h;
++ int h;
+
+- ret = 0;
+ origsize = *size;
+ de = (struct arpt_entry *)*dstptr;
+ memcpy(de, e, sizeof(struct arpt_entry));
+@@ -1323,148 +1290,82 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
+ if ((unsigned char *)de - base < newinfo->underflow[h])
+ newinfo->underflow[h] -= origsize - *size;
+ }
+- return ret;
+ }
+
+-static int translate_compat_table(const char *name,
+- unsigned int valid_hooks,
+- struct xt_table_info **pinfo,
++static int translate_compat_table(struct xt_table_info **pinfo,
+ void **pentry0,
+- unsigned int total_size,
+- unsigned int number,
+- unsigned int *hook_entries,
+- unsigned int *underflows)
++ const struct compat_arpt_replace *compatr)
+ {
+ unsigned int i, j;
+ struct xt_table_info *newinfo, *info;
+ void *pos, *entry0, *entry1;
+ struct compat_arpt_entry *iter0;
+- struct arpt_entry *iter1;
++ struct arpt_replace repl;
+ unsigned int size;
+ int ret = 0;
+
+ info = *pinfo;
+ entry0 = *pentry0;
+- size = total_size;
+- info->number = number;
+-
+- /* Init all hooks to impossible value. */
+- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+- info->hook_entry[i] = 0xFFFFFFFF;
+- info->underflow[i] = 0xFFFFFFFF;
+- }
++ size = compatr->size;
++ info->number = compatr->num_entries;
+
+ duprintf("translate_compat_table: size %u\n", info->size);
+ j = 0;
+ xt_compat_lock(NFPROTO_ARP);
+- xt_compat_init_offsets(NFPROTO_ARP, number);
++ xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
+ /* Walk through entries, checking offsets. */
+- xt_entry_foreach(iter0, entry0, total_size) {
++ xt_entry_foreach(iter0, entry0, compatr->size) {
+ ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+ entry0,
+- entry0 + total_size,
+- hook_entries,
+- underflows,
+- name);
++ entry0 + compatr->size);
+ if (ret != 0)
+ goto out_unlock;
+ ++j;
+ }
+
+ ret = -EINVAL;
+- if (j != number) {
++ if (j != compatr->num_entries) {
+ duprintf("translate_compat_table: %u not %u entries\n",
+- j, number);
++ j, compatr->num_entries);
+ goto out_unlock;
+ }
+
+- /* Check hooks all assigned */
+- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+- /* Only hooks which are valid */
+- if (!(valid_hooks & (1 << i)))
+- continue;
+- if (info->hook_entry[i] == 0xFFFFFFFF) {
+- duprintf("Invalid hook entry %u %u\n",
+- i, hook_entries[i]);
+- goto out_unlock;
+- }
+- if (info->underflow[i] == 0xFFFFFFFF) {
+- duprintf("Invalid underflow %u %u\n",
+- i, underflows[i]);
+- goto out_unlock;
+- }
+- }
+-
+ ret = -ENOMEM;
+ newinfo = xt_alloc_table_info(size);
+ if (!newinfo)
+ goto out_unlock;
+
+- newinfo->number = number;
++ newinfo->number = compatr->num_entries;
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = info->hook_entry[i];
+ newinfo->underflow[i] = info->underflow[i];
+ }
+ entry1 = newinfo->entries;
+ pos = entry1;
+- size = total_size;
+- xt_entry_foreach(iter0, entry0, total_size) {
+- ret = compat_copy_entry_from_user(iter0, &pos, &size,
+- name, newinfo, entry1);
+- if (ret != 0)
+- break;
+- }
++ size = compatr->size;
++ xt_entry_foreach(iter0, entry0, compatr->size)
++ compat_copy_entry_from_user(iter0, &pos, &size,
++ newinfo, entry1);
++
++ /* all module references in entry0 are now gone */
++
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ xt_compat_unlock(NFPROTO_ARP);
+- if (ret)
+- goto free_newinfo;
+
+- ret = -ELOOP;
+- if (!mark_source_chains(newinfo, valid_hooks, entry1))
+- goto free_newinfo;
++ memcpy(&repl, compatr, sizeof(*compatr));
+
+- i = 0;
+- xt_entry_foreach(iter1, entry1, newinfo->size) {
+- iter1->counters.pcnt = xt_percpu_counter_alloc();
+- if (IS_ERR_VALUE(iter1->counters.pcnt)) {
+- ret = -ENOMEM;
+- break;
+- }
+-
+- ret = check_target(iter1, name);
+- if (ret != 0) {
+- xt_percpu_counter_free(iter1->counters.pcnt);
+- break;
+- }
+- ++i;
+- if (strcmp(arpt_get_target(iter1)->u.user.name,
+- XT_ERROR_TARGET) == 0)
+- ++newinfo->stacksize;
+- }
+- if (ret) {
+- /*
+- * The first i matches need cleanup_entry (calls ->destroy)
+- * because they had called ->check already. The other j-i
+- * entries need only release.
+- */
+- int skip = i;
+- j -= i;
+- xt_entry_foreach(iter0, entry0, newinfo->size) {
+- if (skip-- > 0)
+- continue;
+- if (j-- == 0)
+- break;
+- compat_release_entry(iter0);
+- }
+- xt_entry_foreach(iter1, entry1, newinfo->size) {
+- if (i-- == 0)
+- break;
+- cleanup_entry(iter1);
+- }
+- xt_free_table_info(newinfo);
+- return ret;
++ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
++ repl.hook_entry[i] = newinfo->hook_entry[i];
++ repl.underflow[i] = newinfo->underflow[i];
+ }
+
++ repl.num_counters = 0;
++ repl.counters = NULL;
++ repl.size = newinfo->size;
++ ret = translate_table(newinfo, entry1, &repl);
++ if (ret)
++ goto free_newinfo;
++
+ *pinfo = newinfo;
+ *pentry0 = entry1;
+ xt_free_table_info(info);
+@@ -1472,31 +1373,18 @@ static int translate_compat_table(const char *name,
+
+ free_newinfo:
+ xt_free_table_info(newinfo);
+-out:
+- xt_entry_foreach(iter0, entry0, total_size) {
++ return ret;
++out_unlock:
++ xt_compat_flush_offsets(NFPROTO_ARP);
++ xt_compat_unlock(NFPROTO_ARP);
++ xt_entry_foreach(iter0, entry0, compatr->size) {
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ return ret;
+-out_unlock:
+- xt_compat_flush_offsets(NFPROTO_ARP);
+- xt_compat_unlock(NFPROTO_ARP);
+- goto out;
+ }
+
+-struct compat_arpt_replace {
+- char name[XT_TABLE_MAXNAMELEN];
+- u32 valid_hooks;
+- u32 num_entries;
+- u32 size;
+- u32 hook_entry[NF_ARP_NUMHOOKS];
+- u32 underflow[NF_ARP_NUMHOOKS];
+- u32 num_counters;
+- compat_uptr_t counters;
+- struct compat_arpt_entry entries[0];
+-};
+-
+ static int compat_do_replace(struct net *net, void __user *user,
+ unsigned int len)
+ {
+@@ -1529,10 +1417,7 @@ static int compat_do_replace(struct net *net, void __user *user,
+ goto free_newinfo;
+ }
+
+- ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+- &newinfo, &loc_cpu_entry, tmp.size,
+- tmp.num_entries, tmp.hook_entry,
+- tmp.underflow);
++ ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp);
+ if (ret != 0)
+ goto free_newinfo;
+
+diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
+index 631c100a1338..0984ea3fcf14 100644
+--- a/net/ipv4/netfilter/ip_tables.c
++++ b/net/ipv4/netfilter/ip_tables.c
+@@ -443,6 +443,18 @@ ipt_do_table(struct sk_buff *skb,
+ #endif
+ }
+
++static bool find_jump_target(const struct xt_table_info *t,
++ const struct ipt_entry *target)
++{
++ struct ipt_entry *iter;
++
++ xt_entry_foreach(iter, t->entries, t->size) {
++ if (iter == target)
++ return true;
++ }
++ return false;
++}
++
+ /* Figures out from what hook each rule can be called: returns 0 if
+ there are loops. Puts hook bitmask in comefrom. */
+ static int
+@@ -520,6 +532,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
+ size = e->next_offset;
+ e = (struct ipt_entry *)
+ (entry0 + pos + size);
++ if (pos + size >= newinfo->size)
++ return 0;
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+@@ -538,9 +552,15 @@ mark_source_chains(const struct xt_table_info *newinfo,
+ /* This a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
++ e = (struct ipt_entry *)
++ (entry0 + newpos);
++ if (!find_jump_target(newinfo, e))
++ return 0;
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
++ if (newpos >= newinfo->size)
++ return 0;
+ }
+ e = (struct ipt_entry *)
+ (entry0 + newpos);
+@@ -568,25 +588,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
+ }
+
+ static int
+-check_entry(const struct ipt_entry *e)
+-{
+- const struct xt_entry_target *t;
+-
+- if (!ip_checkentry(&e->ip))
+- return -EINVAL;
+-
+- if (e->target_offset + sizeof(struct xt_entry_target) >
+- e->next_offset)
+- return -EINVAL;
+-
+- t = ipt_get_target_c(e);
+- if (e->target_offset + t->u.target_size > e->next_offset)
+- return -EINVAL;
+-
+- return 0;
+-}
+-
+-static int
+ check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
+ {
+ const struct ipt_ip *ip = par->entryinfo;
+@@ -750,7 +751,11 @@ check_entry_size_and_hooks(struct ipt_entry *e,
+ return -EINVAL;
+ }
+
+- err = check_entry(e);
++ if (!ip_checkentry(&e->ip))
++ return -EINVAL;
++
++ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
++ e->next_offset);
+ if (err)
+ return err;
+
+@@ -1309,55 +1314,17 @@ do_add_counters(struct net *net, const void __user *user,
+ unsigned int i;
+ struct xt_counters_info tmp;
+ struct xt_counters *paddc;
+- unsigned int num_counters;
+- const char *name;
+- int size;
+- void *ptmp;
+ struct xt_table *t;
+ const struct xt_table_info *private;
+ int ret = 0;
+ struct ipt_entry *iter;
+ unsigned int addend;
+-#ifdef CONFIG_COMPAT
+- struct compat_xt_counters_info compat_tmp;
+
+- if (compat) {
+- ptmp = &compat_tmp;
+- size = sizeof(struct compat_xt_counters_info);
+- } else
+-#endif
+- {
+- ptmp = &tmp;
+- size = sizeof(struct xt_counters_info);
+- }
+-
+- if (copy_from_user(ptmp, user, size) != 0)
+- return -EFAULT;
+-
+-#ifdef CONFIG_COMPAT
+- if (compat) {
+- num_counters = compat_tmp.num_counters;
+- name = compat_tmp.name;
+- } else
+-#endif
+- {
+- num_counters = tmp.num_counters;
+- name = tmp.name;
+- }
++ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
++ if (IS_ERR(paddc))
++ return PTR_ERR(paddc);
+
+- if (len != size + num_counters * sizeof(struct xt_counters))
+- return -EINVAL;
+-
+- paddc = vmalloc(len - size);
+- if (!paddc)
+- return -ENOMEM;
+-
+- if (copy_from_user(paddc, user + size, len - size) != 0) {
+- ret = -EFAULT;
+- goto free;
+- }
+-
+- t = xt_find_table_lock(net, AF_INET, name);
++ t = xt_find_table_lock(net, AF_INET, tmp.name);
+ if (IS_ERR_OR_NULL(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free;
+@@ -1365,7 +1332,7 @@ do_add_counters(struct net *net, const void __user *user,
+
+ local_bh_disable();
+ private = t->private;
+- if (private->number != num_counters) {
++ if (private->number != tmp.num_counters) {
+ ret = -EINVAL;
+ goto unlock_up_free;
+ }
+@@ -1444,7 +1411,6 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
+
+ static int
+ compat_find_calc_match(struct xt_entry_match *m,
+- const char *name,
+ const struct ipt_ip *ip,
+ int *size)
+ {
+@@ -1479,17 +1445,14 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
+ struct xt_table_info *newinfo,
+ unsigned int *size,
+ const unsigned char *base,
+- const unsigned char *limit,
+- const unsigned int *hook_entries,
+- const unsigned int *underflows,
+- const char *name)
++ const unsigned char *limit)
+ {
+ struct xt_entry_match *ematch;
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ unsigned int entry_offset;
+ unsigned int j;
+- int ret, off, h;
++ int ret, off;
+
+ duprintf("check_compat_entry_size_and_hooks %p\n", e);
+ if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
+@@ -1506,8 +1469,11 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
+ return -EINVAL;
+ }
+
+- /* For purposes of check_entry casting the compat entry is fine */
+- ret = check_entry((struct ipt_entry *)e);
++ if (!ip_checkentry(&e->ip))
++ return -EINVAL;
++
++ ret = xt_compat_check_entry_offsets(e, e->elems,
++ e->target_offset, e->next_offset);
+ if (ret)
+ return ret;
+
+@@ -1515,7 +1481,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
+ entry_offset = (void *)e - (void *)base;
+ j = 0;
+ xt_ematch_foreach(ematch, e) {
+- ret = compat_find_calc_match(ematch, name, &e->ip, &off);
++ ret = compat_find_calc_match(ematch, &e->ip, &off);
+ if (ret != 0)
+ goto release_matches;
+ ++j;
+@@ -1538,17 +1504,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
+ if (ret)
+ goto out;
+
+- /* Check hooks & underflows */
+- for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+- if ((unsigned char *)e - base == hook_entries[h])
+- newinfo->hook_entry[h] = hook_entries[h];
+- if ((unsigned char *)e - base == underflows[h])
+- newinfo->underflow[h] = underflows[h];
+- }
+-
+- /* Clear counters and comefrom */
+- memset(&e->counters, 0, sizeof(e->counters));
+- e->comefrom = 0;
+ return 0;
+
+ out:
+@@ -1562,19 +1517,18 @@ release_matches:
+ return ret;
+ }
+
+-static int
++static void
+ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
+- unsigned int *size, const char *name,
++ unsigned int *size,
+ struct xt_table_info *newinfo, unsigned char *base)
+ {
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ struct ipt_entry *de;
+ unsigned int origsize;
+- int ret, h;
++ int h;
+ struct xt_entry_match *ematch;
+
+- ret = 0;
+ origsize = *size;
+ de = (struct ipt_entry *)*dstptr;
+ memcpy(de, e, sizeof(struct ipt_entry));
+@@ -1583,201 +1537,105 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
+ *dstptr += sizeof(struct ipt_entry);
+ *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+
+- xt_ematch_foreach(ematch, e) {
+- ret = xt_compat_match_from_user(ematch, dstptr, size);
+- if (ret != 0)
+- return ret;
+- }
++ xt_ematch_foreach(ematch, e)
++ xt_compat_match_from_user(ematch, dstptr, size);
++
+ de->target_offset = e->target_offset - (origsize - *size);
+ t = compat_ipt_get_target(e);
+ target = t->u.kernel.target;
+ xt_compat_target_from_user(t, dstptr, size);
+
+ de->next_offset = e->next_offset - (origsize - *size);
++
+ for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if ((unsigned char *)de - base < newinfo->hook_entry[h])
+ newinfo->hook_entry[h] -= origsize - *size;
+ if ((unsigned char *)de - base < newinfo->underflow[h])
+ newinfo->underflow[h] -= origsize - *size;
+ }
+- return ret;
+-}
+-
+-static int
+-compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
+-{
+- struct xt_entry_match *ematch;
+- struct xt_mtchk_param mtpar;
+- unsigned int j;
+- int ret = 0;
+-
+- e->counters.pcnt = xt_percpu_counter_alloc();
+- if (IS_ERR_VALUE(e->counters.pcnt))
+- return -ENOMEM;
+-
+- j = 0;
+- mtpar.net = net;
+- mtpar.table = name;
+- mtpar.entryinfo = &e->ip;
+- mtpar.hook_mask = e->comefrom;
+- mtpar.family = NFPROTO_IPV4;
+- xt_ematch_foreach(ematch, e) {
+- ret = check_match(ematch, &mtpar);
+- if (ret != 0)
+- goto cleanup_matches;
+- ++j;
+- }
+-
+- ret = check_target(e, net, name);
+- if (ret)
+- goto cleanup_matches;
+- return 0;
+-
+- cleanup_matches:
+- xt_ematch_foreach(ematch, e) {
+- if (j-- == 0)
+- break;
+- cleanup_match(ematch, net);
+- }
+-
+- xt_percpu_counter_free(e->counters.pcnt);
+-
+- return ret;
+ }
+
+ static int
+ translate_compat_table(struct net *net,
+- const char *name,
+- unsigned int valid_hooks,
+ struct xt_table_info **pinfo,
+ void **pentry0,
+- unsigned int total_size,
+- unsigned int number,
+- unsigned int *hook_entries,
+- unsigned int *underflows)
++ const struct compat_ipt_replace *compatr)
+ {
+ unsigned int i, j;
+ struct xt_table_info *newinfo, *info;
+ void *pos, *entry0, *entry1;
+ struct compat_ipt_entry *iter0;
+- struct ipt_entry *iter1;
++ struct ipt_replace repl;
+ unsigned int size;
+ int ret;
+
+ info = *pinfo;
+ entry0 = *pentry0;
+- size = total_size;
+- info->number = number;
+-
+- /* Init all hooks to impossible value. */
+- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+- info->hook_entry[i] = 0xFFFFFFFF;
+- info->underflow[i] = 0xFFFFFFFF;
+- }
++ size = compatr->size;
++ info->number = compatr->num_entries;
+
+ duprintf("translate_compat_table: size %u\n", info->size);
+ j = 0;
+ xt_compat_lock(AF_INET);
+- xt_compat_init_offsets(AF_INET, number);
++ xt_compat_init_offsets(AF_INET, compatr->num_entries);
+ /* Walk through entries, checking offsets. */
+- xt_entry_foreach(iter0, entry0, total_size) {
++ xt_entry_foreach(iter0, entry0, compatr->size) {
+ ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+ entry0,
+- entry0 + total_size,
+- hook_entries,
+- underflows,
+- name);
++ entry0 + compatr->size);
+ if (ret != 0)
+ goto out_unlock;
+ ++j;
+ }
+
+ ret = -EINVAL;
+- if (j != number) {
++ if (j != compatr->num_entries) {
+ duprintf("translate_compat_table: %u not %u entries\n",
+- j, number);
++ j, compatr->num_entries);
+ goto out_unlock;
+ }
+
+- /* Check hooks all assigned */
+- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+- /* Only hooks which are valid */
+- if (!(valid_hooks & (1 << i)))
+- continue;
+- if (info->hook_entry[i] == 0xFFFFFFFF) {
+- duprintf("Invalid hook entry %u %u\n",
+- i, hook_entries[i]);
+- goto out_unlock;
+- }
+- if (info->underflow[i] == 0xFFFFFFFF) {
+- duprintf("Invalid underflow %u %u\n",
+- i, underflows[i]);
+- goto out_unlock;
+- }
+- }
+-
+ ret = -ENOMEM;
+ newinfo = xt_alloc_table_info(size);
+ if (!newinfo)
+ goto out_unlock;
+
+- newinfo->number = number;
++ newinfo->number = compatr->num_entries;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+- newinfo->hook_entry[i] = info->hook_entry[i];
+- newinfo->underflow[i] = info->underflow[i];
++ newinfo->hook_entry[i] = compatr->hook_entry[i];
++ newinfo->underflow[i] = compatr->underflow[i];
+ }
+ entry1 = newinfo->entries;
+ pos = entry1;
+- size = total_size;
+- xt_entry_foreach(iter0, entry0, total_size) {
+- ret = compat_copy_entry_from_user(iter0, &pos, &size,
+- name, newinfo, entry1);
+- if (ret != 0)
+- break;
+- }
++ size = compatr->size;
++ xt_entry_foreach(iter0, entry0, compatr->size)
++ compat_copy_entry_from_user(iter0, &pos, &size,
++ newinfo, entry1);
++
++ /* all module references in entry0 are now gone.
++ * entry1/newinfo contains a 64bit ruleset that looks exactly as
++ * generated by 64bit userspace.
++ *
++ * Call standard translate_table() to validate all hook_entrys,
++ * underflows, check for loops, etc.
++ */
+ xt_compat_flush_offsets(AF_INET);
+ xt_compat_unlock(AF_INET);
+- if (ret)
+- goto free_newinfo;
+
+- ret = -ELOOP;
+- if (!mark_source_chains(newinfo, valid_hooks, entry1))
+- goto free_newinfo;
++ memcpy(&repl, compatr, sizeof(*compatr));
+
+- i = 0;
+- xt_entry_foreach(iter1, entry1, newinfo->size) {
+- ret = compat_check_entry(iter1, net, name);
+- if (ret != 0)
+- break;
+- ++i;
+- if (strcmp(ipt_get_target(iter1)->u.user.name,
+- XT_ERROR_TARGET) == 0)
+- ++newinfo->stacksize;
+- }
+- if (ret) {
+- /*
+- * The first i matches need cleanup_entry (calls ->destroy)
+- * because they had called ->check already. The other j-i
+- * entries need only release.
+- */
+- int skip = i;
+- j -= i;
+- xt_entry_foreach(iter0, entry0, newinfo->size) {
+- if (skip-- > 0)
+- continue;
+- if (j-- == 0)
+- break;
+- compat_release_entry(iter0);
+- }
+- xt_entry_foreach(iter1, entry1, newinfo->size) {
+- if (i-- == 0)
+- break;
+- cleanup_entry(iter1, net);
+- }
+- xt_free_table_info(newinfo);
+- return ret;
++ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
++ repl.hook_entry[i] = newinfo->hook_entry[i];
++ repl.underflow[i] = newinfo->underflow[i];
+ }
+
++ repl.num_counters = 0;
++ repl.counters = NULL;
++ repl.size = newinfo->size;
++ ret = translate_table(net, newinfo, entry1, &repl);
++ if (ret)
++ goto free_newinfo;
++
+ *pinfo = newinfo;
+ *pentry0 = entry1;
+ xt_free_table_info(info);
+@@ -1785,17 +1643,16 @@ translate_compat_table(struct net *net,
+
+ free_newinfo:
+ xt_free_table_info(newinfo);
+-out:
+- xt_entry_foreach(iter0, entry0, total_size) {
++ return ret;
++out_unlock:
++ xt_compat_flush_offsets(AF_INET);
++ xt_compat_unlock(AF_INET);
++ xt_entry_foreach(iter0, entry0, compatr->size) {
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ return ret;
+-out_unlock:
+- xt_compat_flush_offsets(AF_INET);
+- xt_compat_unlock(AF_INET);
+- goto out;
+ }
+
+ static int
+@@ -1831,10 +1688,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
+ goto free_newinfo;
+ }
+
+- ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
+- &newinfo, &loc_cpu_entry, tmp.size,
+- tmp.num_entries, tmp.hook_entry,
+- tmp.underflow);
++ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
+ if (ret != 0)
+ goto free_newinfo;
+
+diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
+index 1e1fe6086dd9..03112a3106ab 100644
+--- a/net/ipv4/sysctl_net_ipv4.c
++++ b/net/ipv4/sysctl_net_ipv4.c
+@@ -988,10 +988,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
+ if (!net->ipv4.sysctl_local_reserved_ports)
+ goto err_ports;
+
+- net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
+- net->ipv4.sysctl_ip_dynaddr = 0;
+- net->ipv4.sysctl_ip_early_demux = 1;
+-
+ return 0;
+
+ err_ports:
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index a2e7f55a1f61..e9853dff7b52 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1616,7 +1616,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+
+ /* if we're overly short, let UDP handle it */
+ encap_rcv = ACCESS_ONCE(up->encap_rcv);
+- if (skb->len > sizeof(struct udphdr) && encap_rcv) {
++ if (encap_rcv) {
+ int ret;
+
+ /* Verify checksum before giving to encap */
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index bc972e7152c7..da88de82b3b8 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1071,17 +1071,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+ const struct in6_addr *final_dst)
+ {
+ struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
+- int err;
+
+ dst = ip6_sk_dst_check(sk, dst, fl6);
++ if (!dst)
++ dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
+
+- err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
+- if (err)
+- return ERR_PTR(err);
+- if (final_dst)
+- fl6->daddr = *final_dst;
+-
+- return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
++ return dst;
+ }
+ EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
+
+diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
+index 86b67b70b626..9021b435506d 100644
+--- a/net/ipv6/netfilter/ip6_tables.c
++++ b/net/ipv6/netfilter/ip6_tables.c
+@@ -455,6 +455,18 @@ ip6t_do_table(struct sk_buff *skb,
+ #endif
+ }
+
++static bool find_jump_target(const struct xt_table_info *t,
++ const struct ip6t_entry *target)
++{
++ struct ip6t_entry *iter;
++
++ xt_entry_foreach(iter, t->entries, t->size) {
++ if (iter == target)
++ return true;
++ }
++ return false;
++}
++
+ /* Figures out from what hook each rule can be called: returns 0 if
+ there are loops. Puts hook bitmask in comefrom. */
+ static int
+@@ -532,6 +544,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
+ size = e->next_offset;
+ e = (struct ip6t_entry *)
+ (entry0 + pos + size);
++ if (pos + size >= newinfo->size)
++ return 0;
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+@@ -550,9 +564,15 @@ mark_source_chains(const struct xt_table_info *newinfo,
+ /* This a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
++ e = (struct ip6t_entry *)
++ (entry0 + newpos);
++ if (!find_jump_target(newinfo, e))
++ return 0;
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
++ if (newpos >= newinfo->size)
++ return 0;
+ }
+ e = (struct ip6t_entry *)
+ (entry0 + newpos);
+@@ -579,25 +599,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
+ module_put(par.match->me);
+ }
+
+-static int
+-check_entry(const struct ip6t_entry *e)
+-{
+- const struct xt_entry_target *t;
+-
+- if (!ip6_checkentry(&e->ipv6))
+- return -EINVAL;
+-
+- if (e->target_offset + sizeof(struct xt_entry_target) >
+- e->next_offset)
+- return -EINVAL;
+-
+- t = ip6t_get_target_c(e);
+- if (e->target_offset + t->u.target_size > e->next_offset)
+- return -EINVAL;
+-
+- return 0;
+-}
+-
+ static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
+ {
+ const struct ip6t_ip6 *ipv6 = par->entryinfo;
+@@ -762,7 +763,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
+ return -EINVAL;
+ }
+
+- err = check_entry(e);
++ if (!ip6_checkentry(&e->ipv6))
++ return -EINVAL;
++
++ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
++ e->next_offset);
+ if (err)
+ return err;
+
+@@ -1321,55 +1326,16 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
+ unsigned int i;
+ struct xt_counters_info tmp;
+ struct xt_counters *paddc;
+- unsigned int num_counters;
+- char *name;
+- int size;
+- void *ptmp;
+ struct xt_table *t;
+ const struct xt_table_info *private;
+ int ret = 0;
+ struct ip6t_entry *iter;
+ unsigned int addend;
+-#ifdef CONFIG_COMPAT
+- struct compat_xt_counters_info compat_tmp;
+-
+- if (compat) {
+- ptmp = &compat_tmp;
+- size = sizeof(struct compat_xt_counters_info);
+- } else
+-#endif
+- {
+- ptmp = &tmp;
+- size = sizeof(struct xt_counters_info);
+- }
+-
+- if (copy_from_user(ptmp, user, size) != 0)
+- return -EFAULT;
+-
+-#ifdef CONFIG_COMPAT
+- if (compat) {
+- num_counters = compat_tmp.num_counters;
+- name = compat_tmp.name;
+- } else
+-#endif
+- {
+- num_counters = tmp.num_counters;
+- name = tmp.name;
+- }
+-
+- if (len != size + num_counters * sizeof(struct xt_counters))
+- return -EINVAL;
+-
+- paddc = vmalloc(len - size);
+- if (!paddc)
+- return -ENOMEM;
+
+- if (copy_from_user(paddc, user + size, len - size) != 0) {
+- ret = -EFAULT;
+- goto free;
+- }
+-
+- t = xt_find_table_lock(net, AF_INET6, name);
++ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
++ if (IS_ERR(paddc))
++ return PTR_ERR(paddc);
++ t = xt_find_table_lock(net, AF_INET6, tmp.name);
+ if (IS_ERR_OR_NULL(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free;
+@@ -1377,7 +1343,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
+
+ local_bh_disable();
+ private = t->private;
+- if (private->number != num_counters) {
++ if (private->number != tmp.num_counters) {
+ ret = -EINVAL;
+ goto unlock_up_free;
+ }
+@@ -1456,7 +1422,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
+
+ static int
+ compat_find_calc_match(struct xt_entry_match *m,
+- const char *name,
+ const struct ip6t_ip6 *ipv6,
+ int *size)
+ {
+@@ -1491,17 +1456,14 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
+ struct xt_table_info *newinfo,
+ unsigned int *size,
+ const unsigned char *base,
+- const unsigned char *limit,
+- const unsigned int *hook_entries,
+- const unsigned int *underflows,
+- const char *name)
++ const unsigned char *limit)
+ {
+ struct xt_entry_match *ematch;
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ unsigned int entry_offset;
+ unsigned int j;
+- int ret, off, h;
++ int ret, off;
+
+ duprintf("check_compat_entry_size_and_hooks %p\n", e);
+ if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
+@@ -1518,8 +1480,11 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
+ return -EINVAL;
+ }
+
+- /* For purposes of check_entry casting the compat entry is fine */
+- ret = check_entry((struct ip6t_entry *)e);
++ if (!ip6_checkentry(&e->ipv6))
++ return -EINVAL;
++
++ ret = xt_compat_check_entry_offsets(e, e->elems,
++ e->target_offset, e->next_offset);
+ if (ret)
+ return ret;
+
+@@ -1527,7 +1492,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
+ entry_offset = (void *)e - (void *)base;
+ j = 0;
+ xt_ematch_foreach(ematch, e) {
+- ret = compat_find_calc_match(ematch, name, &e->ipv6, &off);
++ ret = compat_find_calc_match(ematch, &e->ipv6, &off);
+ if (ret != 0)
+ goto release_matches;
+ ++j;
+@@ -1550,17 +1515,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
+ if (ret)
+ goto out;
+
+- /* Check hooks & underflows */
+- for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+- if ((unsigned char *)e - base == hook_entries[h])
+- newinfo->hook_entry[h] = hook_entries[h];
+- if ((unsigned char *)e - base == underflows[h])
+- newinfo->underflow[h] = underflows[h];
+- }
+-
+- /* Clear counters and comefrom */
+- memset(&e->counters, 0, sizeof(e->counters));
+- e->comefrom = 0;
+ return 0;
+
+ out:
+@@ -1574,18 +1528,17 @@ release_matches:
+ return ret;
+ }
+
+-static int
++static void
+ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
+- unsigned int *size, const char *name,
++ unsigned int *size,
+ struct xt_table_info *newinfo, unsigned char *base)
+ {
+ struct xt_entry_target *t;
+ struct ip6t_entry *de;
+ unsigned int origsize;
+- int ret, h;
++ int h;
+ struct xt_entry_match *ematch;
+
+- ret = 0;
+ origsize = *size;
+ de = (struct ip6t_entry *)*dstptr;
+ memcpy(de, e, sizeof(struct ip6t_entry));
+@@ -1594,11 +1547,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
+ *dstptr += sizeof(struct ip6t_entry);
+ *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+
+- xt_ematch_foreach(ematch, e) {
+- ret = xt_compat_match_from_user(ematch, dstptr, size);
+- if (ret != 0)
+- return ret;
+- }
++ xt_ematch_foreach(ematch, e)
++ xt_compat_match_from_user(ematch, dstptr, size);
++
+ de->target_offset = e->target_offset - (origsize - *size);
+ t = compat_ip6t_get_target(e);
+ xt_compat_target_from_user(t, dstptr, size);
+@@ -1610,183 +1561,83 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
+ if ((unsigned char *)de - base < newinfo->underflow[h])
+ newinfo->underflow[h] -= origsize - *size;
+ }
+- return ret;
+-}
+-
+-static int compat_check_entry(struct ip6t_entry *e, struct net *net,
+- const char *name)
+-{
+- unsigned int j;
+- int ret = 0;
+- struct xt_mtchk_param mtpar;
+- struct xt_entry_match *ematch;
+-
+- e->counters.pcnt = xt_percpu_counter_alloc();
+- if (IS_ERR_VALUE(e->counters.pcnt))
+- return -ENOMEM;
+- j = 0;
+- mtpar.net = net;
+- mtpar.table = name;
+- mtpar.entryinfo = &e->ipv6;
+- mtpar.hook_mask = e->comefrom;
+- mtpar.family = NFPROTO_IPV6;
+- xt_ematch_foreach(ematch, e) {
+- ret = check_match(ematch, &mtpar);
+- if (ret != 0)
+- goto cleanup_matches;
+- ++j;
+- }
+-
+- ret = check_target(e, net, name);
+- if (ret)
+- goto cleanup_matches;
+- return 0;
+-
+- cleanup_matches:
+- xt_ematch_foreach(ematch, e) {
+- if (j-- == 0)
+- break;
+- cleanup_match(ematch, net);
+- }
+-
+- xt_percpu_counter_free(e->counters.pcnt);
+-
+- return ret;
+ }
+
+ static int
+ translate_compat_table(struct net *net,
+- const char *name,
+- unsigned int valid_hooks,
+ struct xt_table_info **pinfo,
+ void **pentry0,
+- unsigned int total_size,
+- unsigned int number,
+- unsigned int *hook_entries,
+- unsigned int *underflows)
++ const struct compat_ip6t_replace *compatr)
+ {
+ unsigned int i, j;
+ struct xt_table_info *newinfo, *info;
+ void *pos, *entry0, *entry1;
+ struct compat_ip6t_entry *iter0;
+- struct ip6t_entry *iter1;
++ struct ip6t_replace repl;
+ unsigned int size;
+ int ret = 0;
+
+ info = *pinfo;
+ entry0 = *pentry0;
+- size = total_size;
+- info->number = number;
+-
+- /* Init all hooks to impossible value. */
+- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+- info->hook_entry[i] = 0xFFFFFFFF;
+- info->underflow[i] = 0xFFFFFFFF;
+- }
++ size = compatr->size;
++ info->number = compatr->num_entries;
+
+ duprintf("translate_compat_table: size %u\n", info->size);
+ j = 0;
+ xt_compat_lock(AF_INET6);
+- xt_compat_init_offsets(AF_INET6, number);
++ xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+ /* Walk through entries, checking offsets. */
+- xt_entry_foreach(iter0, entry0, total_size) {
++ xt_entry_foreach(iter0, entry0, compatr->size) {
+ ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+ entry0,
+- entry0 + total_size,
+- hook_entries,
+- underflows,
+- name);
++ entry0 + compatr->size);
+ if (ret != 0)
+ goto out_unlock;
+ ++j;
+ }
+
+ ret = -EINVAL;
+- if (j != number) {
++ if (j != compatr->num_entries) {
+ duprintf("translate_compat_table: %u not %u entries\n",
+- j, number);
++ j, compatr->num_entries);
+ goto out_unlock;
+ }
+
+- /* Check hooks all assigned */
+- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+- /* Only hooks which are valid */
+- if (!(valid_hooks & (1 << i)))
+- continue;
+- if (info->hook_entry[i] == 0xFFFFFFFF) {
+- duprintf("Invalid hook entry %u %u\n",
+- i, hook_entries[i]);
+- goto out_unlock;
+- }
+- if (info->underflow[i] == 0xFFFFFFFF) {
+- duprintf("Invalid underflow %u %u\n",
+- i, underflows[i]);
+- goto out_unlock;
+- }
+- }
+-
+ ret = -ENOMEM;
+ newinfo = xt_alloc_table_info(size);
+ if (!newinfo)
+ goto out_unlock;
+
+- newinfo->number = number;
++ newinfo->number = compatr->num_entries;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+- newinfo->hook_entry[i] = info->hook_entry[i];
+- newinfo->underflow[i] = info->underflow[i];
++ newinfo->hook_entry[i] = compatr->hook_entry[i];
++ newinfo->underflow[i] = compatr->underflow[i];
+ }
+ entry1 = newinfo->entries;
+ pos = entry1;
+- size = total_size;
+- xt_entry_foreach(iter0, entry0, total_size) {
+- ret = compat_copy_entry_from_user(iter0, &pos, &size,
+- name, newinfo, entry1);
+- if (ret != 0)
+- break;
+- }
++ size = compatr->size;
++ xt_entry_foreach(iter0, entry0, compatr->size)
++ compat_copy_entry_from_user(iter0, &pos, &size,
++ newinfo, entry1);
++
++ /* all module references in entry0 are now gone. */
+ xt_compat_flush_offsets(AF_INET6);
+ xt_compat_unlock(AF_INET6);
+- if (ret)
+- goto free_newinfo;
+
+- ret = -ELOOP;
+- if (!mark_source_chains(newinfo, valid_hooks, entry1))
+- goto free_newinfo;
++ memcpy(&repl, compatr, sizeof(*compatr));
+
+- i = 0;
+- xt_entry_foreach(iter1, entry1, newinfo->size) {
+- ret = compat_check_entry(iter1, net, name);
+- if (ret != 0)
+- break;
+- ++i;
+- if (strcmp(ip6t_get_target(iter1)->u.user.name,
+- XT_ERROR_TARGET) == 0)
+- ++newinfo->stacksize;
+- }
+- if (ret) {
+- /*
+- * The first i matches need cleanup_entry (calls ->destroy)
+- * because they had called ->check already. The other j-i
+- * entries need only release.
+- */
+- int skip = i;
+- j -= i;
+- xt_entry_foreach(iter0, entry0, newinfo->size) {
+- if (skip-- > 0)
+- continue;
+- if (j-- == 0)
+- break;
+- compat_release_entry(iter0);
+- }
+- xt_entry_foreach(iter1, entry1, newinfo->size) {
+- if (i-- == 0)
+- break;
+- cleanup_entry(iter1, net);
+- }
+- xt_free_table_info(newinfo);
+- return ret;
++ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
++ repl.hook_entry[i] = newinfo->hook_entry[i];
++ repl.underflow[i] = newinfo->underflow[i];
+ }
+
++ repl.num_counters = 0;
++ repl.counters = NULL;
++ repl.size = newinfo->size;
++ ret = translate_table(net, newinfo, entry1, &repl);
++ if (ret)
++ goto free_newinfo;
++
+ *pinfo = newinfo;
+ *pentry0 = entry1;
+ xt_free_table_info(info);
+@@ -1794,17 +1645,16 @@ translate_compat_table(struct net *net,
+
+ free_newinfo:
+ xt_free_table_info(newinfo);
+-out:
+- xt_entry_foreach(iter0, entry0, total_size) {
++ return ret;
++out_unlock:
++ xt_compat_flush_offsets(AF_INET6);
++ xt_compat_unlock(AF_INET6);
++ xt_entry_foreach(iter0, entry0, compatr->size) {
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ return ret;
+-out_unlock:
+- xt_compat_flush_offsets(AF_INET6);
+- xt_compat_unlock(AF_INET6);
+- goto out;
+ }
+
+ static int
+@@ -1840,10 +1690,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
+ goto free_newinfo;
+ }
+
+- ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
+- &newinfo, &loc_cpu_entry, tmp.size,
+- tmp.num_entries, tmp.hook_entry,
+- tmp.underflow);
++ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
+ if (ret != 0)
+ goto free_newinfo;
+
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index f443c6b0ce16..f6d7516eeb8a 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1717,7 +1717,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
+ destp = ntohs(inet->inet_dport);
+ srcp = ntohs(inet->inet_sport);
+
+- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
++ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
++ icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
++ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ timer_active = 1;
+ timer_expires = icsk->icsk_timeout;
+ } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 6bc5c664fa46..f96831d9d419 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -653,7 +653,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+
+ /* if we're overly short, let UDP handle it */
+ encap_rcv = ACCESS_ONCE(up->encap_rcv);
+- if (skb->len > sizeof(struct udphdr) && encap_rcv) {
++ if (encap_rcv) {
+ int ret;
+
+ /* Verify checksum before giving to encap */
+diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
+index 6edfa9980314..1e40dacaa137 100644
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -1581,7 +1581,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
+ /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+ tunnel->encap = encap;
+ if (encap == L2TP_ENCAPTYPE_UDP) {
+- struct udp_tunnel_sock_cfg udp_cfg;
++ struct udp_tunnel_sock_cfg udp_cfg = { };
+
+ udp_cfg.sk_user_data = tunnel;
+ udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
+diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
+index 582c9cfd6567..2675d580c490 100644
+--- a/net/netfilter/x_tables.c
++++ b/net/netfilter/x_tables.c
+@@ -416,6 +416,47 @@ int xt_check_match(struct xt_mtchk_param *par,
+ }
+ EXPORT_SYMBOL_GPL(xt_check_match);
+
++/** xt_check_entry_match - check that matches end before start of target
++ *
++ * @match: beginning of xt_entry_match
++ * @target: beginning of this rules target (alleged end of matches)
++ * @alignment: alignment requirement of match structures
++ *
++ * Validates that all matches add up to the beginning of the target,
++ * and that each match covers at least the base structure size.
++ *
++ * Return: 0 on success, negative errno on failure.
++ */
++static int xt_check_entry_match(const char *match, const char *target,
++ const size_t alignment)
++{
++ const struct xt_entry_match *pos;
++ int length = target - match;
++
++ if (length == 0) /* no matches */
++ return 0;
++
++ pos = (struct xt_entry_match *)match;
++ do {
++ if ((unsigned long)pos % alignment)
++ return -EINVAL;
++
++ if (length < (int)sizeof(struct xt_entry_match))
++ return -EINVAL;
++
++ if (pos->u.match_size < sizeof(struct xt_entry_match))
++ return -EINVAL;
++
++ if (pos->u.match_size > length)
++ return -EINVAL;
++
++ length -= pos->u.match_size;
++ pos = ((void *)((char *)(pos) + (pos)->u.match_size));
++ } while (length > 0);
++
++ return 0;
++}
++
+ #ifdef CONFIG_COMPAT
+ int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
+ {
+@@ -485,13 +526,14 @@ int xt_compat_match_offset(const struct xt_match *match)
+ }
+ EXPORT_SYMBOL_GPL(xt_compat_match_offset);
+
+-int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+- unsigned int *size)
++void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
++ unsigned int *size)
+ {
+ const struct xt_match *match = m->u.kernel.match;
+ struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
+ int pad, off = xt_compat_match_offset(match);
+ u_int16_t msize = cm->u.user.match_size;
++ char name[sizeof(m->u.user.name)];
+
+ m = *dstptr;
+ memcpy(m, cm, sizeof(*cm));
+@@ -505,10 +547,12 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+
+ msize += off;
+ m->u.user.match_size = msize;
++ strlcpy(name, match->name, sizeof(name));
++ module_put(match->me);
++ strncpy(m->u.user.name, name, sizeof(m->u.user.name));
+
+ *size += off;
+ *dstptr += msize;
+- return 0;
+ }
+ EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
+
+@@ -539,8 +583,125 @@ int xt_compat_match_to_user(const struct xt_entry_match *m,
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
++
++/* non-compat version may have padding after verdict */
++struct compat_xt_standard_target {
++ struct compat_xt_entry_target t;
++ compat_uint_t verdict;
++};
++
++int xt_compat_check_entry_offsets(const void *base, const char *elems,
++ unsigned int target_offset,
++ unsigned int next_offset)
++{
++ long size_of_base_struct = elems - (const char *)base;
++ const struct compat_xt_entry_target *t;
++ const char *e = base;
++
++ if (target_offset < size_of_base_struct)
++ return -EINVAL;
++
++ if (target_offset + sizeof(*t) > next_offset)
++ return -EINVAL;
++
++ t = (void *)(e + target_offset);
++ if (t->u.target_size < sizeof(*t))
++ return -EINVAL;
++
++ if (target_offset + t->u.target_size > next_offset)
++ return -EINVAL;
++
++ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
++ COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset)
++ return -EINVAL;
++
++ /* compat_xt_entry match has less strict aligment requirements,
++ * otherwise they are identical. In case of padding differences
++ * we need to add compat version of xt_check_entry_match.
++ */
++ BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match));
++
++ return xt_check_entry_match(elems, base + target_offset,
++ __alignof__(struct compat_xt_entry_match));
++}
++EXPORT_SYMBOL(xt_compat_check_entry_offsets);
+ #endif /* CONFIG_COMPAT */
+
++/**
++ * xt_check_entry_offsets - validate arp/ip/ip6t_entry
++ *
++ * @base: pointer to arp/ip/ip6t_entry
++ * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems
++ * @target_offset: the arp/ip/ip6_t->target_offset
++ * @next_offset: the arp/ip/ip6_t->next_offset
++ *
++ * validates that target_offset and next_offset are sane and that all
++ * match sizes (if any) align with the target offset.
++ *
++ * This function does not validate the targets or matches themselves, it
++ * only tests that all the offsets and sizes are correct, that all
++ * match structures are aligned, and that the last structure ends where
++ * the target structure begins.
++ *
++ * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version.
++ *
++ * The arp/ip/ip6t_entry structure @base must have passed following tests:
++ * - it must point to a valid memory location
++ * - base to base + next_offset must be accessible, i.e. not exceed allocated
++ * length.
++ *
++ * A well-formed entry looks like this:
++ *
++ * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry
++ * e->elems[]-----' | |
++ * matchsize | |
++ * matchsize | |
++ * | |
++ * target_offset---------------------------------' |
++ * next_offset---------------------------------------------------'
++ *
++ * elems[]: flexible array member at end of ip(6)/arpt_entry struct.
++ * This is where matches (if any) and the target reside.
++ * target_offset: beginning of target.
++ * next_offset: start of the next rule; also: size of this rule.
++ * Since targets have a minimum size, target_offset + minlen <= next_offset.
++ *
++ * Every match stores its size, sum of sizes must not exceed target_offset.
++ *
++ * Return: 0 on success, negative errno on failure.
++ */
++int xt_check_entry_offsets(const void *base,
++ const char *elems,
++ unsigned int target_offset,
++ unsigned int next_offset)
++{
++ long size_of_base_struct = elems - (const char *)base;
++ const struct xt_entry_target *t;
++ const char *e = base;
++
++ /* target start is within the ip/ip6/arpt_entry struct */
++ if (target_offset < size_of_base_struct)
++ return -EINVAL;
++
++ if (target_offset + sizeof(*t) > next_offset)
++ return -EINVAL;
++
++ t = (void *)(e + target_offset);
++ if (t->u.target_size < sizeof(*t))
++ return -EINVAL;
++
++ if (target_offset + t->u.target_size > next_offset)
++ return -EINVAL;
++
++ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
++ XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset)
++ return -EINVAL;
++
++ return xt_check_entry_match(elems, base + target_offset,
++ __alignof__(struct xt_entry_match));
++}
++EXPORT_SYMBOL(xt_check_entry_offsets);
++
+ int xt_check_target(struct xt_tgchk_param *par,
+ unsigned int size, u_int8_t proto, bool inv_proto)
+ {
+@@ -591,6 +752,80 @@ int xt_check_target(struct xt_tgchk_param *par,
+ }
+ EXPORT_SYMBOL_GPL(xt_check_target);
+
++/**
++ * xt_copy_counters_from_user - copy counters and metadata from userspace
++ *
++ * @user: src pointer to userspace memory
++ * @len: alleged size of userspace memory
++ * @info: where to store the xt_counters_info metadata
++ * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel
++ *
++ * Copies counter meta data from @user and stores it in @info.
++ *
++ * vmallocs memory to hold the counters, then copies the counter data
++ * from @user to the new memory and returns a pointer to it.
++ *
++ * If @compat is true, @info gets converted automatically to the 64bit
++ * representation.
++ *
++ * The metadata associated with the counters is stored in @info.
++ *
++ * Return: returns pointer that caller has to test via IS_ERR().
++ * If IS_ERR is false, caller has to vfree the pointer.
++ */
++void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
++ struct xt_counters_info *info, bool compat)
++{
++ void *mem;
++ u64 size;
++
++#ifdef CONFIG_COMPAT
++ if (compat) {
++ /* structures only differ in size due to alignment */
++ struct compat_xt_counters_info compat_tmp;
++
++ if (len <= sizeof(compat_tmp))
++ return ERR_PTR(-EINVAL);
++
++ len -= sizeof(compat_tmp);
++ if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
++ return ERR_PTR(-EFAULT);
++
++ strlcpy(info->name, compat_tmp.name, sizeof(info->name));
++ info->num_counters = compat_tmp.num_counters;
++ user += sizeof(compat_tmp);
++ } else
++#endif
++ {
++ if (len <= sizeof(*info))
++ return ERR_PTR(-EINVAL);
++
++ len -= sizeof(*info);
++ if (copy_from_user(info, user, sizeof(*info)) != 0)
++ return ERR_PTR(-EFAULT);
++
++ info->name[sizeof(info->name) - 1] = '\0';
++ user += sizeof(*info);
++ }
++
++ size = sizeof(struct xt_counters);
++ size *= info->num_counters;
++
++ if (size != (u64)len)
++ return ERR_PTR(-EINVAL);
++
++ mem = vmalloc(len);
++ if (!mem)
++ return ERR_PTR(-ENOMEM);
++
++ if (copy_from_user(mem, user, len) == 0)
++ return mem;
++
++ vfree(mem);
++ return ERR_PTR(-EFAULT);
++}
++EXPORT_SYMBOL_GPL(xt_copy_counters_from_user);
++
+ #ifdef CONFIG_COMPAT
+ int xt_compat_target_offset(const struct xt_target *target)
+ {
+@@ -606,6 +841,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+ struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
+ int pad, off = xt_compat_target_offset(target);
+ u_int16_t tsize = ct->u.user.target_size;
++ char name[sizeof(t->u.user.name)];
+
+ t = *dstptr;
+ memcpy(t, ct, sizeof(*ct));
+@@ -619,6 +855,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+
+ tsize += off;
+ t->u.user.target_size = tsize;
++ strlcpy(name, target->name, sizeof(name));
++ module_put(target->me);
++ strncpy(t->u.user.name, name, sizeof(t->u.user.name));
+
+ *size += off;
+ *dstptr += tsize;
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+index 330ebd600f25..f48e3b3aedd5 100644
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -2059,6 +2059,7 @@ static int netlink_dump(struct sock *sk)
+ struct netlink_callback *cb;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
++ struct module *module;
+ int len, err = -ENOBUFS;
+ int alloc_min_size;
+ int alloc_size;
+@@ -2134,9 +2135,11 @@ static int netlink_dump(struct sock *sk)
+ cb->done(cb);
+
+ nlk->cb_running = false;
++ module = cb->module;
++ skb = cb->skb;
+ mutex_unlock(nlk->cb_mutex);
+- module_put(cb->module);
+- consume_skb(cb->skb);
++ module_put(module);
++ consume_skb(skb);
+ return 0;
+
+ errout_skb:
+diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
+index b7e01d88bdc5..59658b2e9cdf 100644
+--- a/net/switchdev/switchdev.c
++++ b/net/switchdev/switchdev.c
+@@ -1188,6 +1188,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+ .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
+ .dst = dst,
+ .dst_len = dst_len,
++ .fi = fi,
+ .tos = tos,
+ .type = type,
+ .nlflags = nlflags,
+@@ -1196,8 +1197,6 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+ struct net_device *dev;
+ int err = 0;
+
+- memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
+-
+ /* Don't offload route if using custom ip rules or if
+ * IPv4 FIB offloading has been disabled completely.
+ */
+@@ -1242,6 +1241,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+ .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
+ .dst = dst,
+ .dst_len = dst_len,
++ .fi = fi,
+ .tos = tos,
+ .type = type,
+ .nlflags = 0,
+@@ -1250,8 +1250,6 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+ struct net_device *dev;
+ int err = 0;
+
+- memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
+-
+ if (!(fi->fib_flags & RTNH_F_OFFLOAD))
+ return 0;
+
+diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
+index d7d050f44fc1..4dfc5c14f8c3 100644
+--- a/net/tipc/netlink_compat.c
++++ b/net/tipc/netlink_compat.c
+@@ -802,7 +802,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
+ goto out;
+
+ tipc_tlv_sprintf(msg->rep, "%-10u %s",
+- nla_get_u32(publ[TIPC_NLA_PUBL_REF]),
++ nla_get_u32(publ[TIPC_NLA_PUBL_KEY]),
+ scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]);
+ out:
+ tipc_tlv_sprintf(msg->rep, "\n");
+diff --git a/net/tipc/socket.c b/net/tipc/socket.c
+index 3eeb50a27b89..5f80d3fa9c85 100644
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -2807,6 +2807,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
+ if (err)
+ return err;
+
++ if (!attrs[TIPC_NLA_SOCK])
++ return -EINVAL;
++
+ err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
+ attrs[TIPC_NLA_SOCK],
+ tipc_nl_sock_policy);
+diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
+index b50ee5d622e1..c753211cb83f 100644
+--- a/net/wireless/wext-core.c
++++ b/net/wireless/wext-core.c
+@@ -955,8 +955,29 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
+ return private(dev, iwr, cmd, info, handler);
+ }
+ /* Old driver API : call driver ioctl handler */
+- if (dev->netdev_ops->ndo_do_ioctl)
+- return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
++ if (dev->netdev_ops->ndo_do_ioctl) {
++#ifdef CONFIG_COMPAT
++ if (info->flags & IW_REQUEST_FLAG_COMPAT) {
++ int ret = 0;
++ struct iwreq iwr_lcl;
++ struct compat_iw_point *iwp_compat = (void *) &iwr->u.data;
++
++ memcpy(&iwr_lcl, iwr, sizeof(struct iwreq));
++ iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer);
++ iwr_lcl.u.data.length = iwp_compat->length;
++ iwr_lcl.u.data.flags = iwp_compat->flags;
++
++ ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd);
++
++ iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer);
++ iwp_compat->length = iwr_lcl.u.data.length;
++ iwp_compat->flags = iwr_lcl.u.data.flags;
++
++ return ret;
++ } else
++#endif
++ return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
++ }
+ return -EOPNOTSUPP;
+ }
+
+diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
+index 9a0d1445ca5c..94089fc71884 100644
+--- a/sound/pci/hda/hda_intel.c
++++ b/sound/pci/hda/hda_intel.c
+@@ -365,8 +365,11 @@ enum {
+
+ #define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170)
+ #define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
++#define IS_KBL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa171)
++#define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
+ #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci))
++#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
++ IS_KBL(pci) || IS_KBL_LP(pci)
+
+ static char *driver_short_names[] = {
+ [AZX_DRIVER_ICH] = "HDA Intel",
+@@ -2181,6 +2184,12 @@ static const struct pci_device_id azx_ids[] = {
+ /* Sunrise Point-LP */
+ { PCI_DEVICE(0x8086, 0x9d70),
+ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
++ /* Kabylake */
++ { PCI_DEVICE(0x8086, 0xa171),
++ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
++ /* Kabylake-LP */
++ { PCI_DEVICE(0x8086, 0x9d71),
++ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+ /* Broxton-P(Apollolake) */
+ { PCI_DEVICE(0x8086, 0x5a98),
+ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index d53c25e7a1c1..0fe18ede3e85 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -346,6 +346,9 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
+ case 0x10ec0234:
+ case 0x10ec0274:
+ case 0x10ec0294:
++ case 0x10ec0700:
++ case 0x10ec0701:
++ case 0x10ec0703:
+ alc_update_coef_idx(codec, 0x10, 1<<15, 0);
+ break;
+ case 0x10ec0662:
+@@ -2655,6 +2658,7 @@ enum {
+ ALC269_TYPE_ALC256,
+ ALC269_TYPE_ALC225,
+ ALC269_TYPE_ALC294,
++ ALC269_TYPE_ALC700,
+ };
+
+ /*
+@@ -2686,6 +2690,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
+ case ALC269_TYPE_ALC256:
+ case ALC269_TYPE_ALC225:
+ case ALC269_TYPE_ALC294:
++ case ALC269_TYPE_ALC700:
+ ssids = alc269_ssids;
+ break;
+ default:
+@@ -3618,13 +3623,20 @@ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec,
+ static void alc_headset_mode_unplugged(struct hda_codec *codec)
+ {
+ static struct coef_fw coef0255[] = {
+- WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */
+ WRITE_COEF(0x45, 0xd089), /* UAJ function set to menual mode */
+ UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), /* Direct Drive HP Amp control(Set to verb control)*/
+ WRITE_COEF(0x06, 0x6104), /* Set MIC2 Vref gate with HP */
+ WRITE_COEFEX(0x57, 0x03, 0x8aa6), /* Direct Drive HP Amp control */
+ {}
+ };
++ static struct coef_fw coef0255_1[] = {
++ WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */
++ {}
++ };
++ static struct coef_fw coef0256[] = {
++ WRITE_COEF(0x1b, 0x0c4b), /* LDO and MISC control */
++ {}
++ };
+ static struct coef_fw coef0233[] = {
+ WRITE_COEF(0x1b, 0x0c0b),
+ WRITE_COEF(0x45, 0xc429),
+@@ -3677,7 +3689,11 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
+
+ switch (codec->core.vendor_id) {
+ case 0x10ec0255:
++ alc_process_coef_fw(codec, coef0255_1);
++ alc_process_coef_fw(codec, coef0255);
++ break;
+ case 0x10ec0256:
++ alc_process_coef_fw(codec, coef0256);
+ alc_process_coef_fw(codec, coef0255);
+ break;
+ case 0x10ec0233:
+@@ -3896,6 +3912,12 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
+ WRITE_COEFEX(0x57, 0x03, 0x8ea6),
+ {}
+ };
++ static struct coef_fw coef0256[] = {
++ WRITE_COEF(0x45, 0xd489), /* Set to CTIA type */
++ WRITE_COEF(0x1b, 0x0c6b),
++ WRITE_COEFEX(0x57, 0x03, 0x8ea6),
++ {}
++ };
+ static struct coef_fw coef0233[] = {
+ WRITE_COEF(0x45, 0xd429),
+ WRITE_COEF(0x1b, 0x0c2b),
+@@ -3936,9 +3958,11 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
+
+ switch (codec->core.vendor_id) {
+ case 0x10ec0255:
+- case 0x10ec0256:
+ alc_process_coef_fw(codec, coef0255);
+ break;
++ case 0x10ec0256:
++ alc_process_coef_fw(codec, coef0256);
++ break;
+ case 0x10ec0233:
+ case 0x10ec0283:
+ alc_process_coef_fw(codec, coef0233);
+@@ -3978,6 +4002,12 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
+ WRITE_COEFEX(0x57, 0x03, 0x8ea6),
+ {}
+ };
++ static struct coef_fw coef0256[] = {
++ WRITE_COEF(0x45, 0xe489), /* Set to OMTP Type */
++ WRITE_COEF(0x1b, 0x0c6b),
++ WRITE_COEFEX(0x57, 0x03, 0x8ea6),
++ {}
++ };
+ static struct coef_fw coef0233[] = {
+ WRITE_COEF(0x45, 0xe429),
+ WRITE_COEF(0x1b, 0x0c2b),
+@@ -4018,9 +4048,11 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
+
+ switch (codec->core.vendor_id) {
+ case 0x10ec0255:
+- case 0x10ec0256:
+ alc_process_coef_fw(codec, coef0255);
+ break;
++ case 0x10ec0256:
++ alc_process_coef_fw(codec, coef0256);
++ break;
+ case 0x10ec0233:
+ case 0x10ec0283:
+ alc_process_coef_fw(codec, coef0233);
+@@ -4266,7 +4298,7 @@ static void alc_fixup_headset_mode_no_hp_mic(struct hda_codec *codec,
+ static void alc255_set_default_jack_type(struct hda_codec *codec)
+ {
+ /* Set to iphone type */
+- static struct coef_fw fw[] = {
++ static struct coef_fw alc255fw[] = {
+ WRITE_COEF(0x1b, 0x880b),
+ WRITE_COEF(0x45, 0xd089),
+ WRITE_COEF(0x1b, 0x080b),
+@@ -4274,7 +4306,22 @@ static void alc255_set_default_jack_type(struct hda_codec *codec)
+ WRITE_COEF(0x1b, 0x0c0b),
+ {}
+ };
+- alc_process_coef_fw(codec, fw);
++ static struct coef_fw alc256fw[] = {
++ WRITE_COEF(0x1b, 0x884b),
++ WRITE_COEF(0x45, 0xd089),
++ WRITE_COEF(0x1b, 0x084b),
++ WRITE_COEF(0x46, 0x0004),
++ WRITE_COEF(0x1b, 0x0c4b),
++ {}
++ };
++ switch (codec->core.vendor_id) {
++ case 0x10ec0255:
++ alc_process_coef_fw(codec, alc255fw);
++ break;
++ case 0x10ec0256:
++ alc_process_coef_fw(codec, alc256fw);
++ break;
++ }
+ msleep(30);
+ }
+
+@@ -5587,6 +5634,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
++ SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
+ SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
+@@ -5775,6 +5823,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+ {0x12, 0x90a60180},
+ {0x14, 0x90170130},
+ {0x21, 0x02211040}),
++ SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell Inspiron 5565", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++ {0x12, 0x90a60180},
++ {0x14, 0x90170120},
++ {0x21, 0x02211030}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x12, 0x90a60160},
+ {0x14, 0x90170120},
+@@ -6053,6 +6105,14 @@ static int patch_alc269(struct hda_codec *codec)
+ case 0x10ec0294:
+ spec->codec_variant = ALC269_TYPE_ALC294;
+ break;
++ case 0x10ec0700:
++ case 0x10ec0701:
++ case 0x10ec0703:
++ spec->codec_variant = ALC269_TYPE_ALC700;
++ spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */
++ alc_update_coef_idx(codec, 0x4a, 0, 1 << 15); /* Combo jack auto trigger control */
++ break;
++
+ }
+
+ if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) {
+@@ -7008,6 +7068,9 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
+ HDA_CODEC_ENTRY(0x10ec0670, "ALC670", patch_alc662),
+ HDA_CODEC_ENTRY(0x10ec0671, "ALC671", patch_alc662),
+ HDA_CODEC_ENTRY(0x10ec0680, "ALC680", patch_alc680),
++ HDA_CODEC_ENTRY(0x10ec0700, "ALC700", patch_alc269),
++ HDA_CODEC_ENTRY(0x10ec0701, "ALC701", patch_alc269),
++ HDA_CODEC_ENTRY(0x10ec0703, "ALC703", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0867, "ALC891", patch_alc882),
+ HDA_CODEC_ENTRY(0x10ec0880, "ALC880", patch_alc880),
+ HDA_CODEC_ENTRY(0x10ec0882, "ALC882", patch_alc882),
+diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
+index 674bdf8ecf4f..501849ad0b60 100644
+--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
++++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
+@@ -93,12 +93,11 @@ static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
+ if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
+ continue;
+
+- if (cpu_if->vgic_elrsr & (1UL << i)) {
++ if (cpu_if->vgic_elrsr & (1UL << i))
+ cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
+- continue;
+- }
++ else
++ cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
+
+- cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
+ writel_relaxed(0, base + GICH_LR0 + (i * 4));
+ }
+ }
+diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
+index fe84e1a95dd5..8db197bb6c7a 100644
+--- a/virt/kvm/irqchip.c
++++ b/virt/kvm/irqchip.c
+@@ -40,7 +40,7 @@ int kvm_irq_map_gsi(struct kvm *kvm,
+
+ irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
+ lockdep_is_held(&kvm->irq_lock));
+- if (gsi < irq_rt->nr_rt_entries) {
++ if (irq_rt && gsi < irq_rt->nr_rt_entries) {
+ hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
+ entries[n] = *e;
+ ++n;
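For readers of this patch set: the core of the netfilter hardening in the x_tables.c hunks above is the new offset walk done by xt_check_entry_match() and xt_check_entry_offsets(). Below is a minimal, stand-alone C sketch of that walk using a simplified stand-in structure; the struct name, field layout and demo values are assumptions for illustration only, not the kernel's definitions.

/*
 * Stand-alone sketch of the match-chain walk introduced above in
 * net/netfilter/x_tables.c (xt_check_entry_match).  The structure below is a
 * simplified stand-in, not the kernel's xt_entry_match.
 */
#include <stdint.h>
#include <stdio.h>

struct fake_entry_match {
        uint16_t match_size;    /* total size of this match: header + payload */
};

/*
 * Returns 0 if every match header is aligned, fits in the remaining space and
 * the chain ends exactly where the target begins; -1 otherwise.
 */
static int check_entry_match(const char *match, const char *target,
                             size_t alignment)
{
        const struct fake_entry_match *pos;
        long length = target - match;

        if (length == 0)        /* a rule with no matches is fine */
                return 0;

        pos = (const struct fake_entry_match *)match;
        do {
                if ((uintptr_t)pos % alignment)
                        return -1;      /* misaligned match header */
                if (length < (long)sizeof(*pos))
                        return -1;      /* header would run past the target */
                if (pos->match_size < sizeof(*pos))
                        return -1;      /* claimed size smaller than header */
                if (pos->match_size > length)
                        return -1;      /* claimed size overlaps the target */

                length -= pos->match_size;
                pos = (const struct fake_entry_match *)
                        ((const char *)pos + pos->match_size);
        } while (length > 0);

        return 0;
}

int main(void)
{
        uint16_t blob[4] = { 0 };       /* 8 bytes: one match, then the "target" */
        struct fake_entry_match *m = (struct fake_entry_match *)blob;

        m->match_size = sizeof(blob);   /* well-formed: ends at the target */
        printf("well-formed chain: %d\n",
               check_entry_match((char *)blob, (char *)blob + sizeof(blob),
                                 __alignof__(struct fake_entry_match)));

        m->match_size = 200;            /* lies about its size: rejected */
        printf("oversized match:   %d\n",
               check_entry_match((char *)blob, (char *)blob + sizeof(blob),
                                 __alignof__(struct fake_entry_match)));
        return 0;
}

Compile with gcc or clang and run: the second call fails the size check, just as an oversized compat match blob is now rejected by the patched kernel code.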
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-07-01 0:31 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-07-01 0:31 UTC (permalink / raw
To: gentoo-commits
commit: fd28e3ad763ea79a262ca9d96d6d812817ec60fe
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Fri Jul 1 00:31:10 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Fri Jul 1 00:31:10 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=fd28e3ad
Update gcc 4.9+ optimization patch. Bug #587578
...-additional-cpu-optimizations-for-gcc-4.9.patch | 90 ++++++++++++++--------
1 file changed, 57 insertions(+), 33 deletions(-)
diff --git a/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch b/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
index 418201d..d9729b2 100644
--- a/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
+++ b/5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
@@ -21,11 +21,12 @@ bug report to see if I'm right: https://bugzilla.kernel.org/show_bug.cgi?id=7746
This patch will expand the number of microarchitectures to include newer
processors including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
-Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 1.5 Gen Core
-i3/i5/i7 (Westmere), Intel 2nd Gen Core i3/i5/i7 (Sandybridge), Intel 3rd Gen
-Core i3/i5/i7 (Ivybridge), Intel 4th Gen Core i3/i5/i7 (Haswell), Intel 5th
-Gen Core i3/i5/i7 (Broadwell), and the low power Silvermont series of Atom
-processors (Silvermont). It also offers the compiler the 'native' flag.
+Family 15h (Steamroller), Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7
+(Nehalem), Intel 1.5 Gen Core i3/i5/i7 (Westmere), Intel 2nd Gen Core i3/i5/i7
+(Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), Intel 4th Gen Core
+i3/i5/i7 (Haswell), Intel 5th Gen Core i3/i5/i7 (Broadwell), and the low power
+Silvermont series of Atom processors (Silvermont). It also offers the compiler
+the 'native' flag.
Small but real speed increases are measurable using a make endpoint comparing
a generic kernel to one built with one of the respective microarchs.
@@ -37,9 +38,9 @@ REQUIREMENTS
linux version >=3.15
gcc version >=4.9
---- a/arch/x86/include/asm/module.h 2014-06-16 16:44:27.000000000 -0400
-+++ b/arch/x86/include/asm/module.h 2015-03-07 03:27:32.556672424 -0500
-@@ -15,6 +15,22 @@
+--- a/arch/x86/include/asm/module.h 2015-08-30 14:34:09.000000000 -0400
++++ b/arch/x86/include/asm/module.h 2015-11-06 14:18:24.234941036 -0500
+@@ -15,6 +15,24 @@
#define MODULE_PROC_FAMILY "586MMX "
#elif defined CONFIG_MCORE2
#define MODULE_PROC_FAMILY "CORE2 "
@@ -59,10 +60,12 @@ gcc version >=4.9
+#define MODULE_PROC_FAMILY "HASWELL "
+#elif defined CONFIG_MBROADWELL
+#define MODULE_PROC_FAMILY "BROADWELL "
++#elif defined CONFIG_MSKYLAKE
++#define MODULE_PROC_FAMILY "SKYLAKE "
#elif defined CONFIG_MATOM
#define MODULE_PROC_FAMILY "ATOM "
#elif defined CONFIG_M686
-@@ -33,6 +49,20 @@
+@@ -33,6 +51,22 @@
#define MODULE_PROC_FAMILY "K7 "
#elif defined CONFIG_MK8
#define MODULE_PROC_FAMILY "K8 "
@@ -77,14 +80,16 @@ gcc version >=4.9
+#elif defined CONFIG_MBULLDOZER
+#define MODULE_PROC_FAMILY "BULLDOZER "
+#elif defined CONFIG_MPILEDRIVER
++#define MODULE_PROC_FAMILY "STEAMROLLER "
++#elif defined CONFIG_MSTEAMROLLER
+#define MODULE_PROC_FAMILY "PILEDRIVER "
+#elif defined CONFIG_MJAGUAR
+#define MODULE_PROC_FAMILY "JAGUAR "
#elif defined CONFIG_MELAN
#define MODULE_PROC_FAMILY "ELAN "
#elif defined CONFIG_MCRUSOE
---- a/arch/x86/Kconfig.cpu 2014-06-16 16:44:27.000000000 -0400
-+++ b/arch/x86/Kconfig.cpu 2015-03-07 03:32:14.337713226 -0500
+--- a/arch/x86/Kconfig.cpu 2015-08-30 14:34:09.000000000 -0400
++++ b/arch/x86/Kconfig.cpu 2015-11-06 14:20:14.948369244 -0500
@@ -137,9 +137,8 @@ config MPENTIUM4
-Paxville
-Dempsey
@@ -105,7 +110,7 @@ gcc version >=4.9
depends on X86_32
---help---
Select this for an AMD Athlon K7-family processor. Enables use of
-@@ -155,12 +154,62 @@ config MK7
+@@ -155,12 +154,69 @@ config MK7
flags to GCC.
config MK8
@@ -159,6 +164,13 @@ gcc version >=4.9
+
+ Enables -march=bdver2
+
++config MSTEAMROLLER
++ bool "AMD Steamroller"
++ ---help---
++ Select this for AMD Steamroller processors.
++
++ Enables -march=bdver3
++
+config MJAGUAR
+ bool "AMD Jaguar"
+ ---help---
@@ -169,7 +181,7 @@ gcc version >=4.9
config MCRUSOE
bool "Crusoe"
depends on X86_32
-@@ -251,8 +300,17 @@ config MPSC
+@@ -251,8 +307,17 @@ config MPSC
using the cpu family field
in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
@@ -188,7 +200,7 @@ gcc version >=4.9
---help---
Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
-@@ -260,14 +318,63 @@ config MCORE2
+@@ -260,14 +325,71 @@ config MCORE2
family in /proc/cpuinfo. Newer ones have 6 and older ones 15
(not a typo)
@@ -255,10 +267,18 @@ gcc version >=4.9
+ Select this for 5th Gen Core processors in the Broadwell family.
+
+ Enables -march=broadwell
++
++config MSKYLAKE
++ bool "Intel Skylake"
++ ---help---
++
++ Select this for 6th Gen Core processors in the Skylake family.
++
++ Enables -march=skylake
config GENERIC_CPU
bool "Generic-x86-64"
-@@ -276,6 +383,19 @@ config GENERIC_CPU
+@@ -276,6 +398,19 @@ config GENERIC_CPU
Generic x86-64 CPU.
Run equally well on all x86-64 CPUs.
@@ -278,54 +298,54 @@ gcc version >=4.9
endchoice
config X86_GENERIC
-@@ -300,7 +420,7 @@ config X86_INTERNODE_CACHE_SHIFT
+@@ -300,7 +435,7 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-+ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
++ default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
default "4" if MELAN || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-@@ -331,11 +451,11 @@ config X86_ALIGNMENT_16
+@@ -331,11 +466,11 @@ config X86_ALIGNMENT_16
config X86_INTEL_USERCOPY
def_bool y
- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
-+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE
++ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE
config X86_USE_PPRO_CHECKSUM
def_bool y
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
-+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MATOM || MNATIVE
++ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MATOM || MNATIVE
config X86_USE_3DNOW
def_bool y
-@@ -359,17 +479,17 @@ config X86_P6_NOP
+@@ -359,17 +494,17 @@ config X86_P6_NOP
config X86_TSC
def_bool y
- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
-+ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MNATIVE || MATOM) || X86_64
++ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MNATIVE || MATOM) || X86_64
config X86_CMPXCHG64
def_bool y
- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
-+ depends on X86_PAE || X86_64 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
++ depends on X86_PAE || X86_64 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
# this should be set for all -march=.. options where the compiler
# generates cmov.
config X86_CMOV
def_bool y
- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
-+ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
++ depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
config X86_MINIMUM_CPU_FAMILY
int
---- a/arch/x86/Makefile 2014-06-16 16:44:27.000000000 -0400
-+++ b/arch/x86/Makefile 2015-03-07 03:33:27.650843211 -0500
-@@ -92,13 +92,35 @@ else
- KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
+--- a/arch/x86/Makefile 2015-08-30 14:34:09.000000000 -0400
++++ b/arch/x86/Makefile 2015-11-06 14:21:05.708983344 -0500
+@@ -94,13 +94,38 @@ else
+ KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
+ cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
@@ -336,6 +356,7 @@ gcc version >=4.9
+ cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
+ cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
+ cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
++ cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3)
+ cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
@@ -358,14 +379,16 @@ gcc version >=4.9
+ $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell))
+ cflags-$(CONFIG_MBROADWELL) += \
+ $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell))
++ cflags-$(CONFIG_MSKYLAKE) += \
++ $(call cc-option,-march=skylake,$(call cc-option,-mtune=skylake))
+ cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \
+ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
KBUILD_CFLAGS += $(cflags-y)
---- a/arch/x86/Makefile_32.cpu 2014-06-16 16:44:27.000000000 -0400
-+++ b/arch/x86/Makefile_32.cpu 2015-03-07 03:34:15.203586024 -0500
-@@ -23,7 +23,15 @@ cflags-$(CONFIG_MK6) += -march=k6
+--- a/arch/x86/Makefile_32.cpu 2015-08-30 14:34:09.000000000 -0400
++++ b/arch/x86/Makefile_32.cpu 2015-11-06 14:21:43.604429077 -0500
+@@ -23,7 +23,16 @@ cflags-$(CONFIG_MK6) += -march=k6
# Please note, that patches that add -march=athlon-xp and friends are pointless.
# They make zero difference whatsosever to performance at this time.
cflags-$(CONFIG_MK7) += -march=athlon
@@ -377,11 +400,12 @@ gcc version >=4.9
+cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
+cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
+cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
++cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3,-march=athlon)
+cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
-@@ -32,8 +40,15 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
+@@ -32,8 +41,16 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
cflags-$(CONFIG_MVIAC7) += -march=i686
cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
@@ -394,9 +418,9 @@ gcc version >=4.9
+cflags-$(CONFIG_MIVYBRIDGE) += -march=i686 $(call tune,ivybridge)
+cflags-$(CONFIG_MHASWELL) += -march=i686 $(call tune,haswell)
+cflags-$(CONFIG_MBROADWELL) += -march=i686 $(call tune,broadwell)
++cflags-$(CONFIG_MSKYLAKE) += -march=i686 $(call tune,skylake)
+cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \
+ $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic))
# AMD Elan support
cflags-$(CONFIG_MELAN) += -march=i486
-
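For anyone applying this patch to their own tree, the safety net in the hunks above is the kernel's cc-option helper: each new -march value is probed at build time and replaced by its stated fallback (or dropped) if the installed compiler rejects it, so the new Skylake and Steamroller options degrade gracefully rather than breaking the build. A rough shell equivalent of that probe, handy for checking a toolchain before building (illustrative sketch only; the flag and compiler name are examples, not part of the committed patch):

  # Approximate what $(call cc-option,-march=skylake,...) does in the kernel
  # Makefile: compile an empty translation unit with the candidate flag and
  # report whether the compiler accepts it.
  if gcc -march=skylake -c -x c /dev/null -o /dev/null 2>/dev/null; then
      echo "-march=skylake accepted; CONFIG_MSKYLAKE will use it"
  else
      echo "-march=skylake rejected; cc-option falls back to -mtune=skylake or generic"
  fi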
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-07-02 15:27 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-07-02 15:27 UTC (permalink / raw
To: gentoo-commits
commit: e85cf895cefab29ae83391390c84229cb7830c88
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Jul 2 15:27:20 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Jul 2 15:27:20 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=e85cf895
Select SYSVIPC when GENTOO_LINUX_PORTAGE is selected, since SYSVIPC is a dependency of IPC_NS. See bug #587736.
4567_distro-Gentoo-Kconfig.patch | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/4567_distro-Gentoo-Kconfig.patch b/4567_distro-Gentoo-Kconfig.patch
index c7af596..499b21f 100644
--- a/4567_distro-Gentoo-Kconfig.patch
+++ b/4567_distro-Gentoo-Kconfig.patch
@@ -1,5 +1,5 @@
---- a/Kconfig
-+++ b/Kconfig
+--- a/Kconfig 2016-07-01 19:22:17.117439707 -0400
++++ b/Kconfig 2016-07-01 19:21:54.371440596 -0400
@@ -8,4 +8,6 @@ config SRCARCH
string
option env="SRCARCH"
@@ -7,9 +7,9 @@
+source "distro/Kconfig"
+
source "arch/$SRCARCH/Kconfig"
---- /dev/null
-+++ b/distro/Kconfig
-@@ -0,0 +1,131 @@
+--- /dev/null 2016-07-01 11:23:26.087932647 -0400
++++ b/distro/Kconfig 2016-07-01 19:32:35.581415519 -0400
+@@ -0,0 +1,134 @@
+menu "Gentoo Linux"
+
+config GENTOO_LINUX
@@ -63,6 +63,7 @@
+ select NAMESPACES
+ select IPC_NS
+ select NET_NS
++ select SYSVIPC
+
+ help
+ This enables options required by various Portage FEATURES.
@@ -71,6 +72,8 @@
+ CGROUPS (required for FEATURES=cgroup)
+ IPC_NS (required for FEATURES=ipc-sandbox)
+ NET_NS (required for FEATURES=network-sandbox)
++ SYSVIPC (required by IPC_NS)
++
+
+ It is highly recommended that you leave this enabled as these FEATURES
+ are, or will soon be, enabled by default.
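As the commit message notes, the extra select is needed because Kconfig's select forces a symbol on without satisfying that symbol's own dependencies, so enabling IPC_NS through GENTOO_LINUX_PORTAGE can otherwise leave SYSVIPC unset. A quick way to sanity-check the result after applying this patch and enabling the option (illustrative shell commands only, run from a configured kernel source tree):

  # Refresh the config; IPC_NS and its SYSVIPC dependency should both be set.
  make olddefconfig
  grep -E '^CONFIG_(GENTOO_LINUX_PORTAGE|IPC_NS|SYSVIPC)=' .config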
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-07-11 19:57 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-07-11 19:57 UTC (permalink / raw
To: gentoo-commits
commit: 04d89d0f4edaf324768e6d860f4ec6546d043725
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Mon Jul 11 19:56:55 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Mon Jul 11 19:57:24 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=04d89d0f
Linux patch 4.6.4
0000_README | 4 +
1003_linux-4.6.4.patch | 1008 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1012 insertions(+)
diff --git a/0000_README b/0000_README
index 8feba45..5c841a9 100644
--- a/0000_README
+++ b/0000_README
@@ -55,6 +55,10 @@ Patch: 1002_linux-4.6.3.patch
From: http://www.kernel.org
Desc: Linux 4.6.3
+Patch: 1003_linux-4.6.4.patch
+From: http://www.kernel.org
+Desc: Linux 4.6.4
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1003_linux-4.6.4.patch b/1003_linux-4.6.4.patch
new file mode 100644
index 0000000..5c5a062
--- /dev/null
+++ b/1003_linux-4.6.4.patch
@@ -0,0 +1,1008 @@
+diff --git a/Makefile b/Makefile
+index c62b531d5a85..cd374426114a 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 6
+-SUBLEVEL = 3
++SUBLEVEL = 4
+ EXTRAVERSION =
+ NAME = Charred Weasel
+
+diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
+index 43fe85f20d57..7097a3395b25 100644
+--- a/crypto/crypto_user.c
++++ b/crypto/crypto_user.c
+@@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
+ [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+ [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+ [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
++ [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+ [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0,
+ };
+
+diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
+index 574e87c7f2b8..9acccad26928 100644
+--- a/drivers/crypto/ux500/hash/hash_core.c
++++ b/drivers/crypto/ux500/hash/hash_core.c
+@@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data *device_data,
+ &device_data->state);
+ memmove(req_ctx->state.buffer,
+ device_data->state.buffer,
+- HASH_BLOCK_SIZE / sizeof(u32));
++ HASH_BLOCK_SIZE);
+ if (ret) {
+ dev_err(device_data->dev,
+ "%s: hash_resume_state() failed!\n",
+@@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data *device_data,
+
+ memmove(device_data->state.buffer,
+ req_ctx->state.buffer,
+- HASH_BLOCK_SIZE / sizeof(u32));
++ HASH_BLOCK_SIZE);
+ if (ret) {
+ dev_err(device_data->dev, "%s: hash_save_state() failed!\n",
+ __func__);
+diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
+index 495577b6d31b..94ad5c0adbcb 100644
+--- a/drivers/crypto/vmx/aes_cbc.c
++++ b/drivers/crypto/vmx/aes_cbc.c
+@@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "p8_aes_cbc",
+ .cra_module = THIS_MODULE,
+- .cra_priority = 1000,
++ .cra_priority = 2000,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_alignmask = 0,
+diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
+index 0a3c1b04cf3c..38ed10d761d0 100644
+--- a/drivers/crypto/vmx/aes_ctr.c
++++ b/drivers/crypto/vmx/aes_ctr.c
+@@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "p8_aes_ctr",
+ .cra_module = THIS_MODULE,
+- .cra_priority = 1000,
++ .cra_priority = 2000,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_alignmask = 0,
+diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
+index 6dc810bce295..944a6dca0fcb 100644
+--- a/drivers/usb/core/quirks.c
++++ b/drivers/usb/core/quirks.c
+@@ -44,6 +44,9 @@ static const struct usb_device_id usb_quirk_list[] = {
+ /* Creative SB Audigy 2 NX */
+ { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME },
+
++ /* USB3503 */
++ { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
++
+ /* Microsoft Wireless Laser Mouse 6000 Receiver */
+ { USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME },
+
+@@ -173,6 +176,10 @@ static const struct usb_device_id usb_quirk_list[] = {
+ /* MAYA44USB sound device */
+ { USB_DEVICE(0x0a92, 0x0091), .driver_info = USB_QUIRK_RESET_RESUME },
+
++ /* ASUS Base Station(T100) */
++ { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
++ USB_QUIRK_IGNORE_REMOTE_WAKEUP },
++
+ /* Action Semiconductor flash disk */
+ { USB_DEVICE(0x10d6, 0x2200), .driver_info =
+ USB_QUIRK_STRING_FETCH_255 },
+@@ -188,26 +195,22 @@ static const struct usb_device_id usb_quirk_list[] = {
+ { USB_DEVICE(0x1908, 0x1315), .driver_info =
+ USB_QUIRK_HONOR_BNUMINTERFACES },
+
+- /* INTEL VALUE SSD */
+- { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
+-
+- /* USB3503 */
+- { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
+-
+- /* ASUS Base Station(T100) */
+- { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+- USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+-
+ /* Protocol and OTG Electrical Test Device */
+ { USB_DEVICE(0x1a0a, 0x0200), .driver_info =
+ USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
+
++ /* Acer C120 LED Projector */
++ { USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM },
++
+ /* Blackmagic Design Intensity Shuttle */
+ { USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM },
+
+ /* Blackmagic Design UltraStudio SDI */
+ { USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM },
+
++ /* INTEL VALUE SSD */
++ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
++
+ { } /* terminating entry must be last */
+ };
+
+diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c
+index dd5cb5577dca..2f1fb7e7aa54 100644
+--- a/drivers/usb/dwc3/dwc3-exynos.c
++++ b/drivers/usb/dwc3/dwc3-exynos.c
+@@ -128,12 +128,6 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
+
+ platform_set_drvdata(pdev, exynos);
+
+- ret = dwc3_exynos_register_phys(exynos);
+- if (ret) {
+- dev_err(dev, "couldn't register PHYs\n");
+- return ret;
+- }
+-
+ exynos->dev = dev;
+
+ exynos->clk = devm_clk_get(dev, "usbdrd30");
+@@ -183,20 +177,29 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
+ goto err3;
+ }
+
++ ret = dwc3_exynos_register_phys(exynos);
++ if (ret) {
++ dev_err(dev, "couldn't register PHYs\n");
++ goto err4;
++ }
++
+ if (node) {
+ ret = of_platform_populate(node, NULL, NULL, dev);
+ if (ret) {
+ dev_err(dev, "failed to add dwc3 core\n");
+- goto err4;
++ goto err5;
+ }
+ } else {
+ dev_err(dev, "no device node, failed to add dwc3 core\n");
+ ret = -ENODEV;
+- goto err4;
++ goto err5;
+ }
+
+ return 0;
+
++err5:
++ platform_device_unregister(exynos->usb2_phy);
++ platform_device_unregister(exynos->usb3_phy);
+ err4:
+ regulator_disable(exynos->vdd10);
+ err3:
+diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
+index e64479f882a5..aa3707bdebb4 100644
+--- a/drivers/usb/gadget/legacy/inode.c
++++ b/drivers/usb/gadget/legacy/inode.c
+@@ -938,8 +938,11 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr)
+ struct usb_ep *ep = dev->gadget->ep0;
+ struct usb_request *req = dev->req;
+
+- if ((retval = setup_req (ep, req, 0)) == 0)
+- retval = usb_ep_queue (ep, req, GFP_ATOMIC);
++ if ((retval = setup_req (ep, req, 0)) == 0) {
++ spin_unlock_irq (&dev->lock);
++ retval = usb_ep_queue (ep, req, GFP_KERNEL);
++ spin_lock_irq (&dev->lock);
++ }
+ dev->state = STATE_DEV_CONNECTED;
+
+ /* assume that was SET_CONFIGURATION */
+@@ -1457,8 +1460,11 @@ delegate:
+ w_length);
+ if (value < 0)
+ break;
++
++ spin_unlock (&dev->lock);
+ value = usb_ep_queue (gadget->ep0, dev->req,
+- GFP_ATOMIC);
++ GFP_KERNEL);
++ spin_lock (&dev->lock);
+ if (value < 0) {
+ clean_req (gadget->ep0, dev->req);
+ break;
+@@ -1481,11 +1487,14 @@ delegate:
+ if (value >= 0 && dev->state != STATE_DEV_SETUP) {
+ req->length = value;
+ req->zero = value < w_length;
+- value = usb_ep_queue (gadget->ep0, req, GFP_ATOMIC);
++
++ spin_unlock (&dev->lock);
++ value = usb_ep_queue (gadget->ep0, req, GFP_KERNEL);
+ if (value < 0) {
+ DBG (dev, "ep_queue --> %d\n", value);
+ req->status = 0;
+ }
++ return value;
+ }
+
+ /* device stalls when value < 0 */
+diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
+index 4031b372008e..c1c1024a054c 100644
+--- a/drivers/usb/host/ehci-tegra.c
++++ b/drivers/usb/host/ehci-tegra.c
+@@ -89,7 +89,7 @@ static int tegra_reset_usb_controller(struct platform_device *pdev)
+ if (!usb1_reset_attempted) {
+ struct reset_control *usb1_reset;
+
+- usb1_reset = of_reset_control_get(phy_np, "usb");
++ usb1_reset = of_reset_control_get(phy_np, "utmi-pads");
+ if (IS_ERR(usb1_reset)) {
+ dev_warn(&pdev->dev,
+ "can't get utmi-pads reset from the PHY\n");
+diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
+index 48672fac7ff3..c10972fcc8e4 100644
+--- a/drivers/usb/host/xhci-pci.c
++++ b/drivers/usb/host/xhci-pci.c
+@@ -37,6 +37,7 @@
+ /* Device for a quirk */
+ #define PCI_VENDOR_ID_FRESCO_LOGIC 0x1b73
+ #define PCI_DEVICE_ID_FRESCO_LOGIC_PDK 0x1000
++#define PCI_DEVICE_ID_FRESCO_LOGIC_FL1009 0x1009
+ #define PCI_DEVICE_ID_FRESCO_LOGIC_FL1400 0x1400
+
+ #define PCI_VENDOR_ID_ETRON 0x1b6f
+@@ -114,6 +115,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
+ xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+ }
+
++ if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
++ pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009)
++ xhci->quirks |= XHCI_BROKEN_STREAMS;
++
+ if (pdev->vendor == PCI_VENDOR_ID_NEC)
+ xhci->quirks |= XHCI_NEC_HOST;
+
+diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
+index 474b5fa14900..d6e2b2751e50 100644
+--- a/drivers/usb/host/xhci-plat.c
++++ b/drivers/usb/host/xhci-plat.c
+@@ -194,6 +194,9 @@ static int xhci_plat_probe(struct platform_device *pdev)
+ ret = clk_prepare_enable(clk);
+ if (ret)
+ goto put_hcd;
++ } else if (PTR_ERR(clk) == -EPROBE_DEFER) {
++ ret = -EPROBE_DEFER;
++ goto put_hcd;
+ }
+
+ xhci = hcd_to_xhci(hcd);
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index 99b4ff42f7a0..8b5b2aca277d 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -290,6 +290,14 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
+
+ temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
+ xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
++
++ /*
++ * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
++ * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
++ * but the completion event in never sent. Use the cmd timeout timer to
++ * handle those cases. Use twice the time to cover the bit polling retry
++ */
++ mod_timer(&xhci->cmd_timer, jiffies + (2 * XHCI_CMD_DEFAULT_TIMEOUT));
+ xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
+ &xhci->op_regs->cmd_ring);
+
+@@ -314,6 +322,7 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
+
+ xhci_err(xhci, "Stopped the command ring failed, "
+ "maybe the host is dead\n");
++ del_timer(&xhci->cmd_timer);
+ xhci->xhc_state |= XHCI_STATE_DYING;
+ xhci_quiesce(xhci);
+ xhci_halt(xhci);
+@@ -1253,22 +1262,21 @@ void xhci_handle_command_timeout(unsigned long data)
+ int ret;
+ unsigned long flags;
+ u64 hw_ring_state;
+- struct xhci_command *cur_cmd = NULL;
++ bool second_timeout = false;
+ xhci = (struct xhci_hcd *) data;
+
+ /* mark this command to be cancelled */
+ spin_lock_irqsave(&xhci->lock, flags);
+ if (xhci->current_cmd) {
+- cur_cmd = xhci->current_cmd;
+- cur_cmd->status = COMP_CMD_ABORT;
++ if (xhci->current_cmd->status == COMP_CMD_ABORT)
++ second_timeout = true;
++ xhci->current_cmd->status = COMP_CMD_ABORT;
+ }
+
+-
+ /* Make sure command ring is running before aborting it */
+ hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
+ if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
+ (hw_ring_state & CMD_RING_RUNNING)) {
+-
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ xhci_dbg(xhci, "Command timeout\n");
+ ret = xhci_abort_cmd_ring(xhci);
+@@ -1280,6 +1288,15 @@ void xhci_handle_command_timeout(unsigned long data)
+ }
+ return;
+ }
++
++ /* command ring failed to restart, or host removed. Bail out */
++ if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
++ spin_unlock_irqrestore(&xhci->lock, flags);
++ xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
++ xhci_cleanup_command_queue(xhci);
++ return;
++ }
++
+ /* command timeout on stopped ring, ring can't be aborted */
+ xhci_dbg(xhci, "Command timeout on stopped ring\n");
+ xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd);
+@@ -2728,7 +2745,8 @@ hw_died:
+ writel(irq_pending, &xhci->ir_set->irq_pending);
+ }
+
+- if (xhci->xhc_state & XHCI_STATE_DYING) {
++ if (xhci->xhc_state & XHCI_STATE_DYING ||
++ xhci->xhc_state & XHCI_STATE_HALTED) {
+ xhci_dbg(xhci, "xHCI dying, ignoring interrupt. "
+ "Shouldn't IRQs be disabled?\n");
+ /* Clear the event handler busy flag (RW1C);
+diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
+index 9e71c96ad74a..327280535848 100644
+--- a/drivers/usb/host/xhci.c
++++ b/drivers/usb/host/xhci.c
+@@ -685,20 +685,23 @@ void xhci_stop(struct usb_hcd *hcd)
+ u32 temp;
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+- if (xhci->xhc_state & XHCI_STATE_HALTED)
+- return;
+-
+ mutex_lock(&xhci->mutex);
+- spin_lock_irq(&xhci->lock);
+- xhci->xhc_state |= XHCI_STATE_HALTED;
+- xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
+
+- /* Make sure the xHC is halted for a USB3 roothub
+- * (xhci_stop() could be called as part of failed init).
+- */
+- xhci_halt(xhci);
+- xhci_reset(xhci);
+- spin_unlock_irq(&xhci->lock);
++ if (!(xhci->xhc_state & XHCI_STATE_HALTED)) {
++ spin_lock_irq(&xhci->lock);
++
++ xhci->xhc_state |= XHCI_STATE_HALTED;
++ xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
++ xhci_halt(xhci);
++ xhci_reset(xhci);
++
++ spin_unlock_irq(&xhci->lock);
++ }
++
++ if (!usb_hcd_is_primary_hcd(hcd)) {
++ mutex_unlock(&xhci->mutex);
++ return;
++ }
+
+ xhci_cleanup_msix(xhci);
+
+diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
+index 39fd95833eb8..c84f4d0816e5 100644
+--- a/drivers/usb/musb/musb_core.c
++++ b/drivers/usb/musb/musb_core.c
+@@ -2429,7 +2429,8 @@ static void musb_restore_context(struct musb *musb)
+ musb_writew(musb_base, MUSB_INTRTXE, musb->intrtxe);
+ musb_writew(musb_base, MUSB_INTRRXE, musb->intrrxe);
+ musb_writeb(musb_base, MUSB_INTRUSBE, musb->context.intrusbe);
+- musb_writeb(musb_base, MUSB_DEVCTL, musb->context.devctl);
++ if (musb->context.devctl & MUSB_DEVCTL_SESSION)
++ musb_writeb(musb_base, MUSB_DEVCTL, musb->context.devctl);
+
+ for (i = 0; i < musb->config->num_eps; ++i) {
+ struct musb_hw_ep *hw_ep;
+diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
+index 2f8ad7f1f482..8ff03228540a 100644
+--- a/drivers/usb/musb/musb_host.c
++++ b/drivers/usb/musb/musb_host.c
+@@ -594,14 +594,13 @@ musb_rx_reinit(struct musb *musb, struct musb_qh *qh, u8 epnum)
+ musb_writew(ep->regs, MUSB_TXCSR, 0);
+
+ /* scrub all previous state, clearing toggle */
+- } else {
+- csr = musb_readw(ep->regs, MUSB_RXCSR);
+- if (csr & MUSB_RXCSR_RXPKTRDY)
+- WARNING("rx%d, packet/%d ready?\n", ep->epnum,
+- musb_readw(ep->regs, MUSB_RXCOUNT));
+-
+- musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG);
+ }
++ csr = musb_readw(ep->regs, MUSB_RXCSR);
++ if (csr & MUSB_RXCSR_RXPKTRDY)
++ WARNING("rx%d, packet/%d ready?\n", ep->epnum,
++ musb_readw(ep->regs, MUSB_RXCOUNT));
++
++ musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG);
+
+ /* target addr and (for multipoint) hub addr/port */
+ if (musb->is_multipoint) {
+@@ -995,9 +994,15 @@ static void musb_bulk_nak_timeout(struct musb *musb, struct musb_hw_ep *ep,
+ if (is_in) {
+ dma = is_dma_capable() ? ep->rx_channel : NULL;
+
+- /* clear nak timeout bit */
++ /*
++ * Need to stop the transaction by clearing REQPKT first
++ * then the NAK Timeout bit ref MUSBMHDRC USB 2.0 HIGH-SPEED
++ * DUAL-ROLE CONTROLLER Programmer's Guide, section 9.2.2
++ */
+ rx_csr = musb_readw(epio, MUSB_RXCSR);
+ rx_csr |= MUSB_RXCSR_H_WZC_BITS;
++ rx_csr &= ~MUSB_RXCSR_H_REQPKT;
++ musb_writew(epio, MUSB_RXCSR, rx_csr);
+ rx_csr &= ~MUSB_RXCSR_DATAERROR;
+ musb_writew(epio, MUSB_RXCSR, rx_csr);
+
+@@ -1551,7 +1556,7 @@ static int musb_rx_dma_iso_cppi41(struct dma_controller *dma,
+ struct urb *urb,
+ size_t len)
+ {
+- struct dma_channel *channel = hw_ep->tx_channel;
++ struct dma_channel *channel = hw_ep->rx_channel;
+ void __iomem *epio = hw_ep->regs;
+ dma_addr_t *buf;
+ u32 length, res;
+diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
+index 2eddbe538cda..5608af4a369d 100644
+--- a/drivers/usb/serial/mos7720.c
++++ b/drivers/usb/serial/mos7720.c
+@@ -2007,6 +2007,7 @@ static void mos7720_release(struct usb_serial *serial)
+ urblist_entry)
+ usb_unlink_urb(urbtrack->urb);
+ spin_unlock_irqrestore(&mos_parport->listlock, flags);
++ parport_del_port(mos_parport->pp);
+
+ kref_put(&mos_parport->ref_count, destroy_mos_parport);
+ }
+diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
+index 16bc679dc2fc..ecc7d4b1dfa3 100644
+--- a/drivers/usb/storage/uas.c
++++ b/drivers/usb/storage/uas.c
+@@ -835,6 +835,7 @@ static int uas_slave_configure(struct scsi_device *sdev)
+ if (devinfo->flags & US_FL_BROKEN_FUA)
+ sdev->broken_fua = 1;
+
++ scsi_change_queue_depth(sdev, devinfo->qdepth - 2);
+ return 0;
+ }
+
+diff --git a/include/linux/bpf.h b/include/linux/bpf.h
+index f1d5c5acc8dd..ca80d5830bfd 100644
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -229,6 +229,10 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd)
+ static inline void bpf_prog_put(struct bpf_prog *prog)
+ {
+ }
++
++static inline void bpf_prog_put_rcu(struct bpf_prog *prog)
++{
++}
+ #endif /* CONFIG_BPF_SYSCALL */
+
+ /* verifier prototypes for helper functions called from eBPF programs */
+diff --git a/include/linux/net.h b/include/linux/net.h
+index f840d77c6c31..9d90efe6a708 100644
+--- a/include/linux/net.h
++++ b/include/linux/net.h
+@@ -252,7 +252,8 @@ do { \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \
+ net_ratelimit()) \
+- __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__); \
++ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \
++ ##__VA_ARGS__); \
+ } while (0)
+ #elif defined(DEBUG)
+ #define net_dbg_ratelimited(fmt, ...) \
+diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
+index 4018b48f2b3b..a0596ca0e80a 100644
+--- a/include/linux/sock_diag.h
++++ b/include/linux/sock_diag.h
+@@ -36,6 +36,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
+ {
+ switch (sk->sk_family) {
+ case AF_INET:
++ if (sk->sk_type == SOCK_RAW)
++ return SKNLGRP_NONE;
++
+ switch (sk->sk_protocol) {
+ case IPPROTO_TCP:
+ return SKNLGRP_INET_TCP_DESTROY;
+@@ -45,6 +48,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
+ return SKNLGRP_NONE;
+ }
+ case AF_INET6:
++ if (sk->sk_type == SOCK_RAW)
++ return SKNLGRP_NONE;
++
+ switch (sk->sk_protocol) {
+ case IPPROTO_TCP:
+ return SKNLGRP_INET6_TCP_DESTROY;
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index c0ded2416615..a69c90cea05d 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -7143,7 +7143,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
+ prog = event->tp_event->prog;
+ if (prog) {
+ event->tp_event->prog = NULL;
+- bpf_prog_put(prog);
++ bpf_prog_put_rcu(prog);
+ }
+ }
+
+diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
+index fbd0acf80b13..2fdebabbfacd 100644
+--- a/net/ax25/af_ax25.c
++++ b/net/ax25/af_ax25.c
+@@ -976,7 +976,8 @@ static int ax25_release(struct socket *sock)
+ release_sock(sk);
+ ax25_disconnect(ax25, 0);
+ lock_sock(sk);
+- ax25_destroy_socket(ax25);
++ if (!sock_flag(ax25->sk, SOCK_DESTROY))
++ ax25_destroy_socket(ax25);
+ break;
+
+ case AX25_STATE_3:
+diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
+index 951cd57bb07d..5237dff6941d 100644
+--- a/net/ax25/ax25_ds_timer.c
++++ b/net/ax25/ax25_ds_timer.c
+@@ -102,6 +102,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
+ switch (ax25->state) {
+
+ case AX25_STATE_0:
++ case AX25_STATE_2:
+ /* Magic here: If we listen() and a new link dies before it
+ is accepted() it isn't 'dead' so doesn't get removed. */
+ if (!sk || sock_flag(sk, SOCK_DESTROY) ||
+@@ -111,6 +112,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
+ sock_hold(sk);
+ ax25_destroy_socket(ax25);
+ bh_unlock_sock(sk);
++ /* Ungrab socket and destroy it */
+ sock_put(sk);
+ } else
+ ax25_destroy_socket(ax25);
+@@ -213,7 +215,8 @@ void ax25_ds_t1_timeout(ax25_cb *ax25)
+ case AX25_STATE_2:
+ if (ax25->n2count == ax25->n2) {
+ ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+- ax25_disconnect(ax25, ETIMEDOUT);
++ if (!sock_flag(ax25->sk, SOCK_DESTROY))
++ ax25_disconnect(ax25, ETIMEDOUT);
+ return;
+ } else {
+ ax25->n2count++;
+diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
+index 004467c9e6e1..2c0d6ef66f9d 100644
+--- a/net/ax25/ax25_std_timer.c
++++ b/net/ax25/ax25_std_timer.c
+@@ -38,6 +38,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
+
+ switch (ax25->state) {
+ case AX25_STATE_0:
++ case AX25_STATE_2:
+ /* Magic here: If we listen() and a new link dies before it
+ is accepted() it isn't 'dead' so doesn't get removed. */
+ if (!sk || sock_flag(sk, SOCK_DESTROY) ||
+@@ -47,6 +48,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
+ sock_hold(sk);
+ ax25_destroy_socket(ax25);
+ bh_unlock_sock(sk);
++ /* Ungrab socket and destroy it */
+ sock_put(sk);
+ } else
+ ax25_destroy_socket(ax25);
+@@ -144,7 +146,8 @@ void ax25_std_t1timer_expiry(ax25_cb *ax25)
+ case AX25_STATE_2:
+ if (ax25->n2count == ax25->n2) {
+ ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
+- ax25_disconnect(ax25, ETIMEDOUT);
++ if (!sock_flag(ax25->sk, SOCK_DESTROY))
++ ax25_disconnect(ax25, ETIMEDOUT);
+ return;
+ } else {
+ ax25->n2count++;
+diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
+index 3b78e8473a01..655a7d4c96e1 100644
+--- a/net/ax25/ax25_subr.c
++++ b/net/ax25/ax25_subr.c
+@@ -264,7 +264,8 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
+ {
+ ax25_clear_queues(ax25);
+
+- ax25_stop_heartbeat(ax25);
++ if (!sock_flag(ax25->sk, SOCK_DESTROY))
++ ax25_stop_heartbeat(ax25);
+ ax25_stop_t1timer(ax25);
+ ax25_stop_t2timer(ax25);
+ ax25_stop_t3timer(ax25);
+diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
+index 6852f3c7009c..43844144c9c4 100644
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -464,8 +464,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
+ if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
+ &ip6h->saddr)) {
+ kfree_skb(skb);
++ br->has_ipv6_addr = 0;
+ return NULL;
+ }
++
++ br->has_ipv6_addr = 1;
+ ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
+
+ hopopt = (u8 *)(ip6h + 1);
+@@ -1745,6 +1748,7 @@ void br_multicast_init(struct net_bridge *br)
+ br->ip6_other_query.delay_time = 0;
+ br->ip6_querier.port = NULL;
+ #endif
++ br->has_ipv6_addr = 1;
+
+ spin_lock_init(&br->multicast_lock);
+ setup_timer(&br->multicast_router_timer,
+diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
+index d9da857182ef..f516c53bafb6 100644
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -304,6 +304,7 @@ struct net_bridge
+ u8 multicast_disabled:1;
+ u8 multicast_querier:1;
+ u8 multicast_query_use_ifaddr:1;
++ u8 has_ipv6_addr:1;
+
+ u32 hash_elasticity;
+ u32 hash_max;
+@@ -577,10 +578,22 @@ static inline bool br_multicast_is_router(struct net_bridge *br)
+
+ static inline bool
+ __br_multicast_querier_exists(struct net_bridge *br,
+- struct bridge_mcast_other_query *querier)
++ struct bridge_mcast_other_query *querier,
++ const bool is_ipv6)
+ {
++ bool own_querier_enabled;
++
++ if (br->multicast_querier) {
++ if (is_ipv6 && !br->has_ipv6_addr)
++ own_querier_enabled = false;
++ else
++ own_querier_enabled = true;
++ } else {
++ own_querier_enabled = false;
++ }
++
+ return time_is_before_jiffies(querier->delay_time) &&
+- (br->multicast_querier || timer_pending(&querier->timer));
++ (own_querier_enabled || timer_pending(&querier->timer));
+ }
+
+ static inline bool br_multicast_querier_exists(struct net_bridge *br,
+@@ -588,10 +601,12 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
+ {
+ switch (eth->h_proto) {
+ case (htons(ETH_P_IP)):
+- return __br_multicast_querier_exists(br, &br->ip4_other_query);
++ return __br_multicast_querier_exists(br,
++ &br->ip4_other_query, false);
+ #if IS_ENABLED(CONFIG_IPV6)
+ case (htons(ETH_P_IPV6)):
+- return __br_multicast_querier_exists(br, &br->ip6_other_query);
++ return __br_multicast_querier_exists(br,
++ &br->ip6_other_query, true);
+ #endif
+ default:
+ return false;
+diff --git a/net/core/neighbour.c b/net/core/neighbour.c
+index f18ae91b652e..769cece9b00b 100644
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -2467,13 +2467,17 @@ int neigh_xmit(int index, struct net_device *dev,
+ tbl = neigh_tables[index];
+ if (!tbl)
+ goto out;
++ rcu_read_lock_bh();
+ neigh = __neigh_lookup_noref(tbl, addr, dev);
+ if (!neigh)
+ neigh = __neigh_create(tbl, addr, dev, false);
+ err = PTR_ERR(neigh);
+- if (IS_ERR(neigh))
++ if (IS_ERR(neigh)) {
++ rcu_read_unlock_bh();
+ goto out_kfree_skb;
++ }
+ err = neigh->output(neigh, skb);
++ rcu_read_unlock_bh();
+ }
+ else if (index == NEIGH_LINK_TABLE) {
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
+index 477937465a20..d95631d09248 100644
+--- a/net/ipv4/esp4.c
++++ b/net/ipv4/esp4.c
+@@ -23,6 +23,11 @@ struct esp_skb_cb {
+ void *tmp;
+ };
+
++struct esp_output_extra {
++ __be32 seqhi;
++ u32 esphoff;
++};
++
+ #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
+
+ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
+@@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
+ *
+ * TODO: Use spare space in skb for this where possible.
+ */
+-static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
++static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen)
+ {
+ unsigned int len;
+
+- len = seqhilen;
++ len = extralen;
+
+ len += crypto_aead_ivsize(aead);
+
+@@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
+ return kmalloc(len, GFP_ATOMIC);
+ }
+
+-static inline __be32 *esp_tmp_seqhi(void *tmp)
++static inline void *esp_tmp_extra(void *tmp)
+ {
+- return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
++ return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra));
+ }
+-static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
++
++static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen)
+ {
+ return crypto_aead_ivsize(aead) ?
+- PTR_ALIGN((u8 *)tmp + seqhilen,
+- crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
++ PTR_ALIGN((u8 *)tmp + extralen,
++ crypto_aead_alignmask(aead) + 1) : tmp + extralen;
+ }
+
+ static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
+@@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
+ {
+ struct ip_esp_hdr *esph = (void *)(skb->data + offset);
+ void *tmp = ESP_SKB_CB(skb)->tmp;
+- __be32 *seqhi = esp_tmp_seqhi(tmp);
++ __be32 *seqhi = esp_tmp_extra(tmp);
+
+ esph->seq_no = esph->spi;
+ esph->spi = *seqhi;
+@@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
+
+ static void esp_output_restore_header(struct sk_buff *skb)
+ {
+- esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
++ void *tmp = ESP_SKB_CB(skb)->tmp;
++ struct esp_output_extra *extra = esp_tmp_extra(tmp);
++
++ esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff -
++ sizeof(__be32));
+ }
+
+ static void esp_output_done_esn(struct crypto_async_request *base, int err)
+@@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
+ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
+ {
+ int err;
++ struct esp_output_extra *extra;
+ struct ip_esp_hdr *esph;
+ struct crypto_aead *aead;
+ struct aead_request *req;
+@@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
+ int tfclen;
+ int nfrags;
+ int assoclen;
+- int seqhilen;
+- __be32 *seqhi;
++ int extralen;
+ __be64 seqno;
+
+ /* skb is pure payload to encrypt */
+@@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
+ nfrags = err;
+
+ assoclen = sizeof(*esph);
+- seqhilen = 0;
++ extralen = 0;
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+- seqhilen += sizeof(__be32);
+- assoclen += seqhilen;
++ extralen += sizeof(*extra);
++ assoclen += sizeof(__be32);
+ }
+
+- tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
++ tmp = esp_alloc_tmp(aead, nfrags, extralen);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+- seqhi = esp_tmp_seqhi(tmp);
+- iv = esp_tmp_iv(aead, tmp, seqhilen);
++ extra = esp_tmp_extra(tmp);
++ iv = esp_tmp_iv(aead, tmp, extralen);
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
+
+@@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
+ * encryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+- esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
+- *seqhi = esph->spi;
++ extra->esphoff = (unsigned char *)esph -
++ skb_transport_header(skb);
++ esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
++ extra->seqhi = esph->spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ }
+@@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
+ goto out;
+
+ ESP_SKB_CB(skb)->tmp = tmp;
+- seqhi = esp_tmp_seqhi(tmp);
++ seqhi = esp_tmp_extra(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
+diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
+index 395e2814a46d..a42dd8021b6b 100644
+--- a/net/ipv4/ipmr.c
++++ b/net/ipv4/ipmr.c
+@@ -891,8 +891,10 @@ static struct mfc_cache *ipmr_cache_alloc(void)
+ {
+ struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
+
+- if (c)
++ if (c) {
++ c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->mfc_un.res.minvif = MAXVIFS;
++ }
+ return c;
+ }
+
+diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
+index a10e77103c88..e207cb2468da 100644
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void)
+ struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
+ if (!c)
+ return NULL;
++ c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->mfc_un.res.minvif = MAXMIFS;
+ return c;
+ }
+diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
+index 83384308d032..6c53e4eb0f09 100644
+--- a/net/ipv6/sit.c
++++ b/net/ipv6/sit.c
+@@ -560,13 +560,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+- t->parms.link, 0, IPPROTO_IPV6, 0);
++ t->parms.link, 0, iph->protocol, 0);
+ err = 0;
+ goto out;
+ }
+ if (type == ICMP_REDIRECT) {
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
+- IPPROTO_IPV6, 0);
++ iph->protocol, 0);
+ err = 0;
+ goto out;
+ }
+diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
+index 738008726cc6..fda7f4715c58 100644
+--- a/net/kcm/kcmproc.c
++++ b/net/kcm/kcmproc.c
+@@ -241,6 +241,7 @@ static const struct file_operations kcm_seq_fops = {
+ .open = kcm_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
++ .release = seq_release_net,
+ };
+
+ static struct kcm_seq_muxinfo kcm_seq_muxinfo = {
+diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
+index 8b5270008a6e..606323339e1f 100644
+--- a/net/sched/act_ipt.c
++++ b/net/sched/act_ipt.c
+@@ -121,10 +121,13 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
+ }
+
+ td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
+- if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
++ if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
++ if (exists)
++ tcf_hash_release(a, bind);
+ return -EINVAL;
++ }
+
+- if (!tcf_hash_check(tn, index, a, bind)) {
++ if (!exists) {
+ ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind,
+ false);
+ if (ret)
+diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
+index 2177eac0a61e..2e4bd2c0a50c 100644
+--- a/net/sched/sch_fifo.c
++++ b/net/sched/sch_fifo.c
+@@ -37,14 +37,18 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+
+ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+ {
++ unsigned int prev_backlog;
++
+ if (likely(skb_queue_len(&sch->q) < sch->limit))
+ return qdisc_enqueue_tail(skb, sch);
+
++ prev_backlog = sch->qstats.backlog;
+ /* queue full, remove one skb to fulfill the limit */
+ __qdisc_queue_drop_head(sch, &sch->q);
+ qdisc_qstats_drop(sch);
+ qdisc_enqueue_tail(skb, sch);
+
++ qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog);
+ return NET_XMIT_CN;
+ }
+
+diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
+index 4befe97a9034..b7c29d5b6f04 100644
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -650,14 +650,14 @@ deliver:
+ #endif
+
+ if (q->qdisc) {
++ unsigned int pkt_len = qdisc_pkt_len(skb);
+ int err = qdisc_enqueue(skb, q->qdisc);
+
+- if (unlikely(err != NET_XMIT_SUCCESS)) {
+- if (net_xmit_drop_count(err)) {
+- qdisc_qstats_drop(sch);
+- qdisc_tree_reduce_backlog(sch, 1,
+- qdisc_pkt_len(skb));
+- }
++ if (err != NET_XMIT_SUCCESS &&
++ net_xmit_drop_count(err)) {
++ qdisc_qstats_drop(sch);
++ qdisc_tree_reduce_backlog(sch, 1,
++ pkt_len);
+ }
+ goto tfifo_dequeue;
+ }
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-07-27 19:23 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-07-27 19:23 UTC (permalink / raw
To: gentoo-commits
commit: 221739056950edff86a332751f03ac75fb232f2e
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Wed Jul 27 19:23:26 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Wed Jul 27 19:23:26 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=22173905
Linux patch 4.6.5
0000_README | 4 +
1004_linux-4.6.5.patch | 7262 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 7266 insertions(+)
diff --git a/0000_README b/0000_README
index 5c841a9..67da565 100644
--- a/0000_README
+++ b/0000_README
@@ -59,6 +59,10 @@ Patch: 1003_linux-4.6.4.patch
From: http://www.kernel.org
Desc: Linux 4.6.4
+Patch: 1004_linux-4.6.5.patch
+From: http://www.kernel.org
+Desc: Linux 4.6.5
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1004_linux-4.6.5.patch b/1004_linux-4.6.5.patch
new file mode 100644
index 0000000..21cc942
--- /dev/null
+++ b/1004_linux-4.6.5.patch
@@ -0,0 +1,7262 @@
+diff --git a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
+index 6708c5e264aa..33e96f740639 100644
+--- a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
++++ b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
+@@ -1,4 +1,4 @@
+-What /sys/bus/iio/devices/iio:deviceX/in_proximity_raw
++What /sys/bus/iio/devices/iio:deviceX/in_proximity_input
+ Date: March 2014
+ KernelVersion: 3.15
+ Contact: Matt Ranostay <mranostay@gmail.com>
+diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt
+index 8638f61c8c9d..37eca00796ee 100644
+--- a/Documentation/scsi/scsi_eh.txt
++++ b/Documentation/scsi/scsi_eh.txt
+@@ -263,19 +263,23 @@ scmd->allowed.
+
+ 3. scmd recovered
+ ACTION: scsi_eh_finish_cmd() is invoked to EH-finish scmd
+- - shost->host_failed--
+ - clear scmd->eh_eflags
+ - scsi_setup_cmd_retry()
+ - move from local eh_work_q to local eh_done_q
+ LOCKING: none
++ CONCURRENCY: at most one thread per separate eh_work_q to
++ keep queue manipulation lockless
+
+ 4. EH completes
+ ACTION: scsi_eh_flush_done_q() retries scmds or notifies upper
+- layer of failure.
++ layer of failure. May be called concurrently but must have
++ a no more than one thread per separate eh_work_q to
++ manipulate the queue locklessly
+ - scmd is removed from eh_done_q and scmd->eh_entry is cleared
+ - if retry is necessary, scmd is requeued using
+ scsi_queue_insert()
+ - otherwise, scsi_finish_command() is invoked for scmd
++ - zero shost->host_failed
+ LOCKING: queue or finish function performs appropriate locking
+
+
+diff --git a/Makefile b/Makefile
+index cd374426114a..7d693a825fc7 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 6
+-SUBLEVEL = 4
++SUBLEVEL = 5
+ EXTRAVERSION =
+ NAME = Charred Weasel
+
+diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi
+index 8450944b28e6..22f7a13e20b4 100644
+--- a/arch/arm/boot/dts/armada-385-linksys.dtsi
++++ b/arch/arm/boot/dts/armada-385-linksys.dtsi
+@@ -58,8 +58,8 @@
+ soc {
+ ranges = <MBUS_ID(0xf0, 0x01) 0 0xf1000000 0x100000
+ MBUS_ID(0x01, 0x1d) 0 0xfff00000 0x100000
+- MBUS_ID(0x09, 0x09) 0 0xf1100000 0x10000
+- MBUS_ID(0x09, 0x05) 0 0xf1110000 0x10000>;
++ MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000
++ MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000>;
+
+ internal-regs {
+
+diff --git a/arch/arm/boot/dts/sun5i-r8-chip.dts b/arch/arm/boot/dts/sun5i-r8-chip.dts
+index f6898c6b84d4..c937c85ffb45 100644
+--- a/arch/arm/boot/dts/sun5i-r8-chip.dts
++++ b/arch/arm/boot/dts/sun5i-r8-chip.dts
+@@ -52,7 +52,7 @@
+
+ / {
+ model = "NextThing C.H.I.P.";
+- compatible = "nextthing,chip", "allwinner,sun5i-r8";
++ compatible = "nextthing,chip", "allwinner,sun5i-r8", "allwinner,sun5i-a13";
+
+ aliases {
+ i2c0 = &i2c0;
+diff --git a/arch/arm/boot/dts/sun6i-a31s-primo81.dts b/arch/arm/boot/dts/sun6i-a31s-primo81.dts
+index 68b479b8772c..73c133f5e79c 100644
+--- a/arch/arm/boot/dts/sun6i-a31s-primo81.dts
++++ b/arch/arm/boot/dts/sun6i-a31s-primo81.dts
+@@ -176,8 +176,6 @@
+ };
+
+ ®_dc1sw {
+- regulator-min-microvolt = <3000000>;
+- regulator-max-microvolt = <3000000>;
+ regulator-name = "vcc-lcd";
+ };
+
+diff --git a/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts b/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts
+index 360adfb1e9ca..d6ad6196a768 100644
+--- a/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts
++++ b/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts
+@@ -135,8 +135,6 @@
+
+ ®_dc1sw {
+ regulator-name = "vcc-lcd-usb2";
+- regulator-min-microvolt = <3000000>;
+- regulator-max-microvolt = <3000000>;
+ };
+
+ ®_dc5ldo {
+diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
+index aeddd28b3595..92fd2c8a9af0 100644
+--- a/arch/arm/include/asm/pgtable-2level.h
++++ b/arch/arm/include/asm/pgtable-2level.h
+@@ -193,6 +193,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
+
+ #define pmd_large(pmd) (pmd_val(pmd) & 2)
+ #define pmd_bad(pmd) (pmd_val(pmd) & 2)
++#define pmd_present(pmd) (pmd_val(pmd))
+
+ #define copy_pmd(pmdpd,pmdps) \
+ do { \
+diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
+index dc46398bc3a5..74114667d116 100644
+--- a/arch/arm/include/asm/pgtable-3level.h
++++ b/arch/arm/include/asm/pgtable-3level.h
+@@ -211,6 +211,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
+ : !!(pmd_val(pmd) & (val)))
+ #define pmd_isclear(pmd, val) (!(pmd_val(pmd) & (val)))
+
++#define pmd_present(pmd) (pmd_isset((pmd), L_PMD_SECT_VALID))
+ #define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF))
+ #define pte_special(pte) (pte_isset((pte), L_PTE_SPECIAL))
+ static inline pte_t pte_mkspecial(pte_t pte)
+@@ -249,10 +250,10 @@ PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
+ #define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+ #define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
+
+-/* represent a notpresent pmd by zero, this is used by pmdp_invalidate */
++/* represent a notpresent pmd by faulting entry, this is used by pmdp_invalidate */
+ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+ {
+- return __pmd(0);
++ return __pmd(pmd_val(pmd) & ~L_PMD_SECT_VALID);
+ }
+
+ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
+index 348caabb7625..d62204060cbe 100644
+--- a/arch/arm/include/asm/pgtable.h
++++ b/arch/arm/include/asm/pgtable.h
+@@ -182,7 +182,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
+
+ #define pmd_none(pmd) (!pmd_val(pmd))
+-#define pmd_present(pmd) (pmd_val(pmd))
+
+ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
+ {
+diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
+index dded1b763c16..72b11d91ede2 100644
+--- a/arch/arm/kvm/arm.c
++++ b/arch/arm/kvm/arm.c
+@@ -267,6 +267,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+ kvm_timer_vcpu_terminate(vcpu);
+ kvm_vgic_vcpu_destroy(vcpu);
+ kvm_pmu_vcpu_destroy(vcpu);
++ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+ }
+
+diff --git a/arch/arm/mach-imx/mach-imx6ul.c b/arch/arm/mach-imx/mach-imx6ul.c
+index a38b16b69923..b56de4b8cdf2 100644
+--- a/arch/arm/mach-imx/mach-imx6ul.c
++++ b/arch/arm/mach-imx/mach-imx6ul.c
+@@ -46,7 +46,7 @@ static int ksz8081_phy_fixup(struct phy_device *dev)
+ static void __init imx6ul_enet_phy_init(void)
+ {
+ if (IS_BUILTIN(CONFIG_PHYLIB))
+- phy_register_fixup_for_uid(PHY_ID_KSZ8081, 0xffffffff,
++ phy_register_fixup_for_uid(PHY_ID_KSZ8081, MICREL_PHY_ID_MASK,
+ ksz8081_phy_fixup);
+ }
+
+diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c
+index 7e989d61159c..474abff7e855 100644
+--- a/arch/arm/mach-mvebu/coherency.c
++++ b/arch/arm/mach-mvebu/coherency.c
+@@ -162,22 +162,16 @@ exit:
+ }
+
+ /*
+- * This ioremap hook is used on Armada 375/38x to ensure that PCIe
+- * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
+- * is needed as a workaround for a deadlock issue between the PCIe
+- * interface and the cache controller.
++ * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
++ * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
++ * needed for the HW I/O coherency mechanism to work properly without
++ * deadlock.
+ */
+ static void __iomem *
+-armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
+- unsigned int mtype, void *caller)
++armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
++ unsigned int mtype, void *caller)
+ {
+- struct resource pcie_mem;
+-
+- mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
+-
+- if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
+- mtype = MT_UNCACHED;
+-
++ mtype = MT_UNCACHED;
+ return __arm_ioremap_caller(phys_addr, size, mtype, caller);
+ }
+
+@@ -186,7 +180,7 @@ static void __init armada_375_380_coherency_init(struct device_node *np)
+ struct device_node *cache_dn;
+
+ coherency_cpu_base = of_iomap(np, 0);
+- arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
++ arch_ioremap_caller = armada_wa_ioremap_caller;
+
+ /*
+ * We should switch the PL310 to I/O coherency mode only if
+diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
+index a307eb6e7fa8..7f94755089e2 100644
+--- a/arch/arm64/include/asm/ptrace.h
++++ b/arch/arm64/include/asm/ptrace.h
+@@ -117,6 +117,8 @@ struct pt_regs {
+ };
+ u64 orig_x0;
+ u64 syscallno;
++ u64 orig_addr_limit;
++ u64 unused; // maintain 16 byte alignment
+ };
+
+ #define arch_has_single_step() (1)
+diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
+index 3ae6b310ac9b..1abcd8829f3b 100644
+--- a/arch/arm64/kernel/asm-offsets.c
++++ b/arch/arm64/kernel/asm-offsets.c
+@@ -59,6 +59,7 @@ int main(void)
+ DEFINE(S_PC, offsetof(struct pt_regs, pc));
+ DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0));
+ DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
++ DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
+ DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
+ BLANK();
+ DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
+diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
+index 12e8d2bcb3f9..6c3b7345a6c4 100644
+--- a/arch/arm64/kernel/entry.S
++++ b/arch/arm64/kernel/entry.S
+@@ -28,6 +28,7 @@
+ #include <asm/errno.h>
+ #include <asm/esr.h>
+ #include <asm/irq.h>
++#include <asm/memory.h>
+ #include <asm/thread_info.h>
+ #include <asm/unistd.h>
+
+@@ -97,7 +98,14 @@
+ mov x29, xzr // fp pointed to user-space
+ .else
+ add x21, sp, #S_FRAME_SIZE
+- .endif
++ get_thread_info tsk
++ /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
++ ldr x20, [tsk, #TI_ADDR_LIMIT]
++ str x20, [sp, #S_ORIG_ADDR_LIMIT]
++ mov x20, #TASK_SIZE_64
++ str x20, [tsk, #TI_ADDR_LIMIT]
++ ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
++ .endif /* \el == 0 */
+ mrs x22, elr_el1
+ mrs x23, spsr_el1
+ stp lr, x21, [sp, #S_LR]
+@@ -128,6 +136,14 @@
+ .endm
+
+ .macro kernel_exit, el
++ .if \el != 0
++ /* Restore the task's original addr_limit. */
++ ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
++ str x20, [tsk, #TI_ADDR_LIMIT]
++
++ /* No need to restore UAO, it will be restored from SPSR_EL1 */
++ .endif
++
+ ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
+ .if \el == 0
+ ct_user_enter
+@@ -406,7 +422,6 @@ el1_irq:
+ bl trace_hardirqs_off
+ #endif
+
+- get_thread_info tsk
+ irq_handler
+
+ #ifdef CONFIG_PREEMPT
+diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
+index c5392081b49b..58651a9dfcf8 100644
+--- a/arch/arm64/kernel/traps.c
++++ b/arch/arm64/kernel/traps.c
+@@ -64,8 +64,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
+
+ /*
+ * We need to switch to kernel mode so that we can use __get_user
+- * to safely read from kernel space. Note that we now dump the
+- * code first, just in case the backtrace kills us.
++ * to safely read from kernel space.
+ */
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+@@ -111,21 +110,12 @@ static void dump_backtrace_entry(unsigned long where)
+ print_ip_sym(where);
+ }
+
+-static void dump_instr(const char *lvl, struct pt_regs *regs)
++static void __dump_instr(const char *lvl, struct pt_regs *regs)
+ {
+ unsigned long addr = instruction_pointer(regs);
+- mm_segment_t fs;
+ char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
+ int i;
+
+- /*
+- * We need to switch to kernel mode so that we can use __get_user
+- * to safely read from kernel space. Note that we now dump the
+- * code first, just in case the backtrace kills us.
+- */
+- fs = get_fs();
+- set_fs(KERNEL_DS);
+-
+ for (i = -4; i < 1; i++) {
+ unsigned int val, bad;
+
+@@ -139,8 +129,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
+ }
+ }
+ printk("%sCode: %s\n", lvl, str);
++}
+
+- set_fs(fs);
++static void dump_instr(const char *lvl, struct pt_regs *regs)
++{
++ if (!user_mode(regs)) {
++ mm_segment_t fs = get_fs();
++ set_fs(KERNEL_DS);
++ __dump_instr(lvl, regs);
++ set_fs(fs);
++ } else {
++ __dump_instr(lvl, regs);
++ }
+ }
+
+ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
+index 10b79e9e87d1..e22849a90557 100644
+--- a/arch/arm64/mm/fault.c
++++ b/arch/arm64/mm/fault.c
+@@ -284,7 +284,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
+ }
+
+ if (permission_fault(esr) && (addr < USER_DS)) {
+- if (get_fs() == KERNEL_DS)
++ /* regs->orig_addr_limit may be 0 if we entered from EL0 */
++ if (regs->orig_addr_limit == KERNEL_DS)
+ die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
+
+ if (!search_exception_tables(regs->pc))
+diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
+index dbd12ea8ce68..43a76b07eb32 100644
+--- a/arch/arm64/mm/flush.c
++++ b/arch/arm64/mm/flush.c
+@@ -71,10 +71,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
+ {
+ struct page *page = pte_page(pte);
+
+- /* no flushing needed for anonymous pages */
+- if (!page_mapping(page))
+- return;
+-
+ if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+ sync_icache_aliases(page_address(page),
+ PAGE_SIZE << compound_order(page));
+diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
+index 942b8f6bf35b..1907ab379fad 100644
+--- a/arch/mips/include/asm/kvm_host.h
++++ b/arch/mips/include/asm/kvm_host.h
+@@ -336,6 +336,7 @@ struct kvm_mips_tlb {
+ #define KVM_MIPS_GUEST_TLB_SIZE 64
+ struct kvm_vcpu_arch {
+ void *host_ebase, *guest_ebase;
++ int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+ unsigned long host_stack;
+ unsigned long host_gp;
+
+diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h
+index 4ab4bdfad703..2143884709e4 100644
+--- a/arch/mips/kvm/interrupt.h
++++ b/arch/mips/kvm/interrupt.h
+@@ -28,6 +28,7 @@
+ #define MIPS_EXC_MAX 12
+ /* XXXSL More to follow */
+
++extern char __kvm_mips_vcpu_run_end[];
+ extern char mips32_exception[], mips32_exceptionEnd[];
+ extern char mips32_GuestException[], mips32_GuestExceptionEnd[];
+
+diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
+index 81687ab1b523..fc93a08b6954 100644
+--- a/arch/mips/kvm/locore.S
++++ b/arch/mips/kvm/locore.S
+@@ -227,6 +227,7 @@ FEXPORT(__kvm_mips_load_k0k1)
+
+ /* Jump to guest */
+ eret
++EXPORT(__kvm_mips_vcpu_run_end)
+
+ VECTOR(MIPSX(exception), unknown)
+ /* Find out what mode we came from and jump to the proper handler. */
+diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
+index 70ef1a43c114..e223cb3d9e81 100644
+--- a/arch/mips/kvm/mips.c
++++ b/arch/mips/kvm/mips.c
+@@ -314,6 +314,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+ memcpy(gebase + offset, mips32_GuestException,
+ mips32_GuestExceptionEnd - mips32_GuestException);
+
++#ifdef MODULE
++ offset += mips32_GuestExceptionEnd - mips32_GuestException;
++ memcpy(gebase + offset, (char *)__kvm_mips_vcpu_run,
++ __kvm_mips_vcpu_run_end - (char *)__kvm_mips_vcpu_run);
++ vcpu->arch.vcpu_run = gebase + offset;
++#else
++ vcpu->arch.vcpu_run = __kvm_mips_vcpu_run;
++#endif
++
+ /* Invalidate the icache for these ranges */
+ local_flush_icache_range((unsigned long)gebase,
+ (unsigned long)gebase + ALIGN(size, PAGE_SIZE));
+@@ -403,7 +412,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+ /* Disable hardware page table walking while in guest */
+ htw_stop();
+
+- r = __kvm_mips_vcpu_run(run, vcpu);
++ r = vcpu->arch.vcpu_run(run, vcpu);
+
+ /* Re-enable HTW before enabling interrupts */
+ htw_start();
+diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
+index b8500b4ac7fe..bec85055fc42 100644
+--- a/arch/powerpc/kernel/process.c
++++ b/arch/powerpc/kernel/process.c
+@@ -1501,6 +1501,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
+ current->thread.regs = regs - 1;
+ }
+
++#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
++ /*
++ * Clear any transactional state, we're exec()ing. The cause is
++ * not important as there will never be a recheckpoint so it's not
++ * user visible.
++ */
++ if (MSR_TM_SUSPENDED(mfmsr()))
++ tm_reclaim_current(0);
++#endif
++
+ memset(regs->gpr, 0, sizeof(regs->gpr));
+ regs->ctr = 0;
+ regs->link = 0;
+diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
+index ccd2037c797f..6ee4b72cda42 100644
+--- a/arch/powerpc/kernel/prom_init.c
++++ b/arch/powerpc/kernel/prom_init.c
+@@ -719,7 +719,7 @@ unsigned char ibm_architecture_vec[] = {
+ * must match by the macro below. Update the definition if
+ * the structure layout changes.
+ */
+-#define IBM_ARCH_VEC_NRCORES_OFFSET 125
++#define IBM_ARCH_VEC_NRCORES_OFFSET 133
+ W(NR_CPUS), /* number of cores supported */
+ 0,
+ 0,
+diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
+index bd98ce2be17b..3e8865b187de 100644
+--- a/arch/powerpc/platforms/pseries/iommu.c
++++ b/arch/powerpc/platforms/pseries/iommu.c
+@@ -912,7 +912,8 @@ machine_arch_initcall(pseries, find_existing_ddw_windows);
+ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
+ struct ddw_query_response *query)
+ {
+- struct eeh_dev *edev;
++ struct device_node *dn;
++ struct pci_dn *pdn;
+ u32 cfg_addr;
+ u64 buid;
+ int ret;
+@@ -923,11 +924,10 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
+ * Retrieve them from the pci device, not the node with the
+ * dma-window property
+ */
+- edev = pci_dev_to_eeh_dev(dev);
+- cfg_addr = edev->config_addr;
+- if (edev->pe_config_addr)
+- cfg_addr = edev->pe_config_addr;
+- buid = edev->phb->buid;
++ dn = pci_device_to_OF_node(dev);
++ pdn = PCI_DN(dn);
++ buid = pdn->phb->buid;
++ cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
+
+ ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
+ cfg_addr, BUID_HI(buid), BUID_LO(buid));
+@@ -941,7 +941,8 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
+ struct ddw_create_response *create, int page_shift,
+ int window_shift)
+ {
+- struct eeh_dev *edev;
++ struct device_node *dn;
++ struct pci_dn *pdn;
+ u32 cfg_addr;
+ u64 buid;
+ int ret;
+@@ -952,11 +953,10 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
+ * Retrieve them from the pci device, not the node with the
+ * dma-window property
+ */
+- edev = pci_dev_to_eeh_dev(dev);
+- cfg_addr = edev->config_addr;
+- if (edev->pe_config_addr)
+- cfg_addr = edev->pe_config_addr;
+- buid = edev->phb->buid;
++ dn = pci_device_to_OF_node(dev);
++ pdn = PCI_DN(dn);
++ buid = pdn->phb->buid;
++ cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
+
+ do {
+ /* extra outputs are LIOBN and dma-addr (hi, lo) */
+diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
+index 5e04f3cbd320..8ae236b0f80b 100644
+--- a/arch/s390/include/asm/fpu/api.h
++++ b/arch/s390/include/asm/fpu/api.h
+@@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc)
+ " la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+- : "=d" (rc), "=d" (orig_fpc)
++ : "=d" (rc), "=&d" (orig_fpc)
+ : "d" (fpc), "0" (-EINVAL));
+ return rc;
+ }
+diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
+index f20abdb5630a..d14069d4b88d 100644
+--- a/arch/s390/kernel/ipl.c
++++ b/arch/s390/kernel/ipl.c
+@@ -2064,12 +2064,5 @@ void s390_reset_system(void)
+ S390_lowcore.program_new_psw.addr =
+ (unsigned long) s390_base_pgm_handler;
+
+- /*
+- * Clear subchannel ID and number to signal new kernel that no CCW or
+- * SCSI IPL has been done (for kexec and kdump)
+- */
+- S390_lowcore.subchannel_id = 0;
+- S390_lowcore.subchannel_nr = 0;
+-
+ do_reset_calls();
+ }
+diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
+index 4324b87f9398..9f0ce0e6eeb4 100644
+--- a/arch/s390/mm/pgtable.c
++++ b/arch/s390/mm/pgtable.c
+@@ -437,7 +437,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+ pgste = pgste_get_lock(ptep);
+ pgstev = pgste_val(pgste);
+ pte = *ptep;
+- if (pte_swap(pte) &&
++ if (!reset && pte_swap(pte) &&
+ ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
+ (pgstev & _PGSTE_GPS_ZERO))) {
+ ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
+diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
+index b1ef9e489084..b67f9e8b93a6 100644
+--- a/arch/x86/boot/Makefile
++++ b/arch/x86/boot/Makefile
+@@ -171,6 +171,9 @@ isoimage: $(obj)/bzImage
+ for i in lib lib64 share end ; do \
+ if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
+ cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
++ if [ -f /usr/$$i/syslinux/ldlinux.c32 ]; then \
++ cp /usr/$$i/syslinux/ldlinux.c32 $(obj)/isoimage ; \
++ fi ; \
+ break ; \
+ fi ; \
+ if [ $$i = end ] ; then exit 1 ; fi ; \
+diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
+index 041e442a3e28..7eb806ca6b03 100644
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -2313,7 +2313,7 @@ void
+ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+ {
+ struct stack_frame frame;
+- const void __user *fp;
++ const unsigned long __user *fp;
+
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ /* TODO: We don't support guest os callchain now */
+@@ -2326,7 +2326,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+ if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
+ return;
+
+- fp = (void __user *)regs->bp;
++ fp = (unsigned long __user *)regs->bp;
+
+ perf_callchain_store(entry, regs->ip);
+
+@@ -2339,16 +2339,17 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+ pagefault_disable();
+ while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ unsigned long bytes;
++
+ frame.next_frame = NULL;
+ frame.return_address = 0;
+
+- if (!access_ok(VERIFY_READ, fp, 16))
++ if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2))
+ break;
+
+- bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
++ bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
+ if (bytes != 0)
+ break;
+- bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
++ bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp));
+ if (bytes != 0)
+ break;
+
+diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
+index 1705c9d75e44..78ee9ebe38dd 100644
+--- a/arch/x86/events/intel/rapl.c
++++ b/arch/x86/events/intel/rapl.c
+@@ -665,7 +665,7 @@ static void __init cleanup_rapl_pmus(void)
+ int i;
+
+ for (i = 0; i < rapl_pmus->maxpkg; i++)
+- kfree(rapl_pmus->pmus + i);
++ kfree(rapl_pmus->pmus[i]);
+ kfree(rapl_pmus);
+ }
+
+diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
+index 7a79ee2778b3..33c709ca2666 100644
+--- a/arch/x86/include/asm/msr.h
++++ b/arch/x86/include/asm/msr.h
+@@ -112,7 +112,7 @@ static inline void native_write_msr(unsigned int msr,
+ unsigned low, unsigned high)
+ {
+ asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+- if (msr_tracepoint_active(__tracepoint_read_msr))
++ if (msr_tracepoint_active(__tracepoint_write_msr))
+ do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
+ }
+
+@@ -131,7 +131,7 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
+ : "c" (msr), "0" (low), "d" (high),
+ [fault] "i" (-EIO)
+ : "memory");
+- if (msr_tracepoint_active(__tracepoint_read_msr))
++ if (msr_tracepoint_active(__tracepoint_write_msr))
+ do_trace_write_msr(msr, ((u64)high << 32 | low), err);
+ return err;
+ }
+diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
+index a147e676fc7b..e991d5c8bb3a 100644
+--- a/arch/x86/kernel/amd_nb.c
++++ b/arch/x86/kernel/amd_nb.c
+@@ -71,8 +71,8 @@ int amd_cache_northbridges(void)
+ while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
+ i++;
+
+- if (i == 0)
+- return 0;
++ if (!i)
++ return -ENODEV;
+
+ nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
+ if (!nb)
+diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
+index ae703acb85c1..44bcd5779ec1 100644
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -960,7 +960,19 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+ * normal page fault.
+ */
+ regs->ip = (unsigned long)cur->addr;
++ /*
++ * Trap flag (TF) has been set here because this fault
++ * happened where the single stepping will be done.
++ * So clear it by resetting the current kprobe:
++ */
++ regs->flags &= ~X86_EFLAGS_TF;
++
++ /*
++ * If the TF flag was set before the kprobe hit,
++ * don't touch it:
++ */
+ regs->flags |= kcb->kprobe_old_flags;
++
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index faf52bac1416..c4217a23a98d 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2072,7 +2072,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
+ unsigned int dest;
+
+ if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+- !irq_remapping_cap(IRQ_POSTING_CAP))
++ !irq_remapping_cap(IRQ_POSTING_CAP) ||
++ !kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ do {
+@@ -2180,7 +2181,8 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+- !irq_remapping_cap(IRQ_POSTING_CAP))
++ !irq_remapping_cap(IRQ_POSTING_CAP) ||
++ !kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ /* Set SN when the vCPU is preempted */
+@@ -6657,7 +6659,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
+
+ /* Checks for #GP/#SS exceptions. */
+ exn = false;
+- if (is_protmode(vcpu)) {
++ if (is_long_mode(vcpu)) {
++ /* Long mode: #GP(0)/#SS(0) if the memory address is in a
++ * non-canonical form. This is the only check on the memory
++ * destination for long mode!
++ */
++ exn = is_noncanonical_address(*ret);
++ } else if (is_protmode(vcpu)) {
+ /* Protected mode: apply checks for segment validity in the
+ * following order:
+ * - segment type check (#GP(0) may be thrown)
+@@ -6674,17 +6682,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
+ * execute-only code segment
+ */
+ exn = ((s.type & 0xa) == 8);
+- }
+- if (exn) {
+- kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+- return 1;
+- }
+- if (is_long_mode(vcpu)) {
+- /* Long mode: #GP(0)/#SS(0) if the memory address is in a
+- * non-canonical form. This is an only check for long mode.
+- */
+- exn = is_noncanonical_address(*ret);
+- } else if (is_protmode(vcpu)) {
++ if (exn) {
++ kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
++ return 1;
++ }
+ /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
+ */
+ exn = (s.unusable != 0);
+@@ -10702,7 +10703,8 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+- !irq_remapping_cap(IRQ_POSTING_CAP))
++ !irq_remapping_cap(IRQ_POSTING_CAP) ||
++ !kvm_vcpu_apicv_active(vcpu))
+ return 0;
+
+ vcpu->pre_pcpu = vcpu->cpu;
+@@ -10768,7 +10770,8 @@ static void vmx_post_block(struct kvm_vcpu *vcpu)
+ unsigned long flags;
+
+ if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+- !irq_remapping_cap(IRQ_POSTING_CAP))
++ !irq_remapping_cap(IRQ_POSTING_CAP) ||
++ !kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ do {
+@@ -10821,7 +10824,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+ int idx, ret = -EINVAL;
+
+ if (!kvm_arch_has_assigned_device(kvm) ||
+- !irq_remapping_cap(IRQ_POSTING_CAP))
++ !irq_remapping_cap(IRQ_POSTING_CAP) ||
++ !kvm_vcpu_apicv_active(kvm->vcpus[0]))
+ return 0;
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
+index ead8dc0d084e..8ba426635b1b 100644
+--- a/crypto/rsa-pkcs1pad.c
++++ b/crypto/rsa-pkcs1pad.c
+@@ -102,10 +102,10 @@ struct pkcs1pad_inst_ctx {
+ };
+
+ struct pkcs1pad_request {
+- struct akcipher_request child_req;
+-
+ struct scatterlist in_sg[3], out_sg[2];
+ uint8_t *in_buf, *out_buf;
++
++ struct akcipher_request child_req;
+ };
+
+ static int pkcs1pad_set_pub_key(struct crypto_akcipher *tfm, const void *key,
+diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
+index 961acc788f44..91a9e6af2ec4 100644
+--- a/drivers/ata/libata-eh.c
++++ b/drivers/ata/libata-eh.c
+@@ -606,7 +606,7 @@ void ata_scsi_error(struct Scsi_Host *host)
+ ata_scsi_port_error_handler(host, ap);
+
+ /* finish or retry handled scmd's and clean up */
+- WARN_ON(host->host_failed || !list_empty(&eh_work_q));
++ WARN_ON(!list_empty(&eh_work_q));
+
+ DPRINTK("EXIT\n");
+ }
+diff --git a/drivers/base/module.c b/drivers/base/module.c
+index db930d3ee312..2a215780eda2 100644
+--- a/drivers/base/module.c
++++ b/drivers/base/module.c
+@@ -24,10 +24,12 @@ static char *make_driver_name(struct device_driver *drv)
+
+ static void module_create_drivers_dir(struct module_kobject *mk)
+ {
+- if (!mk || mk->drivers_dir)
+- return;
++ static DEFINE_MUTEX(drivers_dir_mutex);
+
+- mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj);
++ mutex_lock(&drivers_dir_mutex);
++ if (mk && !mk->drivers_dir)
++ mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj);
++ mutex_unlock(&drivers_dir_mutex);
+ }
+
+ void module_add_driver(struct module *mod, struct device_driver *drv)
+diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
+index 94fb407d8561..44b1bd6baa38 100644
+--- a/drivers/char/ipmi/ipmi_msghandler.c
++++ b/drivers/char/ipmi/ipmi_msghandler.c
+@@ -3820,6 +3820,7 @@ static void handle_new_recv_msgs(ipmi_smi_t intf)
+ while (!list_empty(&intf->waiting_rcv_msgs)) {
+ smi_msg = list_entry(intf->waiting_rcv_msgs.next,
+ struct ipmi_smi_msg, link);
++ list_del(&smi_msg->link);
+ if (!run_to_completion)
+ spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
+ flags);
+@@ -3829,11 +3830,14 @@ static void handle_new_recv_msgs(ipmi_smi_t intf)
+ if (rv > 0) {
+ /*
+ * To preserve message order, quit if we
+- * can't handle a message.
++ * can't handle a message. Add the message
++ * back at the head, this is safe because this
++ * tasklet is the only thing that pulls the
++ * messages.
+ */
++ list_add(&smi_msg->link, &intf->waiting_rcv_msgs);
+ break;
+ } else {
+- list_del(&smi_msg->link);
+ if (rv == 0)
+ /* Message handled */
+ ipmi_free_smi_msg(smi_msg);
+diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
+index 29c7c53d2845..92561c87f349 100644
+--- a/drivers/crypto/qat/qat_common/Makefile
++++ b/drivers/crypto/qat/qat_common/Makefile
+@@ -2,6 +2,7 @@ $(obj)/qat_rsapubkey-asn1.o: $(obj)/qat_rsapubkey-asn1.c \
+ $(obj)/qat_rsapubkey-asn1.h
+ $(obj)/qat_rsaprivkey-asn1.o: $(obj)/qat_rsaprivkey-asn1.c \
+ $(obj)/qat_rsaprivkey-asn1.h
++$(obj)/qat_asym_algs.o: $(obj)/qat_rsapubkey-asn1.h $(obj)/qat_rsaprivkey-asn1.h
+
+ clean-files += qat_rsapubkey-asn1.c qat_rsapubkey-asn1.h
+ clean-files += qat_rsaprivkey-asn1.c qat_rsaprivkey-asn1.h
+diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
+index 1472f48c8ac6..ff51b51d2fd4 100644
+--- a/drivers/edac/edac_mc.c
++++ b/drivers/edac/edac_mc.c
+@@ -565,7 +565,8 @@ void edac_mc_reset_delay_period(unsigned long value)
+ list_for_each(item, &mc_devices) {
+ mci = list_entry(item, struct mem_ctl_info, link);
+
+- edac_mod_work(&mci->work, value);
++ if (mci->op_state == OP_RUNNING_POLL)
++ edac_mod_work(&mci->work, value);
+ }
+ mutex_unlock(&mem_ctls_mutex);
+ }
+diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
+index 8bf745d2da7e..b274fa2ffdec 100644
+--- a/drivers/edac/sb_edac.c
++++ b/drivers/edac/sb_edac.c
+@@ -239,8 +239,11 @@ static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
+ { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
+ };
+
+-#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19)
+-#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14)
++#define RIR_RNK_TGT(type, reg) (((type) == BROADWELL) ? \
++ GET_BITFIELD(reg, 20, 23) : GET_BITFIELD(reg, 16, 19))
++
++#define RIR_OFFSET(type, reg) (((type) == HASWELL || (type) == BROADWELL) ? \
++ GET_BITFIELD(reg, 2, 15) : GET_BITFIELD(reg, 2, 14))
+
+ /* Device 16, functions 2-7 */
+
+@@ -1916,14 +1919,14 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
+ pci_read_config_dword(pvt->pci_tad[i],
+ rir_offset[j][k],
+ &reg);
+- tmp_mb = RIR_OFFSET(reg) << 6;
++ tmp_mb = RIR_OFFSET(pvt->info.type, reg) << 6;
+
+ gb = div_u64_rem(tmp_mb, 1024, &mb);
+ edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
+ i, j, k,
+ gb, (mb*1000)/1024,
+ ((u64)tmp_mb) << 20L,
+- (u32)RIR_RNK_TGT(reg),
++ (u32)RIR_RNK_TGT(pvt->info.type, reg),
+ reg);
+ }
+ }
+@@ -2256,7 +2259,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
+ pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
+ rir_offset[n_rir][idx],
+ &reg);
+- *rank = RIR_RNK_TGT(reg);
++ *rank = RIR_RNK_TGT(pvt->info.type, reg);
+
+ edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
+ n_rir,
+diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c
+index 8b3226dca1d9..caff46c0e214 100644
+--- a/drivers/extcon/extcon-palmas.c
++++ b/drivers/extcon/extcon-palmas.c
+@@ -360,6 +360,8 @@ static int palmas_usb_probe(struct platform_device *pdev)
+
+ palmas_enable_irq(palmas_usb);
+ /* perform initial detection */
++ if (palmas_usb->enable_gpio_vbus_detection)
++ palmas_vbus_irq_handler(palmas_usb->gpio_vbus_irq, palmas_usb);
+ palmas_gpio_id_detect(&palmas_usb->wq_detectid.work);
+ device_set_wakeup_capable(&pdev->dev, true);
+ return 0;
+diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c
+index e85e7539cf5d..eb43ae4835c1 100644
+--- a/drivers/gpio/gpio-sch.c
++++ b/drivers/gpio/gpio-sch.c
+@@ -61,9 +61,8 @@ static unsigned sch_gpio_bit(struct sch_gpio *sch, unsigned gpio)
+ return gpio % 8;
+ }
+
+-static int sch_gpio_reg_get(struct gpio_chip *gc, unsigned gpio, unsigned reg)
++static int sch_gpio_reg_get(struct sch_gpio *sch, unsigned gpio, unsigned reg)
+ {
+- struct sch_gpio *sch = gpiochip_get_data(gc);
+ unsigned short offset, bit;
+ u8 reg_val;
+
+@@ -75,10 +74,9 @@ static int sch_gpio_reg_get(struct gpio_chip *gc, unsigned gpio, unsigned reg)
+ return reg_val;
+ }
+
+-static void sch_gpio_reg_set(struct gpio_chip *gc, unsigned gpio, unsigned reg,
++static void sch_gpio_reg_set(struct sch_gpio *sch, unsigned gpio, unsigned reg,
+ int val)
+ {
+- struct sch_gpio *sch = gpiochip_get_data(gc);
+ unsigned short offset, bit;
+ u8 reg_val;
+
+@@ -98,14 +96,15 @@ static int sch_gpio_direction_in(struct gpio_chip *gc, unsigned gpio_num)
+ struct sch_gpio *sch = gpiochip_get_data(gc);
+
+ spin_lock(&sch->lock);
+- sch_gpio_reg_set(gc, gpio_num, GIO, 1);
++ sch_gpio_reg_set(sch, gpio_num, GIO, 1);
+ spin_unlock(&sch->lock);
+ return 0;
+ }
+
+ static int sch_gpio_get(struct gpio_chip *gc, unsigned gpio_num)
+ {
+- return sch_gpio_reg_get(gc, gpio_num, GLV);
++ struct sch_gpio *sch = gpiochip_get_data(gc);
++ return sch_gpio_reg_get(sch, gpio_num, GLV);
+ }
+
+ static void sch_gpio_set(struct gpio_chip *gc, unsigned gpio_num, int val)
+@@ -113,7 +112,7 @@ static void sch_gpio_set(struct gpio_chip *gc, unsigned gpio_num, int val)
+ struct sch_gpio *sch = gpiochip_get_data(gc);
+
+ spin_lock(&sch->lock);
+- sch_gpio_reg_set(gc, gpio_num, GLV, val);
++ sch_gpio_reg_set(sch, gpio_num, GLV, val);
+ spin_unlock(&sch->lock);
+ }
+
+@@ -123,7 +122,7 @@ static int sch_gpio_direction_out(struct gpio_chip *gc, unsigned gpio_num,
+ struct sch_gpio *sch = gpiochip_get_data(gc);
+
+ spin_lock(&sch->lock);
+- sch_gpio_reg_set(gc, gpio_num, GIO, 0);
++ sch_gpio_reg_set(sch, gpio_num, GIO, 0);
+ spin_unlock(&sch->lock);
+
+ /*
+@@ -182,13 +181,13 @@ static int sch_gpio_probe(struct platform_device *pdev)
+ * GPIO7 is configured by the CMC as SLPIOVR
+ * Enable GPIO[9:8] core powered gpios explicitly
+ */
+- sch_gpio_reg_set(&sch->chip, 8, GEN, 1);
+- sch_gpio_reg_set(&sch->chip, 9, GEN, 1);
++ sch_gpio_reg_set(sch, 8, GEN, 1);
++ sch_gpio_reg_set(sch, 9, GEN, 1);
+ /*
+ * SUS_GPIO[2:0] enabled by default
+ * Enable SUS_GPIO3 resume powered gpio explicitly
+ */
+- sch_gpio_reg_set(&sch->chip, 13, GEN, 1);
++ sch_gpio_reg_set(sch, 13, GEN, 1);
+ break;
+
+ case PCI_DEVICE_ID_INTEL_ITC_LPC:
+diff --git a/drivers/gpio/gpiolib-legacy.c b/drivers/gpio/gpiolib-legacy.c
+index 3a5c7011ad3b..8b830996fe02 100644
+--- a/drivers/gpio/gpiolib-legacy.c
++++ b/drivers/gpio/gpiolib-legacy.c
+@@ -28,6 +28,10 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label)
+ if (!desc && gpio_is_valid(gpio))
+ return -EPROBE_DEFER;
+
++ err = gpiod_request(desc, label);
++ if (err)
++ return err;
++
+ if (flags & GPIOF_OPEN_DRAIN)
+ set_bit(FLAG_OPEN_DRAIN, &desc->flags);
+
+@@ -37,10 +41,6 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label)
+ if (flags & GPIOF_ACTIVE_LOW)
+ set_bit(FLAG_ACTIVE_LOW, &desc->flags);
+
+- err = gpiod_request(desc, label);
+- if (err)
+- return err;
+-
+ if (flags & GPIOF_DIR_IN)
+ err = gpiod_direction_input(desc);
+ else
+diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
+index cf3e71243d6d..996a73390bba 100644
+--- a/drivers/gpio/gpiolib.c
++++ b/drivers/gpio/gpiolib.c
+@@ -1324,14 +1324,6 @@ static int __gpiod_request(struct gpio_desc *desc, const char *label)
+ spin_lock_irqsave(&gpio_lock, flags);
+ }
+ done:
+- if (status < 0) {
+- /* Clear flags that might have been set by the caller before
+- * requesting the GPIO.
+- */
+- clear_bit(FLAG_ACTIVE_LOW, &desc->flags);
+- clear_bit(FLAG_OPEN_DRAIN, &desc->flags);
+- clear_bit(FLAG_OPEN_SOURCE, &desc->flags);
+- }
+ spin_unlock_irqrestore(&gpio_lock, flags);
+ return status;
+ }
+@@ -1345,8 +1337,12 @@ done:
+ #define VALIDATE_DESC(desc) do { \
+ if (!desc) \
+ return 0; \
++ if (IS_ERR(desc)) { \
++ pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
++ return PTR_ERR(desc); \
++ } \
+ if (!desc->gdev) { \
+- pr_warn("%s: invalid GPIO\n", __func__); \
++ pr_warn("%s: invalid GPIO (no device)\n", __func__); \
+ return -EINVAL; \
+ } \
+ if ( !desc->gdev->chip ) { \
+@@ -1358,8 +1354,12 @@ done:
+ #define VALIDATE_DESC_VOID(desc) do { \
+ if (!desc) \
+ return; \
++ if (IS_ERR(desc)) { \
++ pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
++ return; \
++ } \
+ if (!desc->gdev) { \
+- pr_warn("%s: invalid GPIO\n", __func__); \
++ pr_warn("%s: invalid GPIO (no device)\n", __func__); \
+ return; \
+ } \
+ if (!desc->gdev->chip) { \
+@@ -2011,7 +2011,7 @@ int gpiod_to_irq(const struct gpio_desc *desc)
+ * requires this function to not return zero on an invalid descriptor
+ * but rather a negative error number.
+ */
+- if (!desc || !desc->gdev || !desc->gdev->chip)
++ if (!desc || IS_ERR(desc) || !desc->gdev || !desc->gdev->chip)
+ return -EINVAL;
+
+ chip = desc->gdev->chip;
+@@ -2507,28 +2507,13 @@ struct gpio_desc *__must_check gpiod_get_optional(struct device *dev,
+ }
+ EXPORT_SYMBOL_GPL(gpiod_get_optional);
+
+-/**
+- * gpiod_parse_flags - helper function to parse GPIO lookup flags
+- * @desc: gpio to be setup
+- * @lflags: gpio_lookup_flags - returned from of_find_gpio() or
+- * of_get_gpio_hog()
+- *
+- * Set the GPIO descriptor flags based on the given GPIO lookup flags.
+- */
+-static void gpiod_parse_flags(struct gpio_desc *desc, unsigned long lflags)
+-{
+- if (lflags & GPIO_ACTIVE_LOW)
+- set_bit(FLAG_ACTIVE_LOW, &desc->flags);
+- if (lflags & GPIO_OPEN_DRAIN)
+- set_bit(FLAG_OPEN_DRAIN, &desc->flags);
+- if (lflags & GPIO_OPEN_SOURCE)
+- set_bit(FLAG_OPEN_SOURCE, &desc->flags);
+-}
+
+ /**
+ * gpiod_configure_flags - helper function to configure a given GPIO
+ * @desc: gpio whose value will be assigned
+ * @con_id: function within the GPIO consumer
++ * @lflags: gpio_lookup_flags - returned from of_find_gpio() or
++ * of_get_gpio_hog()
+ * @dflags: gpiod_flags - optional GPIO initialization flags
+ *
+ * Return 0 on success, -ENOENT if no GPIO has been assigned to the
+@@ -2536,10 +2521,17 @@ static void gpiod_parse_flags(struct gpio_desc *desc, unsigned long lflags)
+ * occurred while trying to acquire the GPIO.
+ */
+ static int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
+- enum gpiod_flags dflags)
++ unsigned long lflags, enum gpiod_flags dflags)
+ {
+ int status;
+
++ if (lflags & GPIO_ACTIVE_LOW)
++ set_bit(FLAG_ACTIVE_LOW, &desc->flags);
++ if (lflags & GPIO_OPEN_DRAIN)
++ set_bit(FLAG_OPEN_DRAIN, &desc->flags);
++ if (lflags & GPIO_OPEN_SOURCE)
++ set_bit(FLAG_OPEN_SOURCE, &desc->flags);
++
+ /* No particular flag request, return here... */
+ if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) {
+ pr_debug("no flags found for %s\n", con_id);
+@@ -2606,13 +2598,11 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
+ return desc;
+ }
+
+- gpiod_parse_flags(desc, lookupflags);
+-
+ status = gpiod_request(desc, con_id);
+ if (status < 0)
+ return ERR_PTR(status);
+
+- status = gpiod_configure_flags(desc, con_id, flags);
++ status = gpiod_configure_flags(desc, con_id, lookupflags, flags);
+ if (status < 0) {
+ dev_dbg(dev, "setup of GPIO %s failed\n", con_id);
+ gpiod_put(desc);
+@@ -2668,6 +2658,10 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
+ if (IS_ERR(desc))
+ return desc;
+
++ ret = gpiod_request(desc, NULL);
++ if (ret)
++ return ERR_PTR(ret);
++
+ if (active_low)
+ set_bit(FLAG_ACTIVE_LOW, &desc->flags);
+
+@@ -2678,10 +2672,6 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
+ set_bit(FLAG_OPEN_SOURCE, &desc->flags);
+ }
+
+- ret = gpiod_request(desc, NULL);
+- if (ret)
+- return ERR_PTR(ret);
+-
+ return desc;
+ }
+ EXPORT_SYMBOL_GPL(fwnode_get_named_gpiod);
+@@ -2734,8 +2724,6 @@ int gpiod_hog(struct gpio_desc *desc, const char *name,
+ chip = gpiod_to_chip(desc);
+ hwnum = gpio_chip_hwgpio(desc);
+
+- gpiod_parse_flags(desc, lflags);
+-
+ local_desc = gpiochip_request_own_desc(chip, hwnum, name);
+ if (IS_ERR(local_desc)) {
+ pr_err("requesting hog GPIO %s (chip %s, offset %d) failed\n",
+@@ -2743,7 +2731,7 @@ int gpiod_hog(struct gpio_desc *desc, const char *name,
+ return PTR_ERR(local_desc);
+ }
+
+- status = gpiod_configure_flags(desc, name, dflags);
++ status = gpiod_configure_flags(desc, name, lflags, dflags);
+ if (status < 0) {
+ pr_err("setup of hog GPIO %s (chip %s, offset %d) failed\n",
+ name, chip->label, hwnum);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+index 6043dc7c3a94..3e21732f22e3 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+@@ -880,7 +880,7 @@ static int amdgpu_cgs_acpi_eval_object(void *cgs_device,
+ struct cgs_acpi_method_argument *argument = NULL;
+ uint32_t i, count;
+ acpi_status status;
+- int result;
++ int result = 0;
+ uint32_t func_no = 0xFFFFFFFF;
+
+ handle = ACPI_HANDLE(&adev->pdev->dev);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index b04337de65d1..d78739d2952d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -448,7 +448,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ dev_info.max_memory_clock = adev->pm.default_mclk * 10;
+ }
+ dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
+- dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
++ dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
++ adev->gfx.config.max_shader_engines;
+ dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
+ dev_info._pad = 0;
+ dev_info.ids_flags = 0;
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+index bb8709066fd8..d2216f83bd7a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+@@ -5074,7 +5074,7 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+- if ((ring->me == me_id) & (ring->pipe == pipe_id))
++ if ((ring->me == me_id) && (ring->pipe == pipe_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index ac005796b71c..7708d90b9da9 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -242,13 +242,19 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
+ pqm_uninit(&p->pqm);
+
+ /* Iterate over all process device data structure and check
+- * if we should reset all wavefronts */
+- list_for_each_entry(pdd, &p->per_device_data, per_device_list)
++ * if we should delete debug managers and reset all wavefronts
++ */
++ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
++ if ((pdd->dev->dbgmgr) &&
++ (pdd->dev->dbgmgr->pasid == p->pasid))
++ kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
++
+ if (pdd->reset_wavefronts) {
+ pr_warn("amdkfd: Resetting all wave fronts\n");
+ dbgdev_wave_reset_wavefronts(pdd->dev, p);
+ pdd->reset_wavefronts = false;
+ }
++ }
+
+ mutex_unlock(&p->mutex);
+
+@@ -404,42 +410,52 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
+
+ idx = srcu_read_lock(&kfd_processes_srcu);
+
++ /*
++ * Look for the process that matches the pasid. If there is no such
++ * process, we either released it in amdkfd's own notifier, or there
++ * is a bug. Unfortunately, there is no way to tell...
++ */
+ hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes)
+- if (p->pasid == pasid)
+- break;
++ if (p->pasid == pasid) {
+
+- srcu_read_unlock(&kfd_processes_srcu, idx);
++ srcu_read_unlock(&kfd_processes_srcu, idx);
+
+- BUG_ON(p->pasid != pasid);
++ pr_debug("Unbinding process %d from IOMMU\n", pasid);
+
+- mutex_lock(&p->mutex);
++ mutex_lock(&p->mutex);
+
+- if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
+- kfd_dbgmgr_destroy(dev->dbgmgr);
++ if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
++ kfd_dbgmgr_destroy(dev->dbgmgr);
+
+- pqm_uninit(&p->pqm);
++ pqm_uninit(&p->pqm);
+
+- pdd = kfd_get_process_device_data(dev, p);
++ pdd = kfd_get_process_device_data(dev, p);
+
+- if (!pdd) {
+- mutex_unlock(&p->mutex);
+- return;
+- }
++ if (!pdd) {
++ mutex_unlock(&p->mutex);
++ return;
++ }
+
+- if (pdd->reset_wavefronts) {
+- dbgdev_wave_reset_wavefronts(pdd->dev, p);
+- pdd->reset_wavefronts = false;
+- }
++ if (pdd->reset_wavefronts) {
++ dbgdev_wave_reset_wavefronts(pdd->dev, p);
++ pdd->reset_wavefronts = false;
++ }
+
+- /*
+- * Just mark pdd as unbound, because we still need it to call
+- * amd_iommu_unbind_pasid() in when the process exits.
+- * We don't call amd_iommu_unbind_pasid() here
+- * because the IOMMU called us.
+- */
+- pdd->bound = false;
++ /*
++ * Just mark pdd as unbound, because we still need it
++ * to call amd_iommu_unbind_pasid() in when the
++ * process exits.
++ * We don't call amd_iommu_unbind_pasid() here
++ * because the IOMMU called us.
++ */
++ pdd->bound = false;
+
+- mutex_unlock(&p->mutex);
++ mutex_unlock(&p->mutex);
++
++ return;
++ }
++
++ srcu_read_unlock(&kfd_processes_srcu, idx);
+ }
+
+ struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
+diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+index fa208ada6892..efb77eda7508 100644
+--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
++++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+@@ -306,10 +306,14 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
+ {
+ PHM_FUNC_CHECK(hwmgr);
+
+- if (hwmgr->hwmgr_func->store_cc6_data == NULL)
++ if (display_config == NULL)
+ return -EINVAL;
+
+ hwmgr->display_config = *display_config;
++
++ if (hwmgr->hwmgr_func->store_cc6_data == NULL)
++ return -EINVAL;
++
+ /* to do pass other display configuration in furture */
+
+ if (hwmgr->hwmgr_func->store_cc6_data)
+diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
+index 7b2d5000292d..7cce483b0859 100644
+--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
++++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
+@@ -21,6 +21,20 @@ bool acpi_atcs_functions_supported(void *device, uint32_t index)
+ return result == 0 ? (output_buf.function_bits & (1 << (index - 1))) != 0 : false;
+ }
+
++bool acpi_atcs_notify_pcie_device_ready(void *device)
++{
++ int32_t temp_buffer = 1;
++
++ return cgs_call_acpi_method(device, CGS_ACPI_METHOD_ATCS,
++ ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION,
++ &temp_buffer,
++ NULL,
++ 0,
++ sizeof(temp_buffer),
++ 0);
++}
++
++
+ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
+ {
+ struct atcs_pref_req_input atcs_input;
+@@ -29,7 +43,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
+ int result;
+ struct cgs_system_info info = {0};
+
+- if (!acpi_atcs_functions_supported(device, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST))
++ if( 0 != acpi_atcs_notify_pcie_device_ready(device))
+ return -EINVAL;
+
+ info.size = sizeof(struct cgs_system_info);
+@@ -54,7 +68,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
+ ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST,
+ &atcs_input,
+ &atcs_output,
+- 0,
++ 1,
+ sizeof(atcs_input),
+ sizeof(atcs_output));
+ if (result != 0)
+diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+index 0d5d8372953e..aae2e8ec0542 100644
+--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
++++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+@@ -1298,7 +1298,7 @@ static int tonga_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
+ table->Smio[count] |=
+ data->mvdd_voltage_table.entries[count].smio_low;
+ }
+- table->SmioMask2 = data->vddci_voltage_table.mask_low;
++ table->SmioMask2 = data->mvdd_voltage_table.mask_low;
+
+ CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount);
+ }
+diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
+index b156481b50e8..17766e8da0ca 100644
+--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
++++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
+@@ -299,7 +299,7 @@ static int init_dpm_2_parameters(
+ (((unsigned long)powerplay_table) + le16_to_cpu(powerplay_table->usPPMTableOffset));
+
+ if (0 != powerplay_table->usPPMTableOffset) {
+- if (1 == get_platform_power_management_table(hwmgr, atom_ppm_table)) {
++ if (get_platform_power_management_table(hwmgr, atom_ppm_table) == 0) {
+ phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_EnablePlatformPowerManagement);
+ }
+diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
+index 3bd5e69b9045..3df5de2cdab0 100644
+--- a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
++++ b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
+@@ -26,3 +26,4 @@ extern bool acpi_atcs_functions_supported(void *device,
+ extern int acpi_pcie_perf_request(void *device,
+ uint8_t perf_req,
+ bool advertise);
++extern bool acpi_atcs_notify_pcie_device_ready(void *device);
+diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+index d65dcaee3832..6d9c0f5bcba6 100644
+--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
++++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+@@ -335,6 +335,8 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane,
+
+ atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff,
+ factor_reg);
++ } else {
++ atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, 0);
+ }
+ }
+
+diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
+index d307d9627887..080a09014580 100644
+--- a/drivers/gpu/drm/drm_atomic.c
++++ b/drivers/gpu/drm/drm_atomic.c
+@@ -354,6 +354,8 @@ int drm_atomic_set_mode_prop_for_crtc(struct drm_crtc_state *state,
+ drm_property_unreference_blob(state->mode_blob);
+ state->mode_blob = NULL;
+
++ memset(&state->mode, 0, sizeof(state->mode));
++
+ if (blob) {
+ if (blob->length != sizeof(struct drm_mode_modeinfo) ||
+ drm_mode_convert_umode(&state->mode,
+@@ -366,7 +368,6 @@ int drm_atomic_set_mode_prop_for_crtc(struct drm_crtc_state *state,
+ DRM_DEBUG_ATOMIC("Set [MODE:%s] for CRTC state %p\n",
+ state->mode.name, state);
+ } else {
+- memset(&state->mode, 0, sizeof(state->mode));
+ state->enable = false;
+ DRM_DEBUG_ATOMIC("Set [NOMODE] for CRTC state %p\n",
+ state);
+@@ -1287,14 +1288,39 @@ EXPORT_SYMBOL(drm_atomic_add_affected_planes);
+ */
+ void drm_atomic_legacy_backoff(struct drm_atomic_state *state)
+ {
++ struct drm_device *dev = state->dev;
++ unsigned crtc_mask = 0;
++ struct drm_crtc *crtc;
+ int ret;
++ bool global = false;
++
++ drm_for_each_crtc(crtc, dev) {
++ if (crtc->acquire_ctx != state->acquire_ctx)
++ continue;
++
++ crtc_mask |= drm_crtc_mask(crtc);
++ crtc->acquire_ctx = NULL;
++ }
++
++ if (WARN_ON(dev->mode_config.acquire_ctx == state->acquire_ctx)) {
++ global = true;
++
++ dev->mode_config.acquire_ctx = NULL;
++ }
+
+ retry:
+ drm_modeset_backoff(state->acquire_ctx);
+
+- ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx);
++ ret = drm_modeset_lock_all_ctx(dev, state->acquire_ctx);
+ if (ret)
+ goto retry;
++
++ drm_for_each_crtc(crtc, dev)
++ if (drm_crtc_mask(crtc) & crtc_mask)
++ crtc->acquire_ctx = state->acquire_ctx;
++
++ if (global)
++ dev->mode_config.acquire_ctx = state->acquire_ctx;
+ }
+ EXPORT_SYMBOL(drm_atomic_legacy_backoff);
+
+diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
+index f30de8053545..691a1b939c1c 100644
+--- a/drivers/gpu/drm/drm_crtc.c
++++ b/drivers/gpu/drm/drm_crtc.c
+@@ -2800,8 +2800,6 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
+ goto out;
+ }
+
+- drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
+-
+ /*
+ * Check whether the primary plane supports the fb pixel format.
+ * Drivers not implementing the universal planes API use a
+diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
+index 71ea0521ea96..ccfe7e72d8fc 100644
+--- a/drivers/gpu/drm/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/drm_dp_mst_topology.c
+@@ -2908,11 +2908,9 @@ static void drm_dp_destroy_connector_work(struct work_struct *work)
+ drm_dp_port_teardown_pdt(port, port->pdt);
+
+ if (!port->input && port->vcpi.vcpi > 0) {
+- if (mgr->mst_state) {
+- drm_dp_mst_reset_vcpi_slots(mgr, port);
+- drm_dp_update_payload_part1(mgr);
+- drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
+- }
++ drm_dp_mst_reset_vcpi_slots(mgr, port);
++ drm_dp_update_payload_part1(mgr);
++ drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
+ }
+
+ kref_put(&port->kref, drm_dp_free_mst_port);
+diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
+index bb88e3df9257..e619b00c7343 100644
+--- a/drivers/gpu/drm/drm_fb_cma_helper.c
++++ b/drivers/gpu/drm/drm_fb_cma_helper.c
+@@ -301,7 +301,7 @@ static int drm_fbdev_cma_create(struct drm_fb_helper *helper,
+ err_fb_info_destroy:
+ drm_fb_helper_release_fbi(helper);
+ err_gem_free_object:
+- dev->driver->gem_free_object(&obj->base);
++ drm_gem_object_unreference_unlocked(&obj->base);
+ return ret;
+ }
+
+diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
+index 1f500a1b9969..d988ca0b597a 100644
+--- a/drivers/gpu/drm/drm_gem_cma_helper.c
++++ b/drivers/gpu/drm/drm_gem_cma_helper.c
+@@ -121,7 +121,7 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
+ return cma_obj;
+
+ error:
+- drm->driver->gem_free_object(&cma_obj->base);
++ drm_gem_object_unreference_unlocked(&cma_obj->base);
+ return ERR_PTR(ret);
+ }
+ EXPORT_SYMBOL_GPL(drm_gem_cma_create);
+@@ -162,18 +162,12 @@ drm_gem_cma_create_with_handle(struct drm_file *file_priv,
+ * and handle has the id what user can see.
+ */
+ ret = drm_gem_handle_create(file_priv, gem_obj, handle);
+- if (ret)
+- goto err_handle_create;
+-
+ /* drop reference from allocate - handle holds it now. */
+ drm_gem_object_unreference_unlocked(gem_obj);
++ if (ret)
++ return ERR_PTR(ret);
+
+ return cma_obj;
+-
+-err_handle_create:
+- drm->driver->gem_free_object(gem_obj);
+-
+- return ERR_PTR(ret);
+ }
+
+ /**
+diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
+index f7448a5e95a9..5d0fc2644352 100644
+--- a/drivers/gpu/drm/drm_modes.c
++++ b/drivers/gpu/drm/drm_modes.c
+@@ -1518,6 +1518,8 @@ int drm_mode_convert_umode(struct drm_display_mode *out,
+ if (out->status != MODE_OK)
+ goto out;
+
++ drm_mode_set_crtcinfo(out, CRTC_INTERLACE_HALVE_V);
++
+ ret = 0;
+
+ out:
+diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+index e8d9337a66d8..77886f1182f1 100644
+--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
++++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+@@ -40,9 +40,10 @@ static const struct regmap_config fsl_dcu_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+- .cache_type = REGCACHE_RBTREE,
++ .cache_type = REGCACHE_FLAT,
+
+ .volatile_reg = fsl_dcu_drm_is_volatile_reg,
++ .max_register = 0x11fc,
+ };
+
+ static int fsl_dcu_drm_irq_init(struct drm_device *dev)
+diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
+index d3c473ffb90a..3af40616bf8b 100644
+--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
++++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
+@@ -39,7 +39,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
+ if (!mutex_is_locked(mutex))
+ return false;
+
+-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
++#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
+ return mutex->owner == task;
+ #else
+ /* Since UP may be pre-empted, we cannot assume that we own the lock */
+diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
+index 7741efbd5e57..e5db9e1f623f 100644
+--- a/drivers/gpu/drm/i915/intel_display.c
++++ b/drivers/gpu/drm/i915/intel_display.c
+@@ -8229,12 +8229,14 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
+ {
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_encoder *encoder;
++ int i;
+ u32 val, final;
+ bool has_lvds = false;
+ bool has_cpu_edp = false;
+ bool has_panel = false;
+ bool has_ck505 = false;
+ bool can_ssc = false;
++ bool using_ssc_source = false;
+
+ /* We need to take the global config into account */
+ for_each_intel_encoder(dev, encoder) {
+@@ -8261,8 +8263,22 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
+ can_ssc = true;
+ }
+
+- DRM_DEBUG_KMS("has_panel %d has_lvds %d has_ck505 %d\n",
+- has_panel, has_lvds, has_ck505);
++ /* Check if any DPLLs are using the SSC source */
++ for (i = 0; i < dev_priv->num_shared_dpll; i++) {
++ u32 temp = I915_READ(PCH_DPLL(i));
++
++ if (!(temp & DPLL_VCO_ENABLE))
++ continue;
++
++ if ((temp & PLL_REF_INPUT_MASK) ==
++ PLLB_REF_INPUT_SPREADSPECTRUMIN) {
++ using_ssc_source = true;
++ break;
++ }
++ }
++
++ DRM_DEBUG_KMS("has_panel %d has_lvds %d has_ck505 %d using_ssc_source %d\n",
++ has_panel, has_lvds, has_ck505, using_ssc_source);
+
+ /* Ironlake: try to setup display ref clock before DPLL
+ * enabling. This is only under driver's control after
+@@ -8299,9 +8315,9 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
+ final |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
+ } else
+ final |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
+- } else {
+- final |= DREF_SSC_SOURCE_DISABLE;
+- final |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
++ } else if (using_ssc_source) {
++ final |= DREF_SSC_SOURCE_ENABLE;
++ final |= DREF_SSC1_ENABLE;
+ }
+
+ if (final == val)
+@@ -8347,7 +8363,7 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
+ POSTING_READ(PCH_DREF_CONTROL);
+ udelay(200);
+ } else {
+- DRM_DEBUG_KMS("Disabling SSC entirely\n");
++ DRM_DEBUG_KMS("Disabling CPU source output\n");
+
+ val &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
+
+@@ -8358,16 +8374,20 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
+ POSTING_READ(PCH_DREF_CONTROL);
+ udelay(200);
+
+- /* Turn off the SSC source */
+- val &= ~DREF_SSC_SOURCE_MASK;
+- val |= DREF_SSC_SOURCE_DISABLE;
++ if (!using_ssc_source) {
++ DRM_DEBUG_KMS("Disabling SSC source\n");
+
+- /* Turn off SSC1 */
+- val &= ~DREF_SSC1_ENABLE;
++ /* Turn off the SSC source */
++ val &= ~DREF_SSC_SOURCE_MASK;
++ val |= DREF_SSC_SOURCE_DISABLE;
+
+- I915_WRITE(PCH_DREF_CONTROL, val);
+- POSTING_READ(PCH_DREF_CONTROL);
+- udelay(200);
++ /* Turn off SSC1 */
++ val &= ~DREF_SSC1_ENABLE;
++
++ I915_WRITE(PCH_DREF_CONTROL, val);
++ POSTING_READ(PCH_DREF_CONTROL);
++ udelay(200);
++ }
+ }
+
+ BUG_ON(val != final);
+diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
+index 412a34c39522..69054ef978fa 100644
+--- a/drivers/gpu/drm/i915/intel_dp.c
++++ b/drivers/gpu/drm/i915/intel_dp.c
+@@ -4942,13 +4942,15 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp)
+
+ void intel_dp_encoder_reset(struct drm_encoder *encoder)
+ {
+- struct intel_dp *intel_dp;
++ struct drm_i915_private *dev_priv = to_i915(encoder->dev);
++ struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
++
++ if (!HAS_DDI(dev_priv))
++ intel_dp->DP = I915_READ(intel_dp->output_reg);
+
+ if (to_intel_encoder(encoder)->type != INTEL_OUTPUT_EDP)
+ return;
+
+- intel_dp = enc_to_intel_dp(encoder);
+-
+ pps_lock(intel_dp);
+
+ /*
+@@ -5020,9 +5022,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
+ intel_display_power_get(dev_priv, power_domain);
+
+ if (long_hpd) {
+- /* indicate that we need to restart link training */
+- intel_dp->train_set_valid = false;
+-
+ if (!intel_digital_port_connected(dev_priv, intel_dig_port))
+ goto mst_fail;
+
+diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
+index 0b8eefc2acc5..926a1e6ea2f6 100644
+--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
++++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
+@@ -85,8 +85,7 @@ static bool
+ intel_dp_reset_link_train(struct intel_dp *intel_dp,
+ uint8_t dp_train_pat)
+ {
+- if (!intel_dp->train_set_valid)
+- memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
++ memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
+ intel_dp_set_signal_levels(intel_dp);
+ return intel_dp_set_link_train(intel_dp, dp_train_pat);
+ }
+@@ -161,22 +160,6 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
+ break;
+ }
+
+- /*
+- * if we used previously trained voltage and pre-emphasis values
+- * and we don't get clock recovery, reset link training values
+- */
+- if (intel_dp->train_set_valid) {
+- DRM_DEBUG_KMS("clock recovery not ok, reset");
+- /* clear the flag as we are not reusing train set */
+- intel_dp->train_set_valid = false;
+- if (!intel_dp_reset_link_train(intel_dp,
+- DP_TRAINING_PATTERN_1 |
+- DP_LINK_SCRAMBLING_DISABLE)) {
+- DRM_ERROR("failed to enable link training\n");
+- return;
+- }
+- continue;
+- }
+
+ /* Check to see if we've tried the max voltage */
+ for (i = 0; i < intel_dp->lane_count; i++)
+@@ -284,7 +267,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
+ /* Make sure clock is still ok */
+ if (!drm_dp_clock_recovery_ok(link_status,
+ intel_dp->lane_count)) {
+- intel_dp->train_set_valid = false;
+ intel_dp_link_training_clock_recovery(intel_dp);
+ intel_dp_set_link_train(intel_dp,
+ training_pattern |
+@@ -301,7 +283,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
+
+ /* Try 5 times, then try clock recovery if that fails */
+ if (tries > 5) {
+- intel_dp->train_set_valid = false;
+ intel_dp_link_training_clock_recovery(intel_dp);
+ intel_dp_set_link_train(intel_dp,
+ training_pattern |
+@@ -322,10 +303,8 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
+
+ intel_dp_set_idle_link_train(intel_dp);
+
+- if (channel_eq) {
+- intel_dp->train_set_valid = true;
++ if (channel_eq)
+ DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n");
+- }
+ }
+
+ void intel_dp_stop_link_train(struct intel_dp *intel_dp)
+diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
+index 3a30b37d6885..8dd2cc56451f 100644
+--- a/drivers/gpu/drm/i915/intel_drv.h
++++ b/drivers/gpu/drm/i915/intel_drv.h
+@@ -811,8 +811,6 @@ struct intel_dp {
+ /* This is called before a link training is started */
+ void (*prepare_link_retrain)(struct intel_dp *intel_dp);
+
+- bool train_set_valid;
+-
+ /* Displayport compliance testing */
+ unsigned long compliance_test_type;
+ unsigned long compliance_test_data;
+diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
+index 0f0492f4a357..28f4407722a1 100644
+--- a/drivers/gpu/drm/i915/intel_fbc.c
++++ b/drivers/gpu/drm/i915/intel_fbc.c
+@@ -823,8 +823,7 @@ static bool intel_fbc_can_choose(struct intel_crtc *crtc)
+ {
+ struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+ struct intel_fbc *fbc = &dev_priv->fbc;
+- bool enable_by_default = IS_HASWELL(dev_priv) ||
+- IS_BROADWELL(dev_priv);
++ bool enable_by_default = IS_BROADWELL(dev_priv);
+
+ if (intel_vgpu_active(dev_priv->dev)) {
+ fbc->no_fbc_reason = "VGPU is active";
+diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
+index 14e64e08909e..d347dca17267 100644
+--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
++++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
+@@ -182,7 +182,7 @@ static int mga_g200se_set_plls(struct mga_device *mdev, long clock)
+ }
+ }
+
+- fvv = pllreffreq * testn / testm;
++ fvv = pllreffreq * (n + 1) / (m + 1);
+ fvv = (fvv - 800000) / 50000;
+
+ if (fvv > 15)
+@@ -202,6 +202,14 @@ static int mga_g200se_set_plls(struct mga_device *mdev, long clock)
+ WREG_DAC(MGA1064_PIX_PLLC_M, m);
+ WREG_DAC(MGA1064_PIX_PLLC_N, n);
+ WREG_DAC(MGA1064_PIX_PLLC_P, p);
++
++ if (mdev->unique_rev_id >= 0x04) {
++ WREG_DAC(0x1a, 0x09);
++ msleep(20);
++ WREG_DAC(0x1a, 0x01);
++
++ }
++
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
+index db10c11f0595..c5a6ebd5a478 100644
+--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
++++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
+@@ -25,7 +25,8 @@ u16 nvbios_outp_match(struct nvkm_bios *, u16 type, u16 mask,
+ u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_outp *);
+
+ struct nvbios_ocfg {
+- u16 match;
++ u8 proto;
++ u8 flags;
+ u16 clkcmp[2];
+ };
+
+@@ -33,7 +34,7 @@ u16 nvbios_ocfg_entry(struct nvkm_bios *, u16 outp, u8 idx,
+ u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+ u16 nvbios_ocfg_parse(struct nvkm_bios *, u16 outp, u8 idx,
+ u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *);
+-u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u16 type,
++u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u8 proto, u8 flags,
+ u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *);
+ u16 nvbios_oclk_match(struct nvkm_bios *, u16 cmp, u32 khz);
+ #endif
+diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+index 59f27e774acb..e40a1b07a014 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+@@ -557,6 +557,8 @@ nouveau_fbcon_init(struct drm_device *dev)
+ if (ret)
+ goto fini;
+
++ if (fbcon->helper.fbdev)
++ fbcon->helper.fbdev->pixmap.buf_align = 4;
+ return 0;
+
+ fini:
+diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
+index 789dc2993b0d..8f715feadf56 100644
+--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
+@@ -82,7 +82,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ uint32_t fg;
+ uint32_t bg;
+ uint32_t dsize;
+- uint32_t width;
+ uint32_t *data = (uint32_t *)image->data;
+ int ret;
+
+@@ -93,9 +92,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ if (ret)
+ return ret;
+
+- width = ALIGN(image->width, 8);
+- dsize = ALIGN(width * image->height, 32) >> 5;
+-
+ if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
+ info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
+ fg = ((uint32_t *) info->pseudo_palette)[image->fg_color];
+@@ -111,10 +107,11 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ ((image->dx + image->width) & 0xffff));
+ OUT_RING(chan, bg);
+ OUT_RING(chan, fg);
+- OUT_RING(chan, (image->height << 16) | width);
++ OUT_RING(chan, (image->height << 16) | image->width);
+ OUT_RING(chan, (image->height << 16) | image->width);
+ OUT_RING(chan, (image->dy << 16) | (image->dx & 0xffff));
+
++ dsize = ALIGN(image->width * image->height, 32) >> 5;
+ while (dsize) {
+ int iter_len = dsize > 128 ? 128 : dsize;
+
+diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
+index e05499d6ed83..a4e259a00430 100644
+--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
+@@ -95,7 +95,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ struct nouveau_fbdev *nfbdev = info->par;
+ struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+ struct nouveau_channel *chan = drm->channel;
+- uint32_t width, dwords, *data = (uint32_t *)image->data;
++ uint32_t dwords, *data = (uint32_t *)image->data;
+ uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
+ uint32_t *palette = info->pseudo_palette;
+ int ret;
+@@ -107,9 +107,6 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ if (ret)
+ return ret;
+
+- width = ALIGN(image->width, 32);
+- dwords = (width * image->height) >> 5;
+-
+ BEGIN_NV04(chan, NvSub2D, 0x0814, 2);
+ if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
+ info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
+@@ -128,6 +125,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ OUT_RING(chan, 0);
+ OUT_RING(chan, image->dy);
+
++ dwords = ALIGN(image->width * image->height, 32) >> 5;
+ while (dwords) {
+ int push = dwords > 2047 ? 2047 : dwords;
+
+diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
+index c97395b4a312..f28315e865a5 100644
+--- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
+@@ -95,7 +95,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ struct nouveau_fbdev *nfbdev = info->par;
+ struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+ struct nouveau_channel *chan = drm->channel;
+- uint32_t width, dwords, *data = (uint32_t *)image->data;
++ uint32_t dwords, *data = (uint32_t *)image->data;
+ uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
+ uint32_t *palette = info->pseudo_palette;
+ int ret;
+@@ -107,9 +107,6 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ if (ret)
+ return ret;
+
+- width = ALIGN(image->width, 32);
+- dwords = (width * image->height) >> 5;
+-
+ BEGIN_NVC0(chan, NvSub2D, 0x0814, 2);
+ if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
+ info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
+@@ -128,6 +125,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
+ OUT_RING (chan, 0);
+ OUT_RING (chan, image->dy);
+
++ dwords = ALIGN(image->width * image->height, 32) >> 5;
+ while (dwords) {
+ int push = dwords > 2047 ? 2047 : dwords;
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+index 18fab3973ce5..62ad0300cfa5 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+@@ -1614,7 +1614,7 @@ nvkm_device_pci_func = {
+ .fini = nvkm_device_pci_fini,
+ .resource_addr = nvkm_device_pci_resource_addr,
+ .resource_size = nvkm_device_pci_resource_size,
+- .cpu_coherent = !IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_ARM64),
++ .cpu_coherent = !IS_ENABLED(CONFIG_ARM),
+ };
+
+ int
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+index a74c5dd27dc0..e2a64ed14b22 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+@@ -18,6 +18,7 @@ nvkm-y += nvkm/engine/disp/piornv50.o
+ nvkm-y += nvkm/engine/disp/sornv50.o
+ nvkm-y += nvkm/engine/disp/sorg94.o
+ nvkm-y += nvkm/engine/disp/sorgf119.o
++nvkm-y += nvkm/engine/disp/sorgm107.o
+ nvkm-y += nvkm/engine/disp/sorgm200.o
+ nvkm-y += nvkm/engine/disp/dport.o
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
+index f0314664349c..5dd34382f55a 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
+@@ -76,6 +76,7 @@ exec_lookup(struct nv50_disp *disp, int head, int or, u32 ctrl,
+ mask |= 0x0001 << or;
+ mask |= 0x0100 << head;
+
++
+ list_for_each_entry(outp, &disp->base.outp, head) {
+ if ((outp->info.hasht & 0xff) == type &&
+ (outp->info.hashm & mask) == mask) {
+@@ -155,25 +156,21 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
+ if (!outp)
+ return NULL;
+
++ *conf = (ctrl & 0x00000f00) >> 8;
+ switch (outp->info.type) {
+ case DCB_OUTPUT_TMDS:
+- *conf = (ctrl & 0x00000f00) >> 8;
+ if (*conf == 5)
+ *conf |= 0x0100;
+ break;
+ case DCB_OUTPUT_LVDS:
+- *conf = disp->sor.lvdsconf;
+- break;
+- case DCB_OUTPUT_DP:
+- *conf = (ctrl & 0x00000f00) >> 8;
++ *conf |= disp->sor.lvdsconf;
+ break;
+- case DCB_OUTPUT_ANALOG:
+ default:
+- *conf = 0x00ff;
+ break;
+ }
+
+- data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2);
++ data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8,
++ &ver, &hdr, &cnt, &len, &info2);
+ if (data && id < 0xff) {
+ data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
+ if (data) {
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
+index b6944142d616..f4b9cf8574be 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
+@@ -36,7 +36,7 @@ gm107_disp = {
+ .outp.internal.crt = nv50_dac_output_new,
+ .outp.internal.tmds = nv50_sor_output_new,
+ .outp.internal.lvds = nv50_sor_output_new,
+- .outp.internal.dp = gf119_sor_dp_new,
++ .outp.internal.dp = gm107_sor_dp_new,
+ .dac.nr = 3,
+ .dac.power = nv50_dac_power,
+ .dac.sense = nv50_dac_sense,
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+index 4226d2153b9c..fcb1b0c46d64 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+@@ -387,22 +387,17 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
+ if (!outp)
+ return NULL;
+
++ *conf = (ctrl & 0x00000f00) >> 8;
+ if (outp->info.location == 0) {
+ switch (outp->info.type) {
+ case DCB_OUTPUT_TMDS:
+- *conf = (ctrl & 0x00000f00) >> 8;
+ if (*conf == 5)
+ *conf |= 0x0100;
+ break;
+ case DCB_OUTPUT_LVDS:
+- *conf = disp->sor.lvdsconf;
++ *conf |= disp->sor.lvdsconf;
+ break;
+- case DCB_OUTPUT_DP:
+- *conf = (ctrl & 0x00000f00) >> 8;
+- break;
+- case DCB_OUTPUT_ANALOG:
+ default:
+- *conf = 0x00ff;
+ break;
+ }
+ } else {
+@@ -410,7 +405,8 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
+ pclk = pclk / 2;
+ }
+
+- data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2);
++ data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8,
++ &ver, &hdr, &cnt, &len, &info2);
+ if (data && id < 0xff) {
+ data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
+ if (data) {
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
+index e9067ba4e179..4e983f6d7032 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
+@@ -62,7 +62,12 @@ int g94_sor_dp_lnk_pwr(struct nvkm_output_dp *, int);
+ int gf119_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
+ struct nvkm_output **);
+ int gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *, int, int, bool);
++int gf119_sor_dp_drv_ctl(struct nvkm_output_dp *, int, int, int, int);
+
+-int gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
+- struct nvkm_output **);
++int gm107_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
++ struct nvkm_output **);
++int gm107_sor_dp_pattern(struct nvkm_output_dp *, int);
++
++int gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
++ struct nvkm_output **);
+ #endif
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+index b4b41b135643..49bd5da194e1 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+@@ -40,8 +40,8 @@ static int
+ gf119_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
+ {
+ struct nvkm_device *device = outp->base.disp->engine.subdev.device;
+- const u32 loff = gf119_sor_loff(outp);
+- nvkm_mask(device, 0x61c110 + loff, 0x0f0f0f0f, 0x01010101 * pattern);
++ const u32 soff = gf119_sor_soff(outp);
++ nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, 0x01010101 * pattern);
+ return 0;
+ }
+
+@@ -64,7 +64,7 @@ gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *outp, int nr, int bw, bool ef)
+ return 0;
+ }
+
+-static int
++int
+ gf119_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
+ int ln, int vs, int pe, int pc)
+ {
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
+new file mode 100644
+index 000000000000..37790b2617c5
+--- /dev/null
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
+@@ -0,0 +1,53 @@
++/*
++ * Copyright 2016 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: Ben Skeggs <bskeggs@redhat.com>
++ */
++#include "nv50.h"
++#include "outpdp.h"
++
++int
++gm107_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
++{
++ struct nvkm_device *device = outp->base.disp->engine.subdev.device;
++ const u32 soff = outp->base.or * 0x800;
++ const u32 data = 0x01010101 * pattern;
++ if (outp->base.info.sorconf.link & 1)
++ nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data);
++ else
++ nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data);
++ return 0;
++}
++
++static const struct nvkm_output_dp_func
++gm107_sor_dp_func = {
++ .pattern = gm107_sor_dp_pattern,
++ .lnk_pwr = g94_sor_dp_lnk_pwr,
++ .lnk_ctl = gf119_sor_dp_lnk_ctl,
++ .drv_ctl = gf119_sor_dp_drv_ctl,
++};
++
++int
++gm107_sor_dp_new(struct nvkm_disp *disp, int index,
++ struct dcb_output *dcbE, struct nvkm_output **poutp)
++{
++ return nvkm_output_dp_new_(&gm107_sor_dp_func, disp, index, dcbE, poutp);
++}
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
+index 2cfbef9c344f..c44fa7ea672a 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
+@@ -57,19 +57,6 @@ gm200_sor_dp_lane_map(struct nvkm_device *device, u8 lane)
+ }
+
+ static int
+-gm200_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
+-{
+- struct nvkm_device *device = outp->base.disp->engine.subdev.device;
+- const u32 soff = gm200_sor_soff(outp);
+- const u32 data = 0x01010101 * pattern;
+- if (outp->base.info.sorconf.link & 1)
+- nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data);
+- else
+- nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data);
+- return 0;
+-}
+-
+-static int
+ gm200_sor_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr)
+ {
+ struct nvkm_device *device = outp->base.disp->engine.subdev.device;
+@@ -129,7 +116,7 @@ gm200_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
+
+ static const struct nvkm_output_dp_func
+ gm200_sor_dp_func = {
+- .pattern = gm200_sor_dp_pattern,
++ .pattern = gm107_sor_dp_pattern,
+ .lnk_pwr = gm200_sor_dp_lnk_pwr,
+ .lnk_ctl = gf119_sor_dp_lnk_ctl,
+ .drv_ctl = gm200_sor_dp_drv_ctl,
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+index b2de290da16f..b0c721616c4e 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+@@ -942,22 +942,41 @@ gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
+ }
+
+ static const struct nvkm_enum gf100_mp_warp_error[] = {
+- { 0x00, "NO_ERROR" },
+- { 0x01, "STACK_MISMATCH" },
++ { 0x01, "STACK_ERROR" },
++ { 0x02, "API_STACK_ERROR" },
++ { 0x03, "RET_EMPTY_STACK_ERROR" },
++ { 0x04, "PC_WRAP" },
+ { 0x05, "MISALIGNED_PC" },
+- { 0x08, "MISALIGNED_GPR" },
+- { 0x09, "INVALID_OPCODE" },
+- { 0x0d, "GPR_OUT_OF_BOUNDS" },
+- { 0x0e, "MEM_OUT_OF_BOUNDS" },
+- { 0x0f, "UNALIGNED_MEM_ACCESS" },
++ { 0x06, "PC_OVERFLOW" },
++ { 0x07, "MISALIGNED_IMMC_ADDR" },
++ { 0x08, "MISALIGNED_REG" },
++ { 0x09, "ILLEGAL_INSTR_ENCODING" },
++ { 0x0a, "ILLEGAL_SPH_INSTR_COMBO" },
++ { 0x0b, "ILLEGAL_INSTR_PARAM" },
++ { 0x0c, "INVALID_CONST_ADDR" },
++ { 0x0d, "OOR_REG" },
++ { 0x0e, "OOR_ADDR" },
++ { 0x0f, "MISALIGNED_ADDR" },
+ { 0x10, "INVALID_ADDR_SPACE" },
+- { 0x11, "INVALID_PARAM" },
++ { 0x11, "ILLEGAL_INSTR_PARAM2" },
++ { 0x12, "INVALID_CONST_ADDR_LDC" },
++ { 0x13, "GEOMETRY_SM_ERROR" },
++ { 0x14, "DIVERGENT" },
++ { 0x15, "WARP_EXIT" },
+ {}
+ };
+
+ static const struct nvkm_bitfield gf100_mp_global_error[] = {
++ { 0x00000001, "SM_TO_SM_FAULT" },
++ { 0x00000002, "L1_ERROR" },
+ { 0x00000004, "MULTIPLE_WARP_ERRORS" },
+- { 0x00000008, "OUT_OF_STACK_SPACE" },
++ { 0x00000008, "PHYSICAL_STACK_OVERFLOW" },
++ { 0x00000010, "BPT_INT" },
++ { 0x00000020, "BPT_PAUSE" },
++ { 0x00000040, "SINGLE_STEP_COMPLETE" },
++ { 0x20000000, "ECC_SEC_ERROR" },
++ { 0x40000000, "ECC_DED_ERROR" },
++ { 0x80000000, "TIMEOUT" },
+ {}
+ };
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
+index a5e92135cd77..9efb1b48cd54 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
+@@ -141,7 +141,8 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
+ {
+ u16 data = nvbios_ocfg_entry(bios, outp, idx, ver, hdr, cnt, len);
+ if (data) {
+- info->match = nvbios_rd16(bios, data + 0x00);
++ info->proto = nvbios_rd08(bios, data + 0x00);
++ info->flags = nvbios_rd16(bios, data + 0x01);
+ info->clkcmp[0] = nvbios_rd16(bios, data + 0x02);
+ info->clkcmp[1] = nvbios_rd16(bios, data + 0x04);
+ }
+@@ -149,12 +150,13 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
+ }
+
+ u16
+-nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u16 type,
++nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u8 proto, u8 flags,
+ u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *info)
+ {
+ u16 data, idx = 0;
+ while ((data = nvbios_ocfg_parse(bios, outp, idx++, ver, hdr, cnt, len, info))) {
+- if (info->match == type)
++ if ((info->proto == proto || info->proto == 0xff) &&
++ (info->flags == flags))
+ break;
+ }
+ return data;
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
+index e292f5679418..389fb13a1998 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
+@@ -69,11 +69,11 @@ gm107_ltc_zbc_clear_depth(struct nvkm_ltc *ltc, int i, const u32 depth)
+ }
+
+ static void
+-gm107_ltc_lts_isr(struct nvkm_ltc *ltc, int c, int s)
++gm107_ltc_intr_lts(struct nvkm_ltc *ltc, int c, int s)
+ {
+ struct nvkm_subdev *subdev = &ltc->subdev;
+ struct nvkm_device *device = subdev->device;
+- u32 base = 0x140000 + (c * 0x2000) + (s * 0x200);
++ u32 base = 0x140400 + (c * 0x2000) + (s * 0x200);
+ u32 stat = nvkm_rd32(device, base + 0x00c);
+
+ if (stat) {
+@@ -92,7 +92,7 @@ gm107_ltc_intr(struct nvkm_ltc *ltc)
+ while (mask) {
+ u32 s, c = __ffs(mask);
+ for (s = 0; s < ltc->lts_nr; s++)
+- gm107_ltc_lts_isr(ltc, c, s);
++ gm107_ltc_intr_lts(ltc, c, s);
+ mask &= ~(1 << c);
+ }
+ }
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
+index 2a29bfd5125a..e18e0dc19ec8 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
+@@ -46,7 +46,7 @@ static const struct nvkm_ltc_func
+ gm200_ltc = {
+ .oneinit = gm200_ltc_oneinit,
+ .init = gm200_ltc_init,
+- .intr = gm107_ltc_intr, /*XXX: not validated */
++ .intr = gm107_ltc_intr,
+ .cbc_clear = gm107_ltc_cbc_clear,
+ .cbc_wait = gm107_ltc_cbc_wait,
+ .zbc = 16,
+diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
+index d0826fb0434c..cb2986876738 100644
+--- a/drivers/gpu/drm/radeon/radeon_device.c
++++ b/drivers/gpu/drm/radeon/radeon_device.c
+@@ -630,6 +630,23 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
+ /*
+ * GPU helpers function.
+ */
++
++/**
++ * radeon_device_is_virtual - check if we are running in a virtual environment
++ *
++ * Check if the asic has been passed through to a VM (all asics).
++ * Used at driver startup.
++ * Returns true if virtual or false if not.
++ */
++static bool radeon_device_is_virtual(void)
++{
++#ifdef CONFIG_X86
++ return boot_cpu_has(X86_FEATURE_HYPERVISOR);
++#else
++ return false;
++#endif
++}
++
+ /**
+ * radeon_card_posted - check if the hw has already been initialized
+ *
+@@ -643,6 +660,10 @@ bool radeon_card_posted(struct radeon_device *rdev)
+ {
+ uint32_t reg;
+
++ /* for pass through, always force asic_init */
++ if (radeon_device_is_virtual())
++ return false;
++
+ /* required for EFI mode on macbook2,1 which uses an r5xx asic */
+ if (efi_enabled(EFI_BOOT) &&
+ (rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) &&
+diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
+index e3daafa1be13..3e7c9ac50ccd 100644
+--- a/drivers/gpu/drm/ttm/ttm_bo.c
++++ b/drivers/gpu/drm/ttm/ttm_bo.c
+@@ -1016,9 +1016,9 @@ out_unlock:
+ return ret;
+ }
+
+-static bool ttm_bo_mem_compat(struct ttm_placement *placement,
+- struct ttm_mem_reg *mem,
+- uint32_t *new_flags)
++bool ttm_bo_mem_compat(struct ttm_placement *placement,
++ struct ttm_mem_reg *mem,
++ uint32_t *new_flags)
+ {
+ int i;
+
+@@ -1050,6 +1050,7 @@ static bool ttm_bo_mem_compat(struct ttm_placement *placement,
+
+ return false;
+ }
++EXPORT_SYMBOL(ttm_bo_mem_compat);
+
+ int ttm_bo_validate(struct ttm_buffer_object *bo,
+ struct ttm_placement *placement,
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
+index 299925a1f6c6..eadc981ee79a 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
+@@ -49,6 +49,7 @@ int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv,
+ {
+ struct ttm_buffer_object *bo = &buf->base;
+ int ret;
++ uint32_t new_flags;
+
+ ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
+ if (unlikely(ret != 0))
+@@ -60,7 +61,12 @@ int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv,
+ if (unlikely(ret != 0))
+ goto err;
+
+- ret = ttm_bo_validate(bo, placement, interruptible, false);
++ if (buf->pin_count > 0)
++ ret = ttm_bo_mem_compat(placement, &bo->mem,
++ &new_flags) == true ? 0 : -EINVAL;
++ else
++ ret = ttm_bo_validate(bo, placement, interruptible, false);
++
+ if (!ret)
+ vmw_bo_pin_reserved(buf, true);
+
+@@ -91,6 +97,7 @@ int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
+ {
+ struct ttm_buffer_object *bo = &buf->base;
+ int ret;
++ uint32_t new_flags;
+
+ ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
+ if (unlikely(ret != 0))
+@@ -102,6 +109,12 @@ int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
+ if (unlikely(ret != 0))
+ goto err;
+
++ if (buf->pin_count > 0) {
++ ret = ttm_bo_mem_compat(&vmw_vram_gmr_placement, &bo->mem,
++ &new_flags) == true ? 0 : -EINVAL;
++ goto out_unreserve;
++ }
++
+ ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, interruptible,
+ false);
+ if (likely(ret == 0) || ret == -ERESTARTSYS)
+@@ -161,6 +174,7 @@ int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv,
+ struct ttm_placement placement;
+ struct ttm_place place;
+ int ret = 0;
++ uint32_t new_flags;
+
+ place = vmw_vram_placement.placement[0];
+ place.lpfn = bo->num_pages;
+@@ -185,10 +199,15 @@ int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv,
+ */
+ if (bo->mem.mem_type == TTM_PL_VRAM &&
+ bo->mem.start < bo->num_pages &&
+- bo->mem.start > 0)
++ bo->mem.start > 0 &&
++ buf->pin_count == 0)
+ (void) ttm_bo_validate(bo, &vmw_sys_placement, false, false);
+
+- ret = ttm_bo_validate(bo, &placement, interruptible, false);
++ if (buf->pin_count > 0)
++ ret = ttm_bo_mem_compat(&placement, &bo->mem,
++ &new_flags) == true ? 0 : -EINVAL;
++ else
++ ret = ttm_bo_validate(bo, &placement, interruptible, false);
+
+ /* For some reason we didn't end up at the start of vram */
+ WARN_ON(ret == 0 && bo->offset != 0);
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+index f2cf9231872a..2a505464c50f 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+@@ -227,6 +227,7 @@ static int vmw_force_iommu;
+ static int vmw_restrict_iommu;
+ static int vmw_force_coherent;
+ static int vmw_restrict_dma_mask;
++static int vmw_assume_16bpp;
+
+ static int vmw_probe(struct pci_dev *, const struct pci_device_id *);
+ static void vmw_master_init(struct vmw_master *);
+@@ -243,6 +244,8 @@ MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages");
+ module_param_named(force_coherent, vmw_force_coherent, int, 0600);
+ MODULE_PARM_DESC(restrict_dma_mask, "Restrict DMA mask to 44 bits with IOMMU");
+ module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, 0600);
++MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes");
++module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600);
+
+
+ static void vmw_print_capabilities(uint32_t capabilities)
+@@ -653,6 +656,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
+ dev_priv->vram_start = pci_resource_start(dev->pdev, 1);
+ dev_priv->mmio_start = pci_resource_start(dev->pdev, 2);
+
++ dev_priv->assume_16bpp = !!vmw_assume_16bpp;
++
+ dev_priv->enable_fb = enable_fbdev;
+
+ vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2);
+@@ -699,6 +704,13 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
+ vmw_read(dev_priv,
+ SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB);
+
++ /*
++ * Workaround for low memory 2D VMs to compensate for the
++ * allocation taken by fbdev
++ */
++ if (!(dev_priv->capabilities & SVGA_CAP_3D))
++ mem_size *= 2;
++
+ dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE;
+ dev_priv->prim_bb_mem =
+ vmw_read(dev_priv,
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+index 6db358a85b46..cab0c54b46ae 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+@@ -386,6 +386,7 @@ struct vmw_private {
+ spinlock_t hw_lock;
+ spinlock_t cap_lock;
+ bool has_dx;
++ bool assume_16bpp;
+
+ /*
+ * VGA registers.
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+index 679a4cb98ee3..d2d93959b119 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+@@ -517,28 +517,6 @@ static int vmw_fb_kms_framebuffer(struct fb_info *info)
+
+ par->set_fb = &vfb->base;
+
+- if (!par->bo_ptr) {
+- /*
+- * Pin before mapping. Since we don't know in what placement
+- * to pin, call into KMS to do it for us.
+- */
+- ret = vfb->pin(vfb);
+- if (ret) {
+- DRM_ERROR("Could not pin the fbdev framebuffer.\n");
+- return ret;
+- }
+-
+- ret = ttm_bo_kmap(&par->vmw_bo->base, 0,
+- par->vmw_bo->base.num_pages, &par->map);
+- if (ret) {
+- vfb->unpin(vfb);
+- DRM_ERROR("Could not map the fbdev framebuffer.\n");
+- return ret;
+- }
+-
+- par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite);
+- }
+-
+ return 0;
+ }
+
+@@ -601,6 +579,31 @@ static int vmw_fb_set_par(struct fb_info *info)
+ if (ret)
+ goto out_unlock;
+
++ if (!par->bo_ptr) {
++ struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(set.fb);
++
++ /*
++ * Pin before mapping. Since we don't know in what placement
++ * to pin, call into KMS to do it for us.
++ */
++ ret = vfb->pin(vfb);
++ if (ret) {
++ DRM_ERROR("Could not pin the fbdev framebuffer.\n");
++ goto out_unlock;
++ }
++
++ ret = ttm_bo_kmap(&par->vmw_bo->base, 0,
++ par->vmw_bo->base.num_pages, &par->map);
++ if (ret) {
++ vfb->unpin(vfb);
++ DRM_ERROR("Could not map the fbdev framebuffer.\n");
++ goto out_unlock;
++ }
++
++ par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite);
++ }
++
++
+ vmw_fb_dirty_mark(par, par->fb_x, par->fb_y,
+ par->set_fb->width, par->set_fb->height);
+
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+index b07543b5cea4..6ccd61d37b78 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+@@ -1553,14 +1553,10 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector,
+ DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC)
+ };
+ int i;
+- u32 assumed_bpp = 2;
++ u32 assumed_bpp = 4;
+
+- /*
+- * If using screen objects, then assume 32-bpp because that's what the
+- * SVGA device is assuming
+- */
+- if (dev_priv->active_display_unit == vmw_du_screen_object)
+- assumed_bpp = 4;
++ if (dev_priv->assume_16bpp)
++ assumed_bpp = 2;
+
+ if (dev_priv->active_display_unit == vmw_du_screen_target) {
+ max_width = min(max_width, dev_priv->stdu_max_width);
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+index 9ca818fb034c..41932a7c4f79 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+@@ -399,8 +399,10 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
+
+ WARN_ON_ONCE(!stdu->defined);
+
+- if (!vfb->dmabuf && new_fb->width == mode->hdisplay &&
+- new_fb->height == mode->vdisplay)
++ new_vfbs = (vfb->dmabuf) ? NULL : vmw_framebuffer_to_vfbs(new_fb);
++
++ if (new_vfbs && new_vfbs->surface->base_size.width == mode->hdisplay &&
++ new_vfbs->surface->base_size.height == mode->vdisplay)
+ new_content_type = SAME_AS_DISPLAY;
+ else if (vfb->dmabuf)
+ new_content_type = SEPARATE_DMA;
+@@ -444,7 +446,6 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
+ content_srf.mip_levels[0] = 1;
+ content_srf.multisample_count = 0;
+ } else {
+- new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
+ content_srf = *new_vfbs->surface;
+ }
+
+@@ -464,7 +465,6 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
+ return ret;
+ }
+ } else if (new_content_type == SAME_AS_DISPLAY) {
+- new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
+ new_display_srf = vmw_surface_reference(new_vfbs->surface);
+ }
+
+diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c
+index aad8c162a825..0cd4f7216239 100644
+--- a/drivers/hid/hid-elo.c
++++ b/drivers/hid/hid-elo.c
+@@ -261,7 +261,7 @@ static void elo_remove(struct hid_device *hdev)
+ struct elo_priv *priv = hid_get_drvdata(hdev);
+
+ hid_hw_stop(hdev);
+- flush_workqueue(wq);
++ cancel_delayed_work_sync(&priv->work);
+ kfree(priv);
+ }
+
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index c741f5e50a66..0088979f7281 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -61,6 +61,7 @@ MODULE_LICENSE("GPL");
+ #define MT_QUIRK_ALWAYS_VALID (1 << 4)
+ #define MT_QUIRK_VALID_IS_INRANGE (1 << 5)
+ #define MT_QUIRK_VALID_IS_CONFIDENCE (1 << 6)
++#define MT_QUIRK_CONFIDENCE (1 << 7)
+ #define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE (1 << 8)
+ #define MT_QUIRK_NO_AREA (1 << 9)
+ #define MT_QUIRK_IGNORE_DUPLICATES (1 << 10)
+@@ -78,6 +79,7 @@ struct mt_slot {
+ __s32 contactid; /* the device ContactID assigned to this slot */
+ bool touch_state; /* is the touch valid? */
+ bool inrange_state; /* is the finger in proximity of the sensor? */
++ bool confidence_state; /* is the touch made by a finger? */
+ };
+
+ struct mt_class {
+@@ -503,10 +505,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+ return 1;
+ case HID_DG_CONFIDENCE:
+ if (cls->name == MT_CLS_WIN_8 &&
+- field->application == HID_DG_TOUCHPAD) {
+- cls->quirks &= ~MT_QUIRK_ALWAYS_VALID;
+- cls->quirks |= MT_QUIRK_VALID_IS_CONFIDENCE;
+- }
++ field->application == HID_DG_TOUCHPAD)
++ cls->quirks |= MT_QUIRK_CONFIDENCE;
+ mt_store_field(usage, td, hi);
+ return 1;
+ case HID_DG_TIPSWITCH:
+@@ -619,6 +619,7 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input)
+ return;
+
+ if (td->curvalid || (td->mtclass.quirks & MT_QUIRK_ALWAYS_VALID)) {
++ int active;
+ int slotnum = mt_compute_slot(td, input);
+ struct mt_slot *s = &td->curdata;
+ struct input_mt *mt = input->mt;
+@@ -633,10 +634,14 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input)
+ return;
+ }
+
++ if (!(td->mtclass.quirks & MT_QUIRK_CONFIDENCE))
++ s->confidence_state = 1;
++ active = (s->touch_state || s->inrange_state) &&
++ s->confidence_state;
++
+ input_mt_slot(input, slotnum);
+- input_mt_report_slot_state(input, MT_TOOL_FINGER,
+- s->touch_state || s->inrange_state);
+- if (s->touch_state || s->inrange_state) {
++ input_mt_report_slot_state(input, MT_TOOL_FINGER, active);
++ if (active) {
+ /* this finger is in proximity of the sensor */
+ int wide = (s->w > s->h);
+ /* divided by two to match visual scale of touch */
+@@ -701,6 +706,8 @@ static void mt_process_mt_event(struct hid_device *hid, struct hid_field *field,
+ td->curdata.touch_state = value;
+ break;
+ case HID_DG_CONFIDENCE:
++ if (quirks & MT_QUIRK_CONFIDENCE)
++ td->curdata.confidence_state = value;
+ if (quirks & MT_QUIRK_VALID_IS_CONFIDENCE)
+ td->curvalid = value;
+ break;
+diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
+index 2f1ddca6f2e0..700145b15088 100644
+--- a/drivers/hid/usbhid/hiddev.c
++++ b/drivers/hid/usbhid/hiddev.c
+@@ -516,13 +516,13 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
+ goto inval;
+ } else if (uref->usage_index >= field->report_count)
+ goto inval;
+-
+- else if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
+- (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
+- uref->usage_index + uref_multi->num_values > field->report_count))
+- goto inval;
+ }
+
++ if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
++ (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
++ uref->usage_index + uref_multi->num_values > field->report_count))
++ goto inval;
++
+ switch (cmd) {
+ case HIDIOCGUSAGE:
+ uref->value = field->value[uref->usage_index];
+diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
+index c43318d3416e..a9356a3dea92 100644
+--- a/drivers/hwmon/dell-smm-hwmon.c
++++ b/drivers/hwmon/dell-smm-hwmon.c
+@@ -66,11 +66,13 @@
+
+ static DEFINE_MUTEX(i8k_mutex);
+ static char bios_version[4];
++static char bios_machineid[16];
+ static struct device *i8k_hwmon_dev;
+ static u32 i8k_hwmon_flags;
+ static uint i8k_fan_mult = I8K_FAN_MULT;
+ static uint i8k_pwm_mult;
+ static uint i8k_fan_max = I8K_FAN_HIGH;
++static bool disallow_fan_type_call;
+
+ #define I8K_HWMON_HAVE_TEMP1 (1 << 0)
+ #define I8K_HWMON_HAVE_TEMP2 (1 << 1)
+@@ -94,13 +96,13 @@ module_param(ignore_dmi, bool, 0);
+ MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match");
+
+ #if IS_ENABLED(CONFIG_I8K)
+-static bool restricted;
++static bool restricted = true;
+ module_param(restricted, bool, 0);
+-MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set");
++MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)");
+
+ static bool power_status;
+ module_param(power_status, bool, 0600);
+-MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k");
++MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)");
+ #endif
+
+ static uint fan_mult;
+@@ -235,14 +237,28 @@ static int i8k_get_fan_speed(int fan)
+ /*
+ * Read the fan type.
+ */
+-static int i8k_get_fan_type(int fan)
++static int _i8k_get_fan_type(int fan)
+ {
+ struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, };
+
++ if (disallow_fan_type_call)
++ return -EINVAL;
++
+ regs.ebx = fan & 0xff;
+ return i8k_smm(&regs) ? : regs.eax & 0xff;
+ }
+
++static int i8k_get_fan_type(int fan)
++{
++ /* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */
++ static int types[2] = { INT_MIN, INT_MIN };
++
++ if (types[fan] == INT_MIN)
++ types[fan] = _i8k_get_fan_type(fan);
++
++ return types[fan];
++}
++
+ /*
+ * Read the fan nominal rpm for specific fan speed.
+ */
+@@ -392,9 +408,11 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
+ break;
+
+ case I8K_MACHINE_ID:
+- memset(buff, 0, 16);
+- strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+- sizeof(buff));
++ if (restricted && !capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
++ memset(buff, 0, sizeof(buff));
++ strlcpy(buff, bios_machineid, sizeof(buff));
+ break;
+
+ case I8K_FN_STATUS:
+@@ -511,7 +529,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset)
+ seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n",
+ I8K_PROC_FMT,
+ bios_version,
+- i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
++ (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : bios_machineid,
+ cpu_temp,
+ left_fan, right_fan, left_speed, right_speed,
+ ac_power, fn_key);
+@@ -718,6 +736,9 @@ static struct attribute *i8k_attrs[] = {
+ static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr,
+ int index)
+ {
++ if (disallow_fan_type_call &&
++ (index == 9 || index == 12))
++ return 0;
+ if (index >= 0 && index <= 1 &&
+ !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP1))
+ return 0;
+@@ -767,13 +788,17 @@ static int __init i8k_init_hwmon(void)
+ if (err >= 0)
+ i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP4;
+
+- /* First fan attributes, if fan type is OK */
+- err = i8k_get_fan_type(0);
++ /* First fan attributes, if fan status or type is OK */
++ err = i8k_get_fan_status(0);
++ if (err < 0)
++ err = i8k_get_fan_type(0);
+ if (err >= 0)
+ i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN1;
+
+- /* Second fan attributes, if fan type is OK */
+- err = i8k_get_fan_type(1);
++ /* Second fan attributes, if fan status or type is OK */
++ err = i8k_get_fan_status(1);
++ if (err < 0)
++ err = i8k_get_fan_type(1);
+ if (err >= 0)
+ i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2;
+
+@@ -929,12 +954,14 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
+
+ MODULE_DEVICE_TABLE(dmi, i8k_dmi_table);
+
+-static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
++/*
++ * On some machines, once I8K_SMM_GET_FAN_TYPE is issued the CPU fan speed starts
++ * going up and down randomly due to a bug in the Dell SMM or BIOS. Here is a
++ * blacklist of affected Dell machines for which we disallow the I8K_SMM_GET_FAN_TYPE
++ * call. See bug: https://bugzilla.kernel.org/show_bug.cgi?id=100121
++ */
++static struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initdata = {
+ {
+- /*
+- * CPU fan speed going up and down on Dell Studio XPS 8000
+- * for unknown reasons.
+- */
+ .ident = "Dell Studio XPS 8000",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+@@ -942,16 +969,19 @@ static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
+ },
+ },
+ {
+- /*
+- * CPU fan speed going up and down on Dell Studio XPS 8100
+- * for unknown reasons.
+- */
+ .ident = "Dell Studio XPS 8100",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8100"),
+ },
+ },
++ {
++ .ident = "Dell Inspiron 580",
++ .matches = {
++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 580 "),
++ },
++ },
+ { }
+ };
+
+@@ -966,8 +996,7 @@ static int __init i8k_probe(void)
+ /*
+ * Get DMI information
+ */
+- if (!dmi_check_system(i8k_dmi_table) ||
+- dmi_check_system(i8k_blacklist_dmi_table)) {
++ if (!dmi_check_system(i8k_dmi_table)) {
+ if (!ignore_dmi && !force)
+ return -ENODEV;
+
+@@ -978,8 +1007,13 @@ static int __init i8k_probe(void)
+ i8k_get_dmi_data(DMI_BIOS_VERSION));
+ }
+
++ if (dmi_check_system(i8k_blacklist_fan_type_dmi_table))
++ disallow_fan_type_call = true;
++
+ strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
+ sizeof(bios_version));
++ strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
++ sizeof(bios_machineid));
+
+ /*
+ * Get SMM Dell signature
+diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c
+index 923f56598d4b..3a9f106787d2 100644
+--- a/drivers/iio/accel/kxsd9.c
++++ b/drivers/iio/accel/kxsd9.c
+@@ -81,7 +81,7 @@ static int kxsd9_write_scale(struct iio_dev *indio_dev, int micro)
+
+ mutex_lock(&st->buf_lock);
+ ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
+- if (ret)
++ if (ret < 0)
+ goto error_ret;
+ st->tx[0] = KXSD9_WRITE(KXSD9_REG_CTRL_C);
+ st->tx[1] = (ret & ~KXSD9_FS_MASK) | i;
+@@ -163,7 +163,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev,
+ break;
+ case IIO_CHAN_INFO_SCALE:
+ ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
+- if (ret)
++ if (ret < 0)
+ goto error_ret;
+ *val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK];
+ ret = IIO_VAL_INT_PLUS_MICRO;
+diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
+index 21e19b60e2b9..2123f0ac2e2a 100644
+--- a/drivers/iio/adc/ad7266.c
++++ b/drivers/iio/adc/ad7266.c
+@@ -396,8 +396,8 @@ static int ad7266_probe(struct spi_device *spi)
+
+ st = iio_priv(indio_dev);
+
+- st->reg = devm_regulator_get(&spi->dev, "vref");
+- if (!IS_ERR_OR_NULL(st->reg)) {
++ st->reg = devm_regulator_get_optional(&spi->dev, "vref");
++ if (!IS_ERR(st->reg)) {
+ ret = regulator_enable(st->reg);
+ if (ret)
+ return ret;
+@@ -408,6 +408,9 @@ static int ad7266_probe(struct spi_device *spi)
+
+ st->vref_mv = ret / 1000;
+ } else {
++ /* Any other error indicates that the regulator does exist */
++ if (PTR_ERR(st->reg) != -ENODEV)
++ return PTR_ERR(st->reg);
+ /* Use internal reference */
+ st->vref_mv = 2500;
+ }
+diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c
+index fa4767613173..a03832a5fc95 100644
+--- a/drivers/iio/humidity/hdc100x.c
++++ b/drivers/iio/humidity/hdc100x.c
+@@ -55,7 +55,7 @@ static const struct {
+ },
+ { /* IIO_HUMIDITYRELATIVE channel */
+ .shift = 8,
+- .mask = 2,
++ .mask = 3,
+ },
+ };
+
+@@ -164,14 +164,14 @@ static int hdc100x_get_measurement(struct hdc100x_data *data,
+ dev_err(&client->dev, "cannot read high byte measurement");
+ return ret;
+ }
+- val = ret << 6;
++ val = ret << 8;
+
+ ret = i2c_smbus_read_byte(client);
+ if (ret < 0) {
+ dev_err(&client->dev, "cannot read low byte measurement");
+ return ret;
+ }
+- val |= ret >> 2;
++ val |= ret;
+
+ return val;
+ }
+@@ -211,18 +211,18 @@ static int hdc100x_read_raw(struct iio_dev *indio_dev,
+ return IIO_VAL_INT_PLUS_MICRO;
+ case IIO_CHAN_INFO_SCALE:
+ if (chan->type == IIO_TEMP) {
+- *val = 165;
+- *val2 = 65536 >> 2;
++ *val = 165000;
++ *val2 = 65536;
+ return IIO_VAL_FRACTIONAL;
+ } else {
+- *val = 0;
+- *val2 = 10000;
+- return IIO_VAL_INT_PLUS_MICRO;
++ *val = 100;
++ *val2 = 65536;
++ return IIO_VAL_FRACTIONAL;
+ }
+ break;
+ case IIO_CHAN_INFO_OFFSET:
+- *val = -3971;
+- *val2 = 879096;
++ *val = -15887;
++ *val2 = 515151;
+ return IIO_VAL_INT_PLUS_MICRO;
+ default:
+ return -EINVAL;
+diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
+index ae2806aafb72..0c52dfe64977 100644
+--- a/drivers/iio/industrialio-trigger.c
++++ b/drivers/iio/industrialio-trigger.c
+@@ -210,22 +210,35 @@ static int iio_trigger_attach_poll_func(struct iio_trigger *trig,
+
+ /* Prevent the module from being removed whilst attached to a trigger */
+ __module_get(pf->indio_dev->info->driver_module);
++
++ /* Get irq number */
+ pf->irq = iio_trigger_get_irq(trig);
++ if (pf->irq < 0)
++ goto out_put_module;
++
++ /* Request irq */
+ ret = request_threaded_irq(pf->irq, pf->h, pf->thread,
+ pf->type, pf->name,
+ pf);
+- if (ret < 0) {
+- module_put(pf->indio_dev->info->driver_module);
+- return ret;
+- }
++ if (ret < 0)
++ goto out_put_irq;
+
++ /* Enable trigger in driver */
+ if (trig->ops && trig->ops->set_trigger_state && notinuse) {
+ ret = trig->ops->set_trigger_state(trig, true);
+ if (ret < 0)
+- module_put(pf->indio_dev->info->driver_module);
++ goto out_free_irq;
+ }
+
+ return ret;
++
++out_free_irq:
++ free_irq(pf->irq, pf);
++out_put_irq:
++ iio_trigger_put_irq(trig, pf->irq);
++out_put_module:
++ module_put(pf->indio_dev->info->driver_module);
++ return ret;
+ }
+
+ static int iio_trigger_detach_poll_func(struct iio_trigger *trig,
+diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c
+index a6af56ad10e1..6443aad809b2 100644
+--- a/drivers/iio/light/apds9960.c
++++ b/drivers/iio/light/apds9960.c
+@@ -1006,6 +1006,7 @@ static int apds9960_probe(struct i2c_client *client,
+
+ iio_device_attach_buffer(indio_dev, buffer);
+
++ indio_dev->dev.parent = &client->dev;
+ indio_dev->info = &apds9960_info;
+ indio_dev->name = APDS9960_DRV_NAME;
+ indio_dev->channels = apds9960_channels;
+diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
+index 172393ad34af..d3ca3207935d 100644
+--- a/drivers/iio/pressure/st_pressure_core.c
++++ b/drivers/iio/pressure/st_pressure_core.c
+@@ -28,15 +28,21 @@
+ #include <linux/iio/common/st_sensors.h>
+ #include "st_pressure.h"
+
++#define MCELSIUS_PER_CELSIUS 1000
++
++/* Default pressure sensitivity */
+ #define ST_PRESS_LSB_PER_MBAR 4096UL
+ #define ST_PRESS_KPASCAL_NANO_SCALE (100000000UL / \
+ ST_PRESS_LSB_PER_MBAR)
++
++/* Default temperature sensitivity */
+ #define ST_PRESS_LSB_PER_CELSIUS 480UL
+-#define ST_PRESS_CELSIUS_NANO_SCALE (1000000000UL / \
+- ST_PRESS_LSB_PER_CELSIUS)
++#define ST_PRESS_MILLI_CELSIUS_OFFSET 42500UL
++
+ #define ST_PRESS_NUMBER_DATA_CHANNELS 1
+
+ /* FULLSCALE */
++#define ST_PRESS_FS_AVL_1100MB 1100
+ #define ST_PRESS_FS_AVL_1260MB 1260
+
+ #define ST_PRESS_1_OUT_XL_ADDR 0x28
+@@ -54,9 +60,6 @@
+ #define ST_PRESS_LPS331AP_PW_MASK 0x80
+ #define ST_PRESS_LPS331AP_FS_ADDR 0x23
+ #define ST_PRESS_LPS331AP_FS_MASK 0x30
+-#define ST_PRESS_LPS331AP_FS_AVL_1260_VAL 0x00
+-#define ST_PRESS_LPS331AP_FS_AVL_1260_GAIN ST_PRESS_KPASCAL_NANO_SCALE
+-#define ST_PRESS_LPS331AP_FS_AVL_TEMP_GAIN ST_PRESS_CELSIUS_NANO_SCALE
+ #define ST_PRESS_LPS331AP_BDU_ADDR 0x20
+ #define ST_PRESS_LPS331AP_BDU_MASK 0x04
+ #define ST_PRESS_LPS331AP_DRDY_IRQ_ADDR 0x22
+@@ -65,9 +68,14 @@
+ #define ST_PRESS_LPS331AP_IHL_IRQ_ADDR 0x22
+ #define ST_PRESS_LPS331AP_IHL_IRQ_MASK 0x80
+ #define ST_PRESS_LPS331AP_MULTIREAD_BIT true
+-#define ST_PRESS_LPS331AP_TEMP_OFFSET 42500
+
+ /* CUSTOM VALUES FOR LPS001WP SENSOR */
++
++/* LPS001WP pressure resolution */
++#define ST_PRESS_LPS001WP_LSB_PER_MBAR 16UL
++/* LPS001WP temperature resolution */
++#define ST_PRESS_LPS001WP_LSB_PER_CELSIUS 64UL
++
+ #define ST_PRESS_LPS001WP_WAI_EXP 0xba
+ #define ST_PRESS_LPS001WP_ODR_ADDR 0x20
+ #define ST_PRESS_LPS001WP_ODR_MASK 0x30
+@@ -76,6 +84,8 @@
+ #define ST_PRESS_LPS001WP_ODR_AVL_13HZ_VAL 0x03
+ #define ST_PRESS_LPS001WP_PW_ADDR 0x20
+ #define ST_PRESS_LPS001WP_PW_MASK 0x40
++#define ST_PRESS_LPS001WP_FS_AVL_PRESS_GAIN \
++ (100000000UL / ST_PRESS_LPS001WP_LSB_PER_MBAR)
+ #define ST_PRESS_LPS001WP_BDU_ADDR 0x20
+ #define ST_PRESS_LPS001WP_BDU_MASK 0x04
+ #define ST_PRESS_LPS001WP_MULTIREAD_BIT true
+@@ -92,11 +102,6 @@
+ #define ST_PRESS_LPS25H_ODR_AVL_25HZ_VAL 0x04
+ #define ST_PRESS_LPS25H_PW_ADDR 0x20
+ #define ST_PRESS_LPS25H_PW_MASK 0x80
+-#define ST_PRESS_LPS25H_FS_ADDR 0x00
+-#define ST_PRESS_LPS25H_FS_MASK 0x00
+-#define ST_PRESS_LPS25H_FS_AVL_1260_VAL 0x00
+-#define ST_PRESS_LPS25H_FS_AVL_1260_GAIN ST_PRESS_KPASCAL_NANO_SCALE
+-#define ST_PRESS_LPS25H_FS_AVL_TEMP_GAIN ST_PRESS_CELSIUS_NANO_SCALE
+ #define ST_PRESS_LPS25H_BDU_ADDR 0x20
+ #define ST_PRESS_LPS25H_BDU_MASK 0x04
+ #define ST_PRESS_LPS25H_DRDY_IRQ_ADDR 0x23
+@@ -105,7 +110,6 @@
+ #define ST_PRESS_LPS25H_IHL_IRQ_ADDR 0x22
+ #define ST_PRESS_LPS25H_IHL_IRQ_MASK 0x80
+ #define ST_PRESS_LPS25H_MULTIREAD_BIT true
+-#define ST_PRESS_LPS25H_TEMP_OFFSET 42500
+ #define ST_PRESS_LPS25H_OUT_XL_ADDR 0x28
+ #define ST_TEMP_LPS25H_OUT_L_ADDR 0x2b
+
+@@ -157,7 +161,9 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = {
+ .storagebits = 16,
+ .endianness = IIO_LE,
+ },
+- .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
++ .info_mask_separate =
++ BIT(IIO_CHAN_INFO_RAW) |
++ BIT(IIO_CHAN_INFO_SCALE),
+ .modified = 0,
+ },
+ {
+@@ -173,7 +179,7 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = {
+ },
+ .info_mask_separate =
+ BIT(IIO_CHAN_INFO_RAW) |
+- BIT(IIO_CHAN_INFO_OFFSET),
++ BIT(IIO_CHAN_INFO_SCALE),
+ .modified = 0,
+ },
+ IIO_CHAN_SOFT_TIMESTAMP(1)
+@@ -208,11 +214,14 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
+ .addr = ST_PRESS_LPS331AP_FS_ADDR,
+ .mask = ST_PRESS_LPS331AP_FS_MASK,
+ .fs_avl = {
++ /*
++ * Pressure and temperature sensitivity values
++ * as defined in table 3 of LPS331AP datasheet.
++ */
+ [0] = {
+ .num = ST_PRESS_FS_AVL_1260MB,
+- .value = ST_PRESS_LPS331AP_FS_AVL_1260_VAL,
+- .gain = ST_PRESS_LPS331AP_FS_AVL_1260_GAIN,
+- .gain2 = ST_PRESS_LPS331AP_FS_AVL_TEMP_GAIN,
++ .gain = ST_PRESS_KPASCAL_NANO_SCALE,
++ .gain2 = ST_PRESS_LSB_PER_CELSIUS,
+ },
+ },
+ },
+@@ -254,7 +263,17 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
+ .value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE,
+ },
+ .fs = {
+- .addr = 0,
++ .fs_avl = {
++ /*
++ * Pressure and temperature resolution values
++ * as defined in table 3 of LPS001WP datasheet.
++ */
++ [0] = {
++ .num = ST_PRESS_FS_AVL_1100MB,
++ .gain = ST_PRESS_LPS001WP_FS_AVL_PRESS_GAIN,
++ .gain2 = ST_PRESS_LPS001WP_LSB_PER_CELSIUS,
++ },
++ },
+ },
+ .bdu = {
+ .addr = ST_PRESS_LPS001WP_BDU_ADDR,
+@@ -291,14 +310,15 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
+ .value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE,
+ },
+ .fs = {
+- .addr = ST_PRESS_LPS25H_FS_ADDR,
+- .mask = ST_PRESS_LPS25H_FS_MASK,
+ .fs_avl = {
++ /*
++ * Pressure and temperature sensitivity values
++ * as defined in table 3 of LPS25H datasheet.
++ */
+ [0] = {
+ .num = ST_PRESS_FS_AVL_1260MB,
+- .value = ST_PRESS_LPS25H_FS_AVL_1260_VAL,
+- .gain = ST_PRESS_LPS25H_FS_AVL_1260_GAIN,
+- .gain2 = ST_PRESS_LPS25H_FS_AVL_TEMP_GAIN,
++ .gain = ST_PRESS_KPASCAL_NANO_SCALE,
++ .gain2 = ST_PRESS_LSB_PER_CELSIUS,
+ },
+ },
+ },
+@@ -354,26 +374,26 @@ static int st_press_read_raw(struct iio_dev *indio_dev,
+
+ return IIO_VAL_INT;
+ case IIO_CHAN_INFO_SCALE:
+- *val = 0;
+-
+ switch (ch->type) {
+ case IIO_PRESSURE:
++ *val = 0;
+ *val2 = press_data->current_fullscale->gain;
+- break;
++ return IIO_VAL_INT_PLUS_NANO;
+ case IIO_TEMP:
++ *val = MCELSIUS_PER_CELSIUS;
+ *val2 = press_data->current_fullscale->gain2;
+- break;
++ return IIO_VAL_FRACTIONAL;
+ default:
+ err = -EINVAL;
+ goto read_error;
+ }
+
+- return IIO_VAL_INT_PLUS_NANO;
+ case IIO_CHAN_INFO_OFFSET:
+ switch (ch->type) {
+ case IIO_TEMP:
+- *val = 425;
+- *val2 = 10;
++ *val = ST_PRESS_MILLI_CELSIUS_OFFSET *
++ press_data->current_fullscale->gain2;
++ *val2 = MCELSIUS_PER_CELSIUS;
+ break;
+ default:
+ err = -EINVAL;
+diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c
+index f4d29d5dbd5f..e2f926cdcad2 100644
+--- a/drivers/iio/proximity/as3935.c
++++ b/drivers/iio/proximity/as3935.c
+@@ -64,6 +64,7 @@ struct as3935_state {
+ struct delayed_work work;
+
+ u32 tune_cap;
++ u8 buffer[16]; /* 8-bit data + 56-bit padding + 64-bit timestamp */
+ u8 buf[2] ____cacheline_aligned;
+ };
+
+@@ -72,7 +73,8 @@ static const struct iio_chan_spec as3935_channels[] = {
+ .type = IIO_PROXIMITY,
+ .info_mask_separate =
+ BIT(IIO_CHAN_INFO_RAW) |
+- BIT(IIO_CHAN_INFO_PROCESSED),
++ BIT(IIO_CHAN_INFO_PROCESSED) |
++ BIT(IIO_CHAN_INFO_SCALE),
+ .scan_index = 0,
+ .scan_type = {
+ .sign = 'u',
+@@ -181,7 +183,12 @@ static int as3935_read_raw(struct iio_dev *indio_dev,
+ /* storm out of range */
+ if (*val == AS3935_DATA_MASK)
+ return -EINVAL;
+- *val *= 1000;
++
++ if (m == IIO_CHAN_INFO_PROCESSED)
++ *val *= 1000;
++ break;
++ case IIO_CHAN_INFO_SCALE:
++ *val = 1000;
+ break;
+ default:
+ return -EINVAL;
+@@ -206,10 +213,10 @@ static irqreturn_t as3935_trigger_handler(int irq, void *private)
+ ret = as3935_read(st, AS3935_DATA, &val);
+ if (ret)
+ goto err_read;
+- val &= AS3935_DATA_MASK;
+- val *= 1000;
+
+- iio_push_to_buffers_with_timestamp(indio_dev, &val, pf->timestamp);
++ st->buffer[0] = val & AS3935_DATA_MASK;
++ iio_push_to_buffers_with_timestamp(indio_dev, &st->buffer,
++ pf->timestamp);
+ err_read:
+ iio_trigger_notify_done(indio_dev->trig);
+
+diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
+index 1d92e091e22e..c99525512b34 100644
+--- a/drivers/infiniband/core/cm.c
++++ b/drivers/infiniband/core/cm.c
+@@ -3452,14 +3452,14 @@ static int cm_establish(struct ib_cm_id *cm_id)
+ work->cm_event.event = IB_CM_USER_ESTABLISHED;
+
+ /* Check if the device started its remove_one */
+- spin_lock_irq(&cm.lock);
++ spin_lock_irqsave(&cm.lock, flags);
+ if (!cm_dev->going_down) {
+ queue_delayed_work(cm.wq, &work->work, 0);
+ } else {
+ kfree(work);
+ ret = -ENODEV;
+ }
+- spin_unlock_irq(&cm.lock);
++ spin_unlock_irqrestore(&cm.lock, flags);
+
+ out:
+ return ret;
+diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
+index 105246fba2e7..5fc623362731 100644
+--- a/drivers/infiniband/hw/mlx4/ah.c
++++ b/drivers/infiniband/hw/mlx4/ah.c
+@@ -47,6 +47,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+ ah->av.ib.g_slid = ah_attr->src_path_bits;
++ ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ ah->av.ib.g_slid |= 0x80;
+ ah->av.ib.gid_index = ah_attr->grh.sgid_index;
+@@ -64,7 +65,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
+ --ah->av.ib.stat_rate;
+ }
+- ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+
+ return &ah->ibah;
+ }
+diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
+index a9e3bcc522c4..a0ecf08b2b86 100644
+--- a/drivers/infiniband/sw/rdmavt/qp.c
++++ b/drivers/infiniband/sw/rdmavt/qp.c
+@@ -683,8 +683,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
+ * initialization that is needed.
+ */
+ priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
+- if (!priv)
++ if (IS_ERR(priv)) {
++ ret = priv;
+ goto bail_qp;
++ }
+ qp->priv = priv;
+ qp->timeout_jiffies =
+ usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
+index bf4959f4225b..94f1bf772ec9 100644
+--- a/drivers/iommu/amd_iommu_init.c
++++ b/drivers/iommu/amd_iommu_init.c
+@@ -1363,13 +1363,23 @@ static int __init amd_iommu_init_pci(void)
+ break;
+ }
+
++ /*
++ * Order is important here to make sure any unity map requirements are
++ * fulfilled. The unity mappings are created and written to the device
++ * table during the amd_iommu_init_api() call.
++ *
++ * After that we call init_device_table_dma() to make sure any
++ * uninitialized DTE will block DMA, and in the end we flush the caches
++ * of all IOMMUs to make sure the changes to the device table are
++ * active.
++ */
++ ret = amd_iommu_init_api();
++
+ init_device_table_dma();
+
+ for_each_iommu(iommu)
+ iommu_flush_all_caches(iommu);
+
+- ret = amd_iommu_init_api();
+-
+ if (!ret)
+ print_iommu_info();
+
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index 4ff73ff64e49..3e20208d6fdb 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -1942,6 +1942,7 @@ static struct iommu_ops arm_smmu_ops = {
+ .attach_dev = arm_smmu_attach_dev,
+ .map = arm_smmu_map,
+ .unmap = arm_smmu_unmap,
++ .map_sg = default_iommu_map_sg,
+ .iova_to_phys = arm_smmu_iova_to_phys,
+ .add_device = arm_smmu_add_device,
+ .remove_device = arm_smmu_remove_device,
+diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
+index e1852e845d21..ae364e07840c 100644
+--- a/drivers/iommu/intel-iommu.c
++++ b/drivers/iommu/intel-iommu.c
+@@ -3169,11 +3169,6 @@ static int __init init_dmars(void)
+ }
+ }
+
+- iommu_flush_write_buffer(iommu);
+- iommu_set_root_entry(iommu);
+- iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+-
+ if (!ecap_pass_through(iommu->ecap))
+ hw_pass_through = 0;
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+@@ -3182,6 +3177,18 @@ static int __init init_dmars(void)
+ #endif
+ }
+
++ /*
++ * Now that qi is enabled on all iommus, set the root entry and flush
++ * caches. This is required on some Intel X58 chipsets, otherwise the
++ * flush_context function will loop forever and the boot hangs.
++ */
++ for_each_active_iommu(iommu, drhd) {
++ iommu_flush_write_buffer(iommu);
++ iommu_set_root_entry(iommu);
++ iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
++ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
++ }
++
+ if (iommu_pass_through)
+ iommu_identity_mapping |= IDENTMAP_ALL;
+
+diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
+index 5710a06c3049..0ea8d9a24de0 100644
+--- a/drivers/iommu/rockchip-iommu.c
++++ b/drivers/iommu/rockchip-iommu.c
+@@ -815,7 +815,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
+ dte_addr = virt_to_phys(rk_domain->dt);
+ for (i = 0; i < iommu->num_mmu; i++) {
+ rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
+- rk_iommu_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
++ rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
+ rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
+ }
+
+diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
+index 4dffccf532a2..40fb1209d512 100644
+--- a/drivers/irqchip/irq-mips-gic.c
++++ b/drivers/irqchip/irq-mips-gic.c
+@@ -734,6 +734,12 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
+ /* verify that it doesn't conflict with an IPI irq */
+ if (test_bit(spec->hwirq, ipi_resrv))
+ return -EBUSY;
++
++ hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq);
++
++ return irq_domain_set_hwirq_and_chip(d, virq, hwirq,
++ &gic_level_irq_controller,
++ NULL);
+ } else {
+ base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs);
+ if (base_hwirq == gic_shared_intrs) {
+@@ -855,10 +861,14 @@ static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq,
+ &gic_level_irq_controller,
+ NULL);
+ if (ret)
+- return ret;
++ goto error;
+ }
+
+ return 0;
++
++error:
++ irq_domain_free_irqs_parent(d, virq, nr_irqs);
++ return ret;
+ }
+
+ void gic_dev_domain_free(struct irq_domain *d, unsigned int virq,
+diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c
+index d7723ce772b3..12690c1ea8f8 100644
+--- a/drivers/media/usb/uvc/uvc_v4l2.c
++++ b/drivers/media/usb/uvc/uvc_v4l2.c
+@@ -1408,47 +1408,44 @@ static int uvc_v4l2_put_xu_query(const struct uvc_xu_control_query *kp,
+ static long uvc_v4l2_compat_ioctl32(struct file *file,
+ unsigned int cmd, unsigned long arg)
+ {
++ struct uvc_fh *handle = file->private_data;
+ union {
+ struct uvc_xu_control_mapping xmap;
+ struct uvc_xu_control_query xqry;
+ } karg;
+ void __user *up = compat_ptr(arg);
+- mm_segment_t old_fs;
+ long ret;
+
+ switch (cmd) {
+ case UVCIOC_CTRL_MAP32:
+- cmd = UVCIOC_CTRL_MAP;
+ ret = uvc_v4l2_get_xu_mapping(&karg.xmap, up);
++ if (ret)
++ return ret;
++ ret = uvc_ioctl_ctrl_map(handle->chain, &karg.xmap);
++ if (ret)
++ return ret;
++ ret = uvc_v4l2_put_xu_mapping(&karg.xmap, up);
++ if (ret)
++ return ret;
++
+ break;
+
+ case UVCIOC_CTRL_QUERY32:
+- cmd = UVCIOC_CTRL_QUERY;
+ ret = uvc_v4l2_get_xu_query(&karg.xqry, up);
++ if (ret)
++ return ret;
++ ret = uvc_xu_ctrl_query(handle->chain, &karg.xqry);
++ if (ret)
++ return ret;
++ ret = uvc_v4l2_put_xu_query(&karg.xqry, up);
++ if (ret)
++ return ret;
+ break;
+
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+- old_fs = get_fs();
+- set_fs(KERNEL_DS);
+- ret = video_ioctl2(file, cmd, (unsigned long)&karg);
+- set_fs(old_fs);
+-
+- if (ret < 0)
+- return ret;
+-
+- switch (cmd) {
+- case UVCIOC_CTRL_MAP:
+- ret = uvc_v4l2_put_xu_mapping(&karg.xmap, up);
+- break;
+-
+- case UVCIOC_CTRL_QUERY:
+- ret = uvc_v4l2_put_xu_query(&karg.xqry, up);
+- break;
+- }
+-
+ return ret;
+ }
+ #endif
+diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
+index 21825ddce4a3..859b4a1d11e4 100644
+--- a/drivers/memory/omap-gpmc.c
++++ b/drivers/memory/omap-gpmc.c
+@@ -394,7 +394,7 @@ static void gpmc_cs_bool_timings(int cs, const struct gpmc_bool_timings *p)
+ gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG4,
+ GPMC_CONFIG4_OEEXTRADELAY, p->oe_extra_delay);
+ gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG4,
+- GPMC_CONFIG4_OEEXTRADELAY, p->we_extra_delay);
++ GPMC_CONFIG4_WEEXTRADELAY, p->we_extra_delay);
+ gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG6,
+ GPMC_CONFIG6_CYCLE2CYCLESAMECSEN,
+ p->cycle2cyclesamecsen);
+diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
+index 96fddb016bf1..4dd0391d2942 100644
+--- a/drivers/mtd/ubi/eba.c
++++ b/drivers/mtd/ubi/eba.c
+@@ -575,6 +575,7 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
+ int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0;
+ struct ubi_volume *vol = ubi->volumes[idx];
+ struct ubi_vid_hdr *vid_hdr;
++ uint32_t crc;
+
+ vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
+ if (!vid_hdr)
+@@ -599,14 +600,8 @@ retry:
+ goto out_put;
+ }
+
+- vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
+- err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
+- if (err) {
+- up_read(&ubi->fm_eba_sem);
+- goto write_error;
+- }
++ ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC);
+
+- data_size = offset + len;
+ mutex_lock(&ubi->buf_mutex);
+ memset(ubi->peb_buf + offset, 0xFF, len);
+
+@@ -621,6 +616,19 @@ retry:
+
+ memcpy(ubi->peb_buf + offset, buf, len);
+
++ data_size = offset + len;
++ crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
++ vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
++ vid_hdr->copy_flag = 1;
++ vid_hdr->data_size = cpu_to_be32(data_size);
++ vid_hdr->data_crc = cpu_to_be32(crc);
++ err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
++ if (err) {
++ mutex_unlock(&ubi->buf_mutex);
++ up_read(&ubi->fm_eba_sem);
++ goto write_error;
++ }
++
+ err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size);
+ if (err) {
+ mutex_unlock(&ubi->buf_mutex);
+diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
+index 9fcb4898fb68..c70e51567eed 100644
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -1092,12 +1092,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
+
+ static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
+ {
++ struct geneve_dev *geneve = netdev_priv(dev);
+ /* The max_mtu calculation does not take account of GENEVE
+ * options, to avoid excluding potentially valid
+ * configurations.
+ */
+- int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
+- - dev->hard_header_len;
++ int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
++
++ if (geneve->remote.sa.sa_family == AF_INET6)
++ max_mtu -= sizeof(struct ipv6hdr);
++ else
++ max_mtu -= sizeof(struct iphdr);
+
+ if (new_mtu < 68)
+ return -EINVAL;
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 9e803bbcc0b6..8f3c55d03d5d 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -2564,6 +2564,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
+ u64_stats_update_begin(&secy_stats->syncp);
+ secy_stats->stats.OutPktsUntagged++;
+ u64_stats_update_end(&secy_stats->syncp);
++ skb->dev = macsec->real_dev;
+ len = skb->len;
+ ret = dev_queue_xmit(skb);
+ count_tx(dev, ret, len);
+diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
+index 2fb31edab125..d4425c565839 100644
+--- a/drivers/net/usb/cdc_ncm.c
++++ b/drivers/net/usb/cdc_ncm.c
+@@ -852,6 +852,13 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
+ if (cdc_ncm_init(dev))
+ goto error2;
+
++ /* Some firmwares need a pause here or they will silently fail
++ * to set up the interface properly. This value was decided
++ * empirically on a Sierra Wireless MC7455 running 02.08.02.00
++ * firmware.
++ */
++ usleep_range(10000, 20000);
++
+ /* configure data interface */
+ temp = usb_set_interface(dev->udev, iface_no, data_altsetting);
+ if (temp) {
+diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
+index e85e0737771c..06664baa43d9 100644
+--- a/drivers/net/wireless/mac80211_hwsim.c
++++ b/drivers/net/wireless/mac80211_hwsim.c
+@@ -2771,6 +2771,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
+ if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] ||
+ !info->attrs[HWSIM_ATTR_FLAGS] ||
+ !info->attrs[HWSIM_ATTR_COOKIE] ||
++ !info->attrs[HWSIM_ATTR_SIGNAL] ||
+ !info->attrs[HWSIM_ATTR_TX_INFO])
+ goto out;
+
+diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
+index 0f48048b8654..3a0faa8fe9d4 100644
+--- a/drivers/net/wireless/realtek/rtlwifi/core.c
++++ b/drivers/net/wireless/realtek/rtlwifi/core.c
+@@ -54,7 +54,7 @@ EXPORT_SYMBOL(channel5g_80m);
+ void rtl_addr_delay(u32 addr)
+ {
+ if (addr == 0xfe)
+- msleep(50);
++ mdelay(50);
+ else if (addr == 0xfd)
+ msleep(5);
+ else if (addr == 0xfc)
+@@ -75,7 +75,7 @@ void rtl_rfreg_delay(struct ieee80211_hw *hw, enum radio_path rfpath, u32 addr,
+ rtl_addr_delay(addr);
+ } else {
+ rtl_set_rfreg(hw, rfpath, addr, mask, data);
+- usleep_range(1, 2);
++ udelay(1);
+ }
+ }
+ EXPORT_SYMBOL(rtl_rfreg_delay);
+@@ -86,7 +86,7 @@ void rtl_bb_delay(struct ieee80211_hw *hw, u32 addr, u32 data)
+ rtl_addr_delay(addr);
+ } else {
+ rtl_set_bbreg(hw, addr, MASKDWORD, data);
+- usleep_range(1, 2);
++ udelay(1);
+ }
+ }
+ EXPORT_SYMBOL(rtl_bb_delay);
+diff --git a/drivers/of/irq.c b/drivers/of/irq.c
+index e7bfc175b8e1..6ec743faabe8 100644
+--- a/drivers/of/irq.c
++++ b/drivers/of/irq.c
+@@ -386,13 +386,13 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
+ EXPORT_SYMBOL_GPL(of_irq_to_resource);
+
+ /**
+- * of_irq_get - Decode a node's IRQ and return it as a Linux irq number
++ * of_irq_get - Decode a node's IRQ and return it as a Linux IRQ number
+ * @dev: pointer to device tree node
+- * @index: zero-based index of the irq
+- *
+- * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain
+- * is not yet created.
++ * @index: zero-based index of the IRQ
+ *
++ * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or
++ * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case
++ * of any other failure.
+ */
+ int of_irq_get(struct device_node *dev, int index)
+ {
+@@ -413,12 +413,13 @@ int of_irq_get(struct device_node *dev, int index)
+ EXPORT_SYMBOL_GPL(of_irq_get);
+
+ /**
+- * of_irq_get_byname - Decode a node's IRQ and return it as a Linux irq number
++ * of_irq_get_byname - Decode a node's IRQ and return it as a Linux IRQ number
+ * @dev: pointer to device tree node
+- * @name: irq name
++ * @name: IRQ name
+ *
+- * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain
+- * is not yet created, or error code in case of any other failure.
++ * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or
++ * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case
++ * of any other failure.
+ */
+ int of_irq_get_byname(struct device_node *dev, const char *name)
+ {
+diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
+index dfbab61a1b47..1fa3a3219c45 100644
+--- a/drivers/pci/vc.c
++++ b/drivers/pci/vc.c
+@@ -221,9 +221,9 @@ static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos,
+ else
+ pci_write_config_word(dev, pos + PCI_VC_PORT_CTRL,
+ *(u16 *)buf);
+- buf += 2;
++ buf += 4;
+ }
+- len += 2;
++ len += 4;
+
+ /*
+ * If we have any Low Priority VCs and a VC Arbitration Table Offset
+diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
+index 56a17ec5b5ef..6c7fe4778793 100644
+--- a/drivers/regulator/qcom_smd-regulator.c
++++ b/drivers/regulator/qcom_smd-regulator.c
+@@ -140,6 +140,18 @@ static const struct regulator_ops rpm_smps_ldo_ops = {
+ .enable = rpm_reg_enable,
+ .disable = rpm_reg_disable,
+ .is_enabled = rpm_reg_is_enabled,
++ .list_voltage = regulator_list_voltage_linear_range,
++
++ .get_voltage = rpm_reg_get_voltage,
++ .set_voltage = rpm_reg_set_voltage,
++
++ .set_load = rpm_reg_set_load,
++};
++
++static const struct regulator_ops rpm_smps_ldo_ops_fixed = {
++ .enable = rpm_reg_enable,
++ .disable = rpm_reg_disable,
++ .is_enabled = rpm_reg_is_enabled,
+
+ .get_voltage = rpm_reg_get_voltage,
+ .set_voltage = rpm_reg_set_voltage,
+@@ -247,7 +259,7 @@ static const struct regulator_desc pm8941_nldo = {
+ static const struct regulator_desc pm8941_lnldo = {
+ .fixed_uV = 1740000,
+ .n_voltages = 1,
+- .ops = &rpm_smps_ldo_ops,
++ .ops = &rpm_smps_ldo_ops_fixed,
+ };
+
+ static const struct regulator_desc pm8941_switch = {
+diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
+index d4c285688ce9..3ddc85e6efd6 100644
+--- a/drivers/scsi/53c700.c
++++ b/drivers/scsi/53c700.c
+@@ -1122,7 +1122,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp,
+ } else {
+ struct scsi_cmnd *SCp;
+
+- SCp = scsi_host_find_tag(SDp->host, SCSI_NO_TAG);
++ SCp = SDp->current_cmnd;
+ if(unlikely(SCp == NULL)) {
+ sdev_printk(KERN_ERR, SDp,
+ "no saved request for untagged cmd\n");
+@@ -1826,7 +1826,7 @@ NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *)
+ slot->tag, slot);
+ } else {
+ slot->tag = SCSI_NO_TAG;
+- /* must populate current_cmnd for scsi_host_find_tag to work */
++ /* save current command for reselection */
+ SCp->device->current_cmnd = SCp;
+ }
+ /* sanity check: some of the commands generated by the mid-layer
+diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
+index 984ddcb4786d..1b9c049bd5c5 100644
+--- a/drivers/scsi/scsi_error.c
++++ b/drivers/scsi/scsi_error.c
+@@ -1127,7 +1127,6 @@ static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
+ */
+ void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
+ {
+- scmd->device->host->host_failed--;
+ scmd->eh_eflags = 0;
+ list_move_tail(&scmd->eh_entry, done_q);
+ }
+@@ -2226,6 +2225,9 @@ int scsi_error_handler(void *data)
+ else
+ scsi_unjam_host(shost);
+
++ /* All scmds have been handled */
++ shost->host_failed = 0;
++
+ /*
+ * Note - if the above fails completely, the action is to take
+ * individual devices offline and flush the queue of any
+diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
+index f52b74cf8d1e..41c3a2c4f112 100644
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -2862,10 +2862,10 @@ static int sd_revalidate_disk(struct gendisk *disk)
+ if (sdkp->opt_xfer_blocks &&
+ sdkp->opt_xfer_blocks <= dev_max &&
+ sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
+- sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
+- rw_max = q->limits.io_opt =
+- sdkp->opt_xfer_blocks * sdp->sector_size;
+- else
++ logical_to_bytes(sdp, sdkp->opt_xfer_blocks) >= PAGE_SIZE) {
++ q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
++ rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
++ } else
+ rw_max = BLK_DEF_MAX_SECTORS;
+
+ /* Combine with controller limits */
+diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
+index 654630bb7d0e..765a6f1ac1b7 100644
+--- a/drivers/scsi/sd.h
++++ b/drivers/scsi/sd.h
+@@ -151,6 +151,11 @@ static inline sector_t logical_to_sectors(struct scsi_device *sdev, sector_t blo
+ return blocks << (ilog2(sdev->sector_size) - 9);
+ }
+
++static inline unsigned int logical_to_bytes(struct scsi_device *sdev, sector_t blocks)
++{
++ return blocks * sdev->sector_size;
++}
++
+ /*
+ * A DIF-capable target device can be formatted with different
+ * protection schemes. Currently 0 through 3 are defined:
+diff --git a/drivers/staging/iio/accel/sca3000_core.c b/drivers/staging/iio/accel/sca3000_core.c
+index a8f533af9eca..ec12181822e6 100644
+--- a/drivers/staging/iio/accel/sca3000_core.c
++++ b/drivers/staging/iio/accel/sca3000_core.c
+@@ -594,7 +594,7 @@ static ssize_t sca3000_read_frequency(struct device *dev,
+ goto error_ret_mut;
+ ret = sca3000_read_ctrl_reg(st, SCA3000_REG_CTRL_SEL_OUT_CTRL);
+ mutex_unlock(&st->lock);
+- if (ret)
++ if (ret < 0)
+ goto error_ret;
+ val = ret;
+ if (base_freq > 0)
+diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
+index 6ceac4f2d4b2..5b4b47ed948b 100644
+--- a/drivers/thermal/cpu_cooling.c
++++ b/drivers/thermal/cpu_cooling.c
+@@ -857,14 +857,6 @@ __cpufreq_cooling_register(struct device_node *np,
+ goto free_power_table;
+ }
+
+- snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
+- cpufreq_dev->id);
+-
+- cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
+- &cpufreq_cooling_ops);
+- if (IS_ERR(cool_dev))
+- goto remove_idr;
+-
+ /* Fill freq-table in descending order of frequencies */
+ for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
+ freq = find_next_max(table, freq);
+@@ -877,6 +869,14 @@ __cpufreq_cooling_register(struct device_node *np,
+ pr_debug("%s: freq:%u KHz\n", __func__, freq);
+ }
+
++ snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
++ cpufreq_dev->id);
++
++ cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
++ &cpufreq_cooling_ops);
++ if (IS_ERR(cool_dev))
++ goto remove_idr;
++
+ cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
+ cpufreq_dev->cool_dev = cool_dev;
+
+diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
+index f973bfce5d08..1e93a37e27f0 100644
+--- a/drivers/tty/vt/keyboard.c
++++ b/drivers/tty/vt/keyboard.c
+@@ -366,34 +366,22 @@ static void to_utf8(struct vc_data *vc, uint c)
+
+ static void do_compute_shiftstate(void)
+ {
+- unsigned int i, j, k, sym, val;
++ unsigned int k, sym, val;
+
+ shift_state = 0;
+ memset(shift_down, 0, sizeof(shift_down));
+
+- for (i = 0; i < ARRAY_SIZE(key_down); i++) {
+-
+- if (!key_down[i])
++ for_each_set_bit(k, key_down, min(NR_KEYS, KEY_CNT)) {
++ sym = U(key_maps[0][k]);
++ if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK)
+ continue;
+
+- k = i * BITS_PER_LONG;
+-
+- for (j = 0; j < BITS_PER_LONG; j++, k++) {
+-
+- if (!test_bit(k, key_down))
+- continue;
++ val = KVAL(sym);
++ if (val == KVAL(K_CAPSSHIFT))
++ val = KVAL(K_SHIFT);
+
+- sym = U(key_maps[0][k]);
+- if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK)
+- continue;
+-
+- val = KVAL(sym);
+- if (val == KVAL(K_CAPSSHIFT))
+- val = KVAL(K_SHIFT);
+-
+- shift_down[val]++;
+- shift_state |= (1 << val);
+- }
++ shift_down[val]++;
++ shift_state |= BIT(val);
+ }
+ }
+
+diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
+index bd523adb9643..e9e29ded3f30 100644
+--- a/drivers/tty/vt/vt.c
++++ b/drivers/tty/vt/vt.c
+@@ -750,6 +750,7 @@ static void visual_init(struct vc_data *vc, int num, int init)
+ vc->vc_complement_mask = 0;
+ vc->vc_can_do_color = 0;
+ vc->vc_panic_force_write = false;
++ vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS;
+ vc->vc_sw->con_init(vc, init);
+ if (!vc->vc_complement_mask)
+ vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
+diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c
+index 504708f59b93..6c6040c22c7a 100644
+--- a/drivers/usb/common/usb-otg-fsm.c
++++ b/drivers/usb/common/usb-otg-fsm.c
+@@ -21,6 +21,7 @@
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
++#include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <linux/mutex.h>
+@@ -452,3 +453,4 @@ int otg_statemachine(struct otg_fsm *fsm)
+ return state_changed;
+ }
+ EXPORT_SYMBOL_GPL(otg_statemachine);
++MODULE_LICENSE("GPL");
+diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
+index 980fc5774151..2d107d0f61b0 100644
+--- a/drivers/usb/core/hcd.c
++++ b/drivers/usb/core/hcd.c
+@@ -2597,26 +2597,23 @@ EXPORT_SYMBOL_GPL(usb_create_hcd);
+ * Don't deallocate the bandwidth_mutex until the last shared usb_hcd is
+ * deallocated.
+ *
+- * Make sure to only deallocate the bandwidth_mutex when the primary HCD is
+- * freed. When hcd_release() is called for either hcd in a peer set
+- * invalidate the peer's ->shared_hcd and ->primary_hcd pointers to
+- * block new peering attempts
++ * Make sure to deallocate the bandwidth_mutex only when the last HCD is
++ * freed. When hcd_release() is called for either hcd in a peer set,
++ * invalidate the peer's ->shared_hcd and ->primary_hcd pointers.
+ */
+ static void hcd_release(struct kref *kref)
+ {
+ struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref);
+
+ mutex_lock(&usb_port_peer_mutex);
+- if (usb_hcd_is_primary_hcd(hcd)) {
+- kfree(hcd->address0_mutex);
+- kfree(hcd->bandwidth_mutex);
+- }
+ if (hcd->shared_hcd) {
+ struct usb_hcd *peer = hcd->shared_hcd;
+
+ peer->shared_hcd = NULL;
+- if (peer->primary_hcd == hcd)
+- peer->primary_hcd = NULL;
++ peer->primary_hcd = NULL;
++ } else {
++ kfree(hcd->address0_mutex);
++ kfree(hcd->bandwidth_mutex);
+ }
+ mutex_unlock(&usb_port_peer_mutex);
+ kfree(hcd);
+diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
+index 3c58d633ce80..dec0b21fc626 100644
+--- a/drivers/usb/dwc2/core.h
++++ b/drivers/usb/dwc2/core.h
+@@ -64,6 +64,17 @@
+ DWC2_TRACE_SCHEDULER_VB(pr_fmt("%s: SCH: " fmt), \
+ dev_name(hsotg->dev), ##__VA_ARGS__)
+
++#ifdef CONFIG_MIPS
++/*
++ * There are some MIPS machines that can run in either big-endian
++ * or little-endian mode and that use the dwc2 register without
++ * a byteswap in both ways.
++ * Unlike other architectures, MIPS apparently does not require a
++ * barrier before the __raw_writel() to synchronize with DMA but does
++ * require the barrier after the __raw_writel() to serialize a set of
++ * writes. This set of operations was added specifically for MIPS and
++ * should only be used there.
++ */
+ static inline u32 dwc2_readl(const void __iomem *addr)
+ {
+ u32 value = __raw_readl(addr);
+@@ -90,6 +101,22 @@ static inline void dwc2_writel(u32 value, void __iomem *addr)
+ pr_info("INFO:: wrote %08x to %p\n", value, addr);
+ #endif
+ }
++#else
++/* Normal architectures just use readl/write */
++static inline u32 dwc2_readl(const void __iomem *addr)
++{
++ return readl(addr);
++}
++
++static inline void dwc2_writel(u32 value, void __iomem *addr)
++{
++ writel(value, addr);
++
++#ifdef DWC2_LOG_WRITES
++ pr_info("info:: wrote %08x to %p\n", value, addr);
++#endif
++}
++#endif
+
+ /* Maximum number of Endpoints/HostChannels */
+ #define MAX_EPS_CHANNELS 16
+diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
+index 7b6d74f0c72f..476c0e3a7150 100644
+--- a/drivers/virtio/virtio_balloon.c
++++ b/drivers/virtio/virtio_balloon.c
+@@ -75,7 +75,7 @@ struct virtio_balloon {
+
+ /* The array of pfns we tell the Host about. */
+ unsigned int num_pfns;
+- u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
++ __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
+
+ /* Memory statistics */
+ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
+@@ -127,14 +127,16 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
+
+ }
+
+-static void set_page_pfns(u32 pfns[], struct page *page)
++static void set_page_pfns(struct virtio_balloon *vb,
++ __virtio32 pfns[], struct page *page)
+ {
+ unsigned int i;
+
+ /* Set balloon pfns pointing at this page.
+ * Note that the first pfn points at start of the page. */
+ for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
+- pfns[i] = page_to_balloon_pfn(page) + i;
++ pfns[i] = cpu_to_virtio32(vb->vdev,
++ page_to_balloon_pfn(page) + i);
+ }
+
+ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
+@@ -158,7 +160,7 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
+ msleep(200);
+ break;
+ }
+- set_page_pfns(vb->pfns + vb->num_pfns, page);
++ set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+ vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
+ if (!virtio_has_feature(vb->vdev,
+ VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+@@ -177,10 +179,12 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
+ static void release_pages_balloon(struct virtio_balloon *vb)
+ {
+ unsigned int i;
++ struct page *page;
+
+ /* Find pfns pointing at start of each page, get pages and free them. */
+ for (i = 0; i < vb->num_pfns; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
+- struct page *page = balloon_pfn_to_page(vb->pfns[i]);
++ page = balloon_pfn_to_page(virtio32_to_cpu(vb->vdev,
++ vb->pfns[i]));
+ if (!virtio_has_feature(vb->vdev,
+ VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+ adjust_managed_page_count(page, 1);
+@@ -203,7 +207,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
+ page = balloon_page_dequeue(vb_dev_info);
+ if (!page)
+ break;
+- set_page_pfns(vb->pfns + vb->num_pfns, page);
++ set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+ vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
+ }
+
+@@ -471,13 +475,13 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
+ __count_vm_event(BALLOON_MIGRATE);
+ spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
+ vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+- set_page_pfns(vb->pfns, newpage);
++ set_page_pfns(vb, vb->pfns, newpage);
+ tell_host(vb, vb->inflate_vq);
+
+ /* balloon's page migration 2nd step -- deflate "page" */
+ balloon_page_delete(page);
+ vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+- set_page_pfns(vb->pfns, page);
++ set_page_pfns(vb, vb->pfns, page);
+ tell_host(vb, vb->deflate_vq);
+
+ mutex_unlock(&vb->balloon_lock);
+diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
+index d46839f51e73..e4db19e88ab1 100644
+--- a/drivers/xen/balloon.c
++++ b/drivers/xen/balloon.c
+@@ -151,8 +151,6 @@ static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
+ static void balloon_process(struct work_struct *work);
+ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
+
+-static void release_memory_resource(struct resource *resource);
+-
+ /* When ballooning out (allocating memory to return to Xen) we don't really
+ want the kernel to try too hard since that can trigger the oom killer. */
+ #define GFP_BALLOON \
+@@ -248,6 +246,19 @@ static enum bp_state update_schedule(enum bp_state state)
+ }
+
+ #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
++static void release_memory_resource(struct resource *resource)
++{
++ if (!resource)
++ return;
++
++ /*
++ * No need to reset region to identity mapped since we now
++ * know that no I/O can be in this region
++ */
++ release_resource(resource);
++ kfree(resource);
++}
++
+ static struct resource *additional_memory_resource(phys_addr_t size)
+ {
+ struct resource *res;
+@@ -286,19 +297,6 @@ static struct resource *additional_memory_resource(phys_addr_t size)
+ return res;
+ }
+
+-static void release_memory_resource(struct resource *resource)
+-{
+- if (!resource)
+- return;
+-
+- /*
+- * No need to reset region to identity mapped since we now
+- * know that no I/O can be in this region
+- */
+- release_resource(resource);
+- kfree(resource);
+-}
+-
+ static enum bp_state reserve_additional_memory(void)
+ {
+ long credit;
+diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
+index 076970a54f89..4ce10bcca18b 100644
+--- a/drivers/xen/xen-acpi-processor.c
++++ b/drivers/xen/xen-acpi-processor.c
+@@ -423,36 +423,7 @@ upload:
+
+ return 0;
+ }
+-static int __init check_prereq(void)
+-{
+- struct cpuinfo_x86 *c = &cpu_data(0);
+-
+- if (!xen_initial_domain())
+- return -ENODEV;
+-
+- if (!acpi_gbl_FADT.smi_command)
+- return -ENODEV;
+-
+- if (c->x86_vendor == X86_VENDOR_INTEL) {
+- if (!cpu_has(c, X86_FEATURE_EST))
+- return -ENODEV;
+
+- return 0;
+- }
+- if (c->x86_vendor == X86_VENDOR_AMD) {
+- /* Copied from powernow-k8.h, can't include ../cpufreq/powernow
+- * as we get compile warnings for the static functions.
+- */
+-#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
+-#define USE_HW_PSTATE 0x00000080
+- u32 eax, ebx, ecx, edx;
+- cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+- if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE)
+- return -ENODEV;
+- return 0;
+- }
+- return -ENODEV;
+-}
+ /* acpi_perf_data is a pointer to percpu data. */
+ static struct acpi_processor_performance __percpu *acpi_perf_data;
+
+@@ -509,10 +480,10 @@ struct notifier_block xen_acpi_processor_resume_nb = {
+ static int __init xen_acpi_processor_init(void)
+ {
+ unsigned int i;
+- int rc = check_prereq();
++ int rc;
+
+- if (rc)
+- return rc;
++ if (!xen_initial_domain())
++ return -ENODEV;
+
+ nr_acpi_bits = get_max_acpi_id() + 1;
+ acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL);
+diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
+index ec7928a27aaa..234707cc419c 100644
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -1552,6 +1552,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
+ trans->transid, root->fs_info->generation);
+
+ if (!should_cow_block(trans, root, buf)) {
++ trans->dirty = true;
+ *cow_ret = buf;
+ return 0;
+ }
+@@ -2773,8 +2774,10 @@ again:
+ * then we don't want to set the path blocking,
+ * so we test it here
+ */
+- if (!should_cow_block(trans, root, b))
++ if (!should_cow_block(trans, root, b)) {
++ trans->dirty = true;
+ goto cow_done;
++ }
+
+ /*
+ * must have write locks on this node and the
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 84e060eb0de8..78f1b57d0b46 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -7929,7 +7929,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
+ buf->start + buf->len - 1, GFP_NOFS);
+ }
+- trans->blocks_used++;
++ trans->dirty = true;
+ /* this returns a buffer locked for blocking */
+ return buf;
+ }
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index 00b8f37cc306..d7c138f42bdf 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -239,7 +239,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
+ trans->aborted = errno;
+ /* Nothing used. The other threads that have joined this
+ * transaction may be able to continue. */
+- if (!trans->blocks_used && list_empty(&trans->new_bgs)) {
++ if (!trans->dirty && list_empty(&trans->new_bgs)) {
+ const char *errstr;
+
+ errstr = btrfs_decode_error(errno);
+diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
+index 72be51f7ca2f..c0b501a5a353 100644
+--- a/fs/btrfs/transaction.h
++++ b/fs/btrfs/transaction.h
+@@ -110,7 +110,6 @@ struct btrfs_trans_handle {
+ u64 chunk_bytes_reserved;
+ unsigned long use_count;
+ unsigned long blocks_reserved;
+- unsigned long blocks_used;
+ unsigned long delayed_ref_updates;
+ struct btrfs_transaction *transaction;
+ struct btrfs_block_rsv *block_rsv;
+@@ -121,6 +120,7 @@ struct btrfs_trans_handle {
+ bool can_flush_pending_bgs;
+ bool reloc_reserved;
+ bool sync;
++ bool dirty;
+ unsigned int type;
+ /*
+ * this root is only needed to validate that the root passed to
+diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
+index 5a53ac6b1e02..02b071bf3732 100644
+--- a/fs/cifs/cifs_unicode.c
++++ b/fs/cifs/cifs_unicode.c
+@@ -101,6 +101,12 @@ convert_sfm_char(const __u16 src_char, char *target)
+ case SFM_SLASH:
+ *target = '\\';
+ break;
++ case SFM_SPACE:
++ *target = ' ';
++ break;
++ case SFM_PERIOD:
++ *target = '.';
++ break;
+ default:
+ return false;
+ }
+@@ -404,7 +410,7 @@ static __le16 convert_to_sfu_char(char src_char)
+ return dest_char;
+ }
+
+-static __le16 convert_to_sfm_char(char src_char)
++static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
+ {
+ __le16 dest_char;
+
+@@ -427,6 +433,18 @@ static __le16 convert_to_sfm_char(char src_char)
+ case '|':
+ dest_char = cpu_to_le16(SFM_PIPE);
+ break;
++ case '.':
++ if (end_of_string)
++ dest_char = cpu_to_le16(SFM_PERIOD);
++ else
++ dest_char = 0;
++ break;
++ case ' ':
++ if (end_of_string)
++ dest_char = cpu_to_le16(SFM_SPACE);
++ else
++ dest_char = 0;
++ break;
+ default:
+ dest_char = 0;
+ }
+@@ -469,9 +487,16 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
+ /* see if we must remap this char */
+ if (map_chars == SFU_MAP_UNI_RSVD)
+ dst_char = convert_to_sfu_char(src_char);
+- else if (map_chars == SFM_MAP_UNI_RSVD)
+- dst_char = convert_to_sfm_char(src_char);
+- else
++ else if (map_chars == SFM_MAP_UNI_RSVD) {
++ bool end_of_string;
++
++ if (i == srclen - 1)
++ end_of_string = true;
++ else
++ end_of_string = false;
++
++ dst_char = convert_to_sfm_char(src_char, end_of_string);
++ } else
+ dst_char = 0;
+ /*
+ * FIXME: We can not handle remapping backslash (UNI_SLASH)
+diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
+index bdc52cb9a676..479bc0a941f3 100644
+--- a/fs/cifs/cifs_unicode.h
++++ b/fs/cifs/cifs_unicode.h
+@@ -64,6 +64,8 @@
+ #define SFM_LESSTHAN ((__u16) 0xF023)
+ #define SFM_PIPE ((__u16) 0xF027)
+ #define SFM_SLASH ((__u16) 0xF026)
++#define SFM_PERIOD ((__u16) 0xF028)
++#define SFM_SPACE ((__u16) 0xF029)
+
+ /*
+ * Mapping mechanism to use when one of the seven reserved characters is
+diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
+index 6f62ac821a84..34cbc582e8d7 100644
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -428,7 +428,9 @@ cifs_echo_request(struct work_struct *work)
+ * server->ops->need_neg() == true. Also, no need to ping if
+ * we got a response recently.
+ */
+- if (!server->ops->need_neg || server->ops->need_neg(server) ||
++
++ if (server->tcpStatus == CifsNeedReconnect ||
++ server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew ||
+ (server->ops->can_echo && !server->ops->can_echo(server)) ||
+ time_before(jiffies, server->lstrp + echo_interval - HZ))
+ goto requeue_echo;
+diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
+index 848249fa120f..3079b38f0afb 100644
+--- a/fs/cifs/ntlmssp.h
++++ b/fs/cifs/ntlmssp.h
+@@ -133,6 +133,6 @@ typedef struct _AUTHENTICATE_MESSAGE {
+
+ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses);
+ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses);
+-int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen,
++int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
+ struct cifs_ses *ses,
+ const struct nls_table *nls_cp);
+diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
+index af0ec2d5ad0e..e88ffe1da045 100644
+--- a/fs/cifs/sess.c
++++ b/fs/cifs/sess.c
+@@ -364,19 +364,43 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
+ sec_blob->DomainName.MaximumLength = 0;
+ }
+
+-/* We do not malloc the blob, it is passed in pbuffer, because its
+- maximum possible size is fixed and small, making this approach cleaner.
+- This function returns the length of the data in the blob */
+-int build_ntlmssp_auth_blob(unsigned char *pbuffer,
++static int size_of_ntlmssp_blob(struct cifs_ses *ses)
++{
++ int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len
++ - CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2;
++
++ if (ses->domainName)
++ sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
++ else
++ sz += 2;
++
++ if (ses->user_name)
++ sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
++ else
++ sz += 2;
++
++ return sz;
++}
++
++int build_ntlmssp_auth_blob(unsigned char **pbuffer,
+ u16 *buflen,
+ struct cifs_ses *ses,
+ const struct nls_table *nls_cp)
+ {
+ int rc;
+- AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
++ AUTHENTICATE_MESSAGE *sec_blob;
+ __u32 flags;
+ unsigned char *tmp;
+
++ rc = setup_ntlmv2_rsp(ses, nls_cp);
++ if (rc) {
++ cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
++ *buflen = 0;
++ goto setup_ntlmv2_ret;
++ }
++ *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL);
++ sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer;
++
+ memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
+ sec_blob->MessageType = NtLmAuthenticate;
+
+@@ -391,7 +415,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+ }
+
+- tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
++ tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
+ sec_blob->NegotiateFlags = cpu_to_le32(flags);
+
+ sec_blob->LmChallengeResponse.BufferOffset =
+@@ -399,13 +423,9 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+ sec_blob->LmChallengeResponse.Length = 0;
+ sec_blob->LmChallengeResponse.MaximumLength = 0;
+
+- sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
++ sec_blob->NtChallengeResponse.BufferOffset =
++ cpu_to_le32(tmp - *pbuffer);
+ if (ses->user_name != NULL) {
+- rc = setup_ntlmv2_rsp(ses, nls_cp);
+- if (rc) {
+- cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
+- goto setup_ntlmv2_ret;
+- }
+ memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+@@ -423,7 +443,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+ }
+
+ if (ses->domainName == NULL) {
+- sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
++ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
+ sec_blob->DomainName.Length = 0;
+ sec_blob->DomainName.MaximumLength = 0;
+ tmp += 2;
+@@ -432,14 +452,14 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+ len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
+ CIFS_MAX_USERNAME_LEN, nls_cp);
+ len *= 2; /* unicode is 2 bytes each */
+- sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
++ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
+ sec_blob->DomainName.Length = cpu_to_le16(len);
+ sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
+ tmp += len;
+ }
+
+ if (ses->user_name == NULL) {
+- sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
++ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
+ sec_blob->UserName.Length = 0;
+ sec_blob->UserName.MaximumLength = 0;
+ tmp += 2;
+@@ -448,13 +468,13 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+ len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name,
+ CIFS_MAX_USERNAME_LEN, nls_cp);
+ len *= 2; /* unicode is 2 bytes each */
+- sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
++ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
+ sec_blob->UserName.Length = cpu_to_le16(len);
+ sec_blob->UserName.MaximumLength = cpu_to_le16(len);
+ tmp += len;
+ }
+
+- sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer);
++ sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
+ sec_blob->WorkstationName.Length = 0;
+ sec_blob->WorkstationName.MaximumLength = 0;
+ tmp += 2;
+@@ -463,19 +483,19 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+ (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
+ && !calc_seckey(ses)) {
+ memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
+- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
++ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
+ sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
+ sec_blob->SessionKey.MaximumLength =
+ cpu_to_le16(CIFS_CPHTXT_SIZE);
+ tmp += CIFS_CPHTXT_SIZE;
+ } else {
+- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
++ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
+ sec_blob->SessionKey.Length = 0;
+ sec_blob->SessionKey.MaximumLength = 0;
+ }
+
++ *buflen = tmp - *pbuffer;
+ setup_ntlmv2_ret:
+- *buflen = tmp - pbuffer;
+ return rc;
+ }
+
+@@ -1266,7 +1286,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
+ struct cifs_ses *ses = sess_data->ses;
+ __u16 bytes_remaining;
+ char *bcc_ptr;
+- char *ntlmsspblob = NULL;
++ unsigned char *ntlmsspblob = NULL;
+ u16 blob_len;
+
+ cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n");
+@@ -1279,19 +1299,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
+ /* Build security blob before we assemble the request */
+ pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
+ smb_buf = (struct smb_hdr *)pSMB;
+- /*
+- * 5 is an empirical value, large enough to hold
+- * authenticate message plus max 10 of av paris,
+- * domain, user, workstation names, flags, etc.
+- */
+- ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE),
+- GFP_KERNEL);
+- if (!ntlmsspblob) {
+- rc = -ENOMEM;
+- goto out;
+- }
+-
+- rc = build_ntlmssp_auth_blob(ntlmsspblob,
++ rc = build_ntlmssp_auth_blob(&ntlmsspblob,
+ &blob_len, ses, sess_data->nls_cp);
+ if (rc)
+ goto out_free_ntlmsspblob;
+diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
+index 8f38e33d365b..29e06db5f187 100644
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -588,7 +588,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
+ u16 blob_length = 0;
+ struct key *spnego_key = NULL;
+ char *security_blob = NULL;
+- char *ntlmssp_blob = NULL;
++ unsigned char *ntlmssp_blob = NULL;
+ bool use_spnego = false; /* else use raw ntlmssp */
+
+ cifs_dbg(FYI, "Session Setup\n");
+@@ -713,13 +713,7 @@ ssetup_ntlmssp_authenticate:
+ iov[1].iov_len = blob_length;
+ } else if (phase == NtLmAuthenticate) {
+ req->hdr.SessionId = ses->Suid;
+- ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500,
+- GFP_KERNEL);
+- if (ntlmssp_blob == NULL) {
+- rc = -ENOMEM;
+- goto ssetup_exit;
+- }
+- rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses,
++ rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
+ nls_cp);
+ if (rc) {
+ cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n",
+@@ -1818,6 +1812,33 @@ SMB2_echo(struct TCP_Server_Info *server)
+
+ cifs_dbg(FYI, "In echo request\n");
+
++ if (server->tcpStatus == CifsNeedNegotiate) {
++ struct list_head *tmp, *tmp2;
++ struct cifs_ses *ses;
++ struct cifs_tcon *tcon;
++
++ cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
++ spin_lock(&cifs_tcp_ses_lock);
++ list_for_each(tmp, &server->smb_ses_list) {
++ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
++ list_for_each(tmp2, &ses->tcon_list) {
++ tcon = list_entry(tmp2, struct cifs_tcon,
++ tcon_list);
++ /* add check for persistent handle reconnect */
++ if (tcon && tcon->need_reconnect) {
++ spin_unlock(&cifs_tcp_ses_lock);
++ rc = smb2_reconnect(SMB2_ECHO, tcon);
++ spin_lock(&cifs_tcp_ses_lock);
++ }
++ }
++ }
++ spin_unlock(&cifs_tcp_ses_lock);
++ }
++
++ /* if no session, renegotiate failed above */
++ if (server->tcpStatus == CifsNeedNegotiate)
++ return -EIO;
++
+ rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
+ if (rc)
+ return rc;
+diff --git a/fs/namei.c b/fs/namei.c
+index 30145f8f21ed..aaa3b693ec0b 100644
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -3173,6 +3173,10 @@ retry_lookup:
+ got_write = false;
+ }
+
++ error = follow_managed(&path, nd);
++ if (unlikely(error < 0))
++ return error;
++
+ if (unlikely(d_is_negative(path.dentry))) {
+ path_to_nameidata(&path, nd);
+ return -ENOENT;
+@@ -3188,10 +3192,6 @@ retry_lookup:
+ return -EEXIST;
+ }
+
+- error = follow_managed(&path, nd);
+- if (unlikely(error < 0))
+- return error;
+-
+ seq = 0; /* out of RCU mode, so the value doesn't matter */
+ inode = d_backing_inode(path.dentry);
+ finish_lookup:
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 4fb1691b4355..783004af5707 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2409,8 +2409,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
+ mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
+ }
+ if (type->fs_flags & FS_USERNS_VISIBLE) {
+- if (!fs_fully_visible(type, &mnt_flags))
++ if (!fs_fully_visible(type, &mnt_flags)) {
++ put_filesystem(type);
+ return -EPERM;
++ }
+ }
+ }
+
+@@ -3245,6 +3247,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
+ if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
+ mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
+
++ /* Don't miss readonly hidden in the superblock flags */
++ if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
++ mnt_flags |= MNT_LOCK_READONLY;
++
+ /* Verify the mount flags are equal to or more permissive
+ * than the proposed new mount.
+ */
+@@ -3271,7 +3277,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ struct inode *inode = child->mnt_mountpoint->d_inode;
+ /* Only worry about locked mounts */
+- if (!(mnt_flags & MNT_LOCKED))
++ if (!(child->mnt.mnt_flags & MNT_LOCKED))
+ continue;
+ /* Is the directory permanetly empty? */
+ if (!is_empty_dir_inode(inode))
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index 33eb81738d03..a7dd1fee8f13 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -1527,9 +1527,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
+ err = PTR_ERR(inode);
+ trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
+ put_nfs_open_context(ctx);
++ d_drop(dentry);
+ switch (err) {
+ case -ENOENT:
+- d_drop(dentry);
+ d_add(dentry, NULL);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ break;
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index 327b8c34d360..de2523f5e7c9 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -2860,12 +2860,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
+ call_close |= is_wronly;
+ else if (is_wronly)
+ calldata->arg.fmode |= FMODE_WRITE;
++ if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE))
++ call_close |= is_rdwr;
+ } else if (is_rdwr)
+ calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
+
+- if (calldata->arg.fmode == 0)
+- call_close |= is_rdwr;
+-
+ if (!nfs4_valid_open_stateid(state))
+ call_close = 0;
+ spin_unlock(&state->owner->so_lock);
+diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
+index 776dccbc306d..dcb70001ae2c 100644
+--- a/fs/nfs/pnfs_nfs.c
++++ b/fs/nfs/pnfs_nfs.c
+@@ -247,7 +247,11 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,
+ }
+
+ /* Helper function for pnfs_generic_commit_pagelist to catch an empty
+- * page list. This can happen when two commits race. */
++ * page list. This can happen when two commits race.
++ *
++ * This must be called instead of nfs_init_commit - call one or the other, but
++ * not both!
++ */
+ static bool
+ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
+ struct nfs_commit_data *data,
+@@ -256,7 +260,11 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
+ if (list_empty(pages)) {
+ if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
+ wake_up_atomic_t(&cinfo->mds->rpcs_out);
+- nfs_commitdata_release(data);
++ /* don't call nfs_commitdata_release - it tries to put
++ * the open_context which is not acquired until nfs_init_commit
++ * which has not been called on @data */
++ WARN_ON_ONCE(data->context);
++ nfs_commit_free(data);
+ return true;
+ }
+
+diff --git a/fs/nfs/read.c b/fs/nfs/read.c
+index 6776d7a7839e..572e5b3b06f1 100644
+--- a/fs/nfs/read.c
++++ b/fs/nfs/read.c
+@@ -367,13 +367,13 @@ readpage_async_filler(void *data, struct page *page)
+ nfs_list_remove_request(new);
+ nfs_readpage_release(new);
+ error = desc->pgio->pg_error;
+- goto out_unlock;
++ goto out;
+ }
+ return 0;
+ out_error:
+ error = PTR_ERR(new);
+-out_unlock:
+ unlock_page(page);
++out:
+ return error;
+ }
+
+diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
+index 1580ea6fd64d..d08cd88155c7 100644
+--- a/fs/nfsd/nfs2acl.c
++++ b/fs/nfsd/nfs2acl.c
+@@ -104,22 +104,21 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
+ goto out;
+
+ inode = d_inode(fh->fh_dentry);
+- if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
+- error = -EOPNOTSUPP;
+- goto out_errno;
+- }
+
+ error = fh_want_write(fh);
+ if (error)
+ goto out_errno;
+
+- error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
++ fh_lock(fh);
++
++ error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
+ if (error)
+- goto out_drop_write;
+- error = inode->i_op->set_acl(inode, argp->acl_default,
+- ACL_TYPE_DEFAULT);
++ goto out_drop_lock;
++ error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
+ if (error)
+- goto out_drop_write;
++ goto out_drop_lock;
++
++ fh_unlock(fh);
+
+ fh_drop_write(fh);
+
+@@ -131,7 +130,8 @@ out:
+ posix_acl_release(argp->acl_access);
+ posix_acl_release(argp->acl_default);
+ return nfserr;
+-out_drop_write:
++out_drop_lock:
++ fh_unlock(fh);
+ fh_drop_write(fh);
+ out_errno:
+ nfserr = nfserrno(error);
+diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
+index 01df4cd7c753..0c890347cde3 100644
+--- a/fs/nfsd/nfs3acl.c
++++ b/fs/nfsd/nfs3acl.c
+@@ -95,22 +95,20 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
+ goto out;
+
+ inode = d_inode(fh->fh_dentry);
+- if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
+- error = -EOPNOTSUPP;
+- goto out_errno;
+- }
+
+ error = fh_want_write(fh);
+ if (error)
+ goto out_errno;
+
+- error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
++ fh_lock(fh);
++
++ error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
+ if (error)
+- goto out_drop_write;
+- error = inode->i_op->set_acl(inode, argp->acl_default,
+- ACL_TYPE_DEFAULT);
++ goto out_drop_lock;
++ error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
+
+-out_drop_write:
++out_drop_lock:
++ fh_unlock(fh);
+ fh_drop_write(fh);
+ out_errno:
+ nfserr = nfserrno(error);
+diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
+index 6adabd6049b7..71292a0d6f09 100644
+--- a/fs/nfsd/nfs4acl.c
++++ b/fs/nfsd/nfs4acl.c
+@@ -770,9 +770,6 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ dentry = fhp->fh_dentry;
+ inode = d_inode(dentry);
+
+- if (!inode->i_op->set_acl || !IS_POSIXACL(inode))
+- return nfserr_attrnotsupp;
+-
+ if (S_ISDIR(inode->i_mode))
+ flags = NFS4_ACL_DIR;
+
+@@ -782,16 +779,19 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (host_error < 0)
+ goto out_nfserr;
+
+- host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS);
++ fh_lock(fhp);
++
++ host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl);
+ if (host_error < 0)
+- goto out_release;
++ goto out_drop_lock;
+
+ if (S_ISDIR(inode->i_mode)) {
+- host_error = inode->i_op->set_acl(inode, dpacl,
+- ACL_TYPE_DEFAULT);
++ host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl);
+ }
+
+-out_release:
++out_drop_lock:
++ fh_unlock(fhp);
++
+ posix_acl_release(pacl);
+ posix_acl_release(dpacl);
+ out_nfserr:
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index 7389cb1d7409..04c68d900324 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -710,22 +710,6 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
+ }
+ }
+
+-static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
+-{
+- struct rpc_xprt *xprt;
+-
+- if (args->protocol != XPRT_TRANSPORT_BC_TCP)
+- return rpc_create(args);
+-
+- xprt = args->bc_xprt->xpt_bc_xprt;
+- if (xprt) {
+- xprt_get(xprt);
+- return rpc_create_xprt(args, xprt);
+- }
+-
+- return rpc_create(args);
+-}
+-
+ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
+ {
+ int maxtime = max_cb_time(clp->net);
+@@ -768,7 +752,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
+ args.authflavor = ses->se_cb_sec.flavor;
+ }
+ /* Create RPC client */
+- client = create_backchannel_client(&args);
++ client = rpc_create(&args);
+ if (IS_ERR(client)) {
+ dprintk("NFSD: couldn't create callback client: %ld\n",
+ PTR_ERR(client));
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 0462eeddfff9..9e04e49df681 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -3487,6 +3487,10 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
+ struct nfs4_openowner *oo = open->op_openowner;
+ struct nfs4_ol_stateid *retstp = NULL;
+
++ /* We are moving these outside of the spinlocks to avoid the warnings */
++ mutex_init(&stp->st_mutex);
++ mutex_lock(&stp->st_mutex);
++
+ spin_lock(&oo->oo_owner.so_client->cl_lock);
+ spin_lock(&fp->fi_lock);
+
+@@ -3502,13 +3506,17 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
+ stp->st_access_bmap = 0;
+ stp->st_deny_bmap = 0;
+ stp->st_openstp = NULL;
+- init_rwsem(&stp->st_rwsem);
+ list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
+ list_add(&stp->st_perfile, &fp->fi_stateids);
+
+ out_unlock:
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&oo->oo_owner.so_client->cl_lock);
++ if (retstp) {
++ mutex_lock(&retstp->st_mutex);
++ /* Not that we need to, just for neatness */
++ mutex_unlock(&stp->st_mutex);
++ }
+ return retstp;
+ }
+
+@@ -4335,32 +4343,34 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+ */
+ if (stp) {
+ /* Stateid was found, this is an OPEN upgrade */
+- down_read(&stp->st_rwsem);
++ mutex_lock(&stp->st_mutex);
+ status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
+ if (status) {
+- up_read(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+ goto out;
+ }
+ } else {
+ stp = open->op_stp;
+ open->op_stp = NULL;
++ /*
++ * init_open_stateid() either returns a locked stateid
++ * it found, or initializes and locks the new one we passed in
++ */
+ swapstp = init_open_stateid(stp, fp, open);
+ if (swapstp) {
+ nfs4_put_stid(&stp->st_stid);
+ stp = swapstp;
+- down_read(&stp->st_rwsem);
+ status = nfs4_upgrade_open(rqstp, fp, current_fh,
+ stp, open);
+ if (status) {
+- up_read(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+ goto out;
+ }
+ goto upgrade_out;
+ }
+- down_read(&stp->st_rwsem);
+ status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
+ if (status) {
+- up_read(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+ release_open_stateid(stp);
+ goto out;
+ }
+@@ -4372,7 +4382,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+ }
+ upgrade_out:
+ nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
+- up_read(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+
+ if (nfsd4_has_session(&resp->cstate)) {
+ if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
+@@ -4983,12 +4993,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
+ * revoked delegations are kept only for free_stateid.
+ */
+ return nfserr_bad_stateid;
+- down_write(&stp->st_rwsem);
++ mutex_lock(&stp->st_mutex);
+ status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
+ if (status == nfs_ok)
+ status = nfs4_check_fh(current_fh, &stp->st_stid);
+ if (status != nfs_ok)
+- up_write(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+ return status;
+ }
+
+@@ -5036,7 +5046,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
+ return status;
+ oo = openowner(stp->st_stateowner);
+ if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+- up_write(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+ nfs4_put_stid(&stp->st_stid);
+ return nfserr_bad_stateid;
+ }
+@@ -5068,12 +5078,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ oo = openowner(stp->st_stateowner);
+ status = nfserr_bad_stateid;
+ if (oo->oo_flags & NFS4_OO_CONFIRMED) {
+- up_write(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+ goto put_stateid;
+ }
+ oo->oo_flags |= NFS4_OO_CONFIRMED;
+ nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
+- up_write(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+ dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
+ __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
+
+@@ -5149,7 +5159,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
+ nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid);
+ status = nfs_ok;
+ put_stateid:
+- up_write(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+ nfs4_put_stid(&stp->st_stid);
+ out:
+ nfsd4_bump_seqid(cstate, status);
+@@ -5202,7 +5212,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (status)
+ goto out;
+ nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
+- up_write(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+
+ nfsd4_close_open_stateid(stp);
+
+@@ -5428,7 +5438,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
+ stp->st_access_bmap = 0;
+ stp->st_deny_bmap = open_stp->st_deny_bmap;
+ stp->st_openstp = open_stp;
+- init_rwsem(&stp->st_rwsem);
++ mutex_init(&stp->st_mutex);
+ list_add(&stp->st_locks, &open_stp->st_locks);
+ list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
+ spin_lock(&fp->fi_lock);
+@@ -5597,7 +5607,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ &open_stp, nn);
+ if (status)
+ goto out;
+- up_write(&open_stp->st_rwsem);
++ mutex_unlock(&open_stp->st_mutex);
+ open_sop = openowner(open_stp->st_stateowner);
+ status = nfserr_bad_stateid;
+ if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
+@@ -5606,7 +5616,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = lookup_or_create_lock_state(cstate, open_stp, lock,
+ &lock_stp, &new);
+ if (status == nfs_ok)
+- down_write(&lock_stp->st_rwsem);
++ mutex_lock(&lock_stp->st_mutex);
+ } else {
+ status = nfs4_preprocess_seqid_op(cstate,
+ lock->lk_old_lock_seqid,
+@@ -5710,7 +5720,7 @@ out:
+ seqid_mutating_err(ntohl(status)))
+ lock_sop->lo_owner.so_seqid++;
+
+- up_write(&lock_stp->st_rwsem);
++ mutex_unlock(&lock_stp->st_mutex);
+
+ /*
+ * If this is a new, never-before-used stateid, and we are
+@@ -5880,7 +5890,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ fput:
+ fput(filp);
+ put_stateid:
+- up_write(&stp->st_rwsem);
++ mutex_unlock(&stp->st_mutex);
+ nfs4_put_stid(&stp->st_stid);
+ out:
+ nfsd4_bump_seqid(cstate, status);
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index c050c53036a6..c89d7b55fb9a 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -535,7 +535,7 @@ struct nfs4_ol_stateid {
+ unsigned char st_access_bmap;
+ unsigned char st_deny_bmap;
+ struct nfs4_ol_stateid *st_openstp;
+- struct rw_semaphore st_rwsem;
++ struct mutex st_mutex;
+ };
+
+ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
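
[Editorial note] The nfsd hunks above replace the per-stateid rw_semaphore with a mutex and, in init_open_stateid(), initialize and lock the new stateid before it is published under the client/file spinlocks, so a racing open either gets back the already-locked existing stateid or blocks on its mutex until setup has finished. Below is a minimal userspace sketch of that "lock before publish" pattern using pthreads (compile with -pthread); the structure and helper names are invented for the illustration and are not the nfsd code.

#include <pthread.h>
#include <stdio.h>

/* Sketch only: the new object is locked *before* it becomes visible under
 * the spinlock, so a concurrent lookup that finds it simply blocks on the
 * mutex until the creator has finished initializing it. */
struct stateid {
    pthread_mutex_t st_mutex;
    int initialized;
};

static pthread_spinlock_t table_lock;
static struct stateid *published;

static struct stateid *publish_or_find(struct stateid *new)
{
    struct stateid *found = NULL;

    pthread_mutex_init(&new->st_mutex, NULL);
    pthread_mutex_lock(&new->st_mutex);        /* lock before publishing */

    pthread_spin_lock(&table_lock);
    if (published)
        found = published;                     /* someone beat us to it */
    else
        published = new;
    pthread_spin_unlock(&table_lock);

    if (found) {
        pthread_mutex_lock(&found->st_mutex);  /* wait for its creator */
        pthread_mutex_unlock(&new->st_mutex);  /* drop the unused one */
        return found;
    }
    new->initialized = 1;                      /* safe: mutex still held */
    return new;
}

int main(void)
{
    struct stateid a = { .initialized = 0 };
    struct stateid *got;

    pthread_spin_init(&table_lock, 0);
    got = publish_or_find(&a);
    printf("initialized=%d\n", got->initialized);
    pthread_mutex_unlock(&got->st_mutex);
    return 0;
}
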
+diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
+index b3fc0a35bf62..fb35aa21b34b 100644
+--- a/fs/overlayfs/dir.c
++++ b/fs/overlayfs/dir.c
+@@ -511,6 +511,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
+ struct dentry *upper;
+ struct dentry *opaquedir = NULL;
+ int err;
++ int flags = 0;
+
+ if (WARN_ON(!workdir))
+ return -EROFS;
+@@ -540,46 +541,39 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
+ if (err)
+ goto out_dput;
+
+- whiteout = ovl_whiteout(workdir, dentry);
+- err = PTR_ERR(whiteout);
+- if (IS_ERR(whiteout))
++ upper = lookup_one_len(dentry->d_name.name, upperdir,
++ dentry->d_name.len);
++ err = PTR_ERR(upper);
++ if (IS_ERR(upper))
+ goto out_unlock;
+
+- upper = ovl_dentry_upper(dentry);
+- if (!upper) {
+- upper = lookup_one_len(dentry->d_name.name, upperdir,
+- dentry->d_name.len);
+- err = PTR_ERR(upper);
+- if (IS_ERR(upper))
+- goto kill_whiteout;
+-
+- err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
+- dput(upper);
+- if (err)
+- goto kill_whiteout;
+- } else {
+- int flags = 0;
++ err = -ESTALE;
++ if ((opaquedir && upper != opaquedir) ||
++ (!opaquedir && ovl_dentry_upper(dentry) &&
++ upper != ovl_dentry_upper(dentry))) {
++ goto out_dput_upper;
++ }
+
+- if (opaquedir)
+- upper = opaquedir;
+- err = -ESTALE;
+- if (upper->d_parent != upperdir)
+- goto kill_whiteout;
++ whiteout = ovl_whiteout(workdir, dentry);
++ err = PTR_ERR(whiteout);
++ if (IS_ERR(whiteout))
++ goto out_dput_upper;
+
+- if (is_dir)
+- flags |= RENAME_EXCHANGE;
++ if (d_is_dir(upper))
++ flags = RENAME_EXCHANGE;
+
+- err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
+- if (err)
+- goto kill_whiteout;
++ err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
++ if (err)
++ goto kill_whiteout;
++ if (flags)
++ ovl_cleanup(wdir, upper);
+
+- if (is_dir)
+- ovl_cleanup(wdir, upper);
+- }
+ ovl_dentry_version_inc(dentry->d_parent);
+ out_d_drop:
+ d_drop(dentry);
+ dput(whiteout);
++out_dput_upper:
++ dput(upper);
+ out_unlock:
+ unlock_rename(workdir, upperdir);
+ out_dput:
+diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
+index a4ff5d0d7db9..d46fa609e803 100644
+--- a/fs/overlayfs/inode.c
++++ b/fs/overlayfs/inode.c
+@@ -59,16 +59,40 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
+ if (err)
+ goto out;
+
++ if (attr->ia_valid & ATTR_SIZE) {
++ struct inode *realinode = d_inode(ovl_dentry_real(dentry));
++
++ err = -ETXTBSY;
++ if (atomic_read(&realinode->i_writecount) < 0)
++ goto out_drop_write;
++ }
++
+ err = ovl_copy_up(dentry);
+ if (!err) {
++ struct inode *winode = NULL;
++
+ upperdentry = ovl_dentry_upper(dentry);
+
++ if (attr->ia_valid & ATTR_SIZE) {
++ winode = d_inode(upperdentry);
++ err = get_write_access(winode);
++ if (err)
++ goto out_drop_write;
++ }
++
++ if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
++ attr->ia_valid &= ~ATTR_MODE;
++
+ inode_lock(upperdentry->d_inode);
+ err = notify_change(upperdentry, attr, NULL);
+ if (!err)
+ ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
+ inode_unlock(upperdentry->d_inode);
++
++ if (winode)
++ put_write_access(winode);
+ }
++out_drop_write:
+ ovl_drop_write(dentry);
+ out:
+ return err;
+@@ -121,16 +145,18 @@ int ovl_permission(struct inode *inode, int mask)
+
+ err = vfs_getattr(&realpath, &stat);
+ if (err)
+- return err;
++ goto out_dput;
+
++ err = -ESTALE;
+ if ((stat.mode ^ inode->i_mode) & S_IFMT)
+- return -ESTALE;
++ goto out_dput;
+
+ inode->i_mode = stat.mode;
+ inode->i_uid = stat.uid;
+ inode->i_gid = stat.gid;
+
+- return generic_permission(inode, mask);
++ err = generic_permission(inode, mask);
++ goto out_dput;
+ }
+
+ /* Careful in RCU walk mode */
+@@ -400,12 +426,11 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
+ if (!inode)
+ return NULL;
+
+- mode &= S_IFMT;
+-
+ inode->i_ino = get_next_ino();
+ inode->i_mode = mode;
+ inode->i_flags |= S_NOATIME | S_NOCMTIME;
+
++ mode &= S_IFMT;
+ switch (mode) {
+ case S_IFDIR:
+ inode->i_private = oe;
+diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
+index 6a7090f4a441..294ccc0c1fc7 100644
+--- a/fs/overlayfs/overlayfs.h
++++ b/fs/overlayfs/overlayfs.h
+@@ -185,6 +185,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
+ {
+ to->i_uid = from->i_uid;
+ to->i_gid = from->i_gid;
++ to->i_mode = from->i_mode;
+ }
+
+ /* dir.c */
+diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
+index 791235e03d17..7952a50f0a72 100644
+--- a/fs/overlayfs/super.c
++++ b/fs/overlayfs/super.c
+@@ -1064,16 +1064,21 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+ /*
+ * Upper should support d_type, else whiteouts are visible.
+ * Given workdir and upper are on same fs, we can do
+- * iterate_dir() on workdir.
++ * iterate_dir() on workdir. This check requires successful
++ * creation of workdir in previous step.
+ */
+- err = ovl_check_d_type_supported(&workpath);
+- if (err < 0)
+- goto out_put_workdir;
++ if (ufs->workdir) {
++ err = ovl_check_d_type_supported(&workpath);
++ if (err < 0)
++ goto out_put_workdir;
+
+- if (!err) {
+- pr_err("overlayfs: upper fs needs to support d_type.\n");
+- err = -EINVAL;
+- goto out_put_workdir;
++ /*
++ * We allowed this configuration and don't want to
++ * break users over kernel upgrade. So warn instead
++ * of erroring out.
++ */
++ if (!err)
++ pr_warn("overlayfs: upper fs needs to support d_type.\n");
+ }
+ }
+
+diff --git a/fs/posix_acl.c b/fs/posix_acl.c
+index 711dd5170376..e11ea5fb1bad 100644
+--- a/fs/posix_acl.c
++++ b/fs/posix_acl.c
+@@ -786,39 +786,43 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
+ return error;
+ }
+
+-static int
+-posix_acl_xattr_set(const struct xattr_handler *handler,
+- struct dentry *dentry, const char *name,
+- const void *value, size_t size, int flags)
++int
++set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
+ {
+- struct inode *inode = d_backing_inode(dentry);
+- struct posix_acl *acl = NULL;
+- int ret;
+-
+ if (!IS_POSIXACL(inode))
+ return -EOPNOTSUPP;
+ if (!inode->i_op->set_acl)
+ return -EOPNOTSUPP;
+
+- if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+- return value ? -EACCES : 0;
++ if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
++ return acl ? -EACCES : 0;
+ if (!inode_owner_or_capable(inode))
+ return -EPERM;
+
++ if (acl) {
++ int ret = posix_acl_valid(acl);
++ if (ret)
++ return ret;
++ }
++ return inode->i_op->set_acl(inode, acl, type);
++}
++EXPORT_SYMBOL(set_posix_acl);
++
++static int
++posix_acl_xattr_set(const struct xattr_handler *handler,
++ struct dentry *dentry, const char *name,
++ const void *value, size_t size, int flags)
++{
++ struct inode *inode = d_backing_inode(dentry);
++ struct posix_acl *acl = NULL;
++ int ret;
++
+ if (value) {
+ acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+-
+- if (acl) {
+- ret = posix_acl_valid(acl);
+- if (ret)
+- goto out;
+- }
+ }
+-
+- ret = inode->i_op->set_acl(inode, acl, handler->flags);
+-out:
++ ret = set_posix_acl(inode, handler->flags, acl);
+ posix_acl_release(acl);
+ return ret;
+ }
+diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
+index 446753d8ac34..5b5ec8d0f324 100644
+--- a/fs/ubifs/file.c
++++ b/fs/ubifs/file.c
+@@ -52,6 +52,7 @@
+ #include "ubifs.h"
+ #include <linux/mount.h>
+ #include <linux/slab.h>
++#include <linux/migrate.h>
+
+ static int read_block(struct inode *inode, void *addr, unsigned int block,
+ struct ubifs_data_node *dn)
+@@ -1452,6 +1453,26 @@ static int ubifs_set_page_dirty(struct page *page)
+ return ret;
+ }
+
++#ifdef CONFIG_MIGRATION
++static int ubifs_migrate_page(struct address_space *mapping,
++ struct page *newpage, struct page *page, enum migrate_mode mode)
++{
++ int rc;
++
++ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
++ if (rc != MIGRATEPAGE_SUCCESS)
++ return rc;
++
++ if (PagePrivate(page)) {
++ ClearPagePrivate(page);
++ SetPagePrivate(newpage);
++ }
++
++ migrate_page_copy(newpage, page);
++ return MIGRATEPAGE_SUCCESS;
++}
++#endif
++
+ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
+ {
+ /*
+@@ -1591,6 +1612,9 @@ const struct address_space_operations ubifs_file_address_operations = {
+ .write_end = ubifs_write_end,
+ .invalidatepage = ubifs_invalidatepage,
+ .set_page_dirty = ubifs_set_page_dirty,
++#ifdef CONFIG_MIGRATION
++ .migratepage = ubifs_migrate_page,
++#endif
+ .releasepage = ubifs_releasepage,
+ };
+
+diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
+index 6bd05700d8c9..05f05f17a7c2 100644
+--- a/include/asm-generic/qspinlock.h
++++ b/include/asm-generic/qspinlock.h
+@@ -22,37 +22,33 @@
+ #include <asm-generic/qspinlock_types.h>
+
+ /**
++ * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock
++ * @lock : Pointer to queued spinlock structure
++ *
++ * There is a very slight possibility of live-lock if the lockers keep coming
++ * and the waiter is just unfortunate enough to not see any unlock state.
++ */
++#ifndef queued_spin_unlock_wait
++extern void queued_spin_unlock_wait(struct qspinlock *lock);
++#endif
++
++/**
+ * queued_spin_is_locked - is the spinlock locked?
+ * @lock: Pointer to queued spinlock structure
+ * Return: 1 if it is locked, 0 otherwise
+ */
++#ifndef queued_spin_is_locked
+ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
+ {
+ /*
+- * queued_spin_lock_slowpath() can ACQUIRE the lock before
+- * issuing the unordered store that sets _Q_LOCKED_VAL.
+- *
+- * See both smp_cond_acquire() sites for more detail.
+- *
+- * This however means that in code like:
+- *
+- * spin_lock(A) spin_lock(B)
+- * spin_unlock_wait(B) spin_is_locked(A)
+- * do_something() do_something()
+- *
+- * Both CPUs can end up running do_something() because the store
+- * setting _Q_LOCKED_VAL will pass through the loads in
+- * spin_unlock_wait() and/or spin_is_locked().
++ * See queued_spin_unlock_wait().
+ *
+- * Avoid this by issuing a full memory barrier between the spin_lock()
+- * and the loads in spin_unlock_wait() and spin_is_locked().
+- *
+- * Note that regular mutual exclusion doesn't care about this
+- * delayed store.
++ * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL
++ * isn't immediately observable.
+ */
+- smp_mb();
+- return atomic_read(&lock->val) & _Q_LOCKED_MASK;
++ return atomic_read(&lock->val);
+ }
++#endif
+
+ /**
+ * queued_spin_value_unlocked - is the spinlock structure unlocked?
+@@ -122,21 +118,6 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
+ }
+ #endif
+
+-/**
+- * queued_spin_unlock_wait - wait until current lock holder releases the lock
+- * @lock : Pointer to queued spinlock structure
+- *
+- * There is a very slight possibility of live-lock if the lockers keep coming
+- * and the waiter is just unfortunate enough to not see any unlock state.
+- */
+-static inline void queued_spin_unlock_wait(struct qspinlock *lock)
+-{
+- /* See queued_spin_is_locked() */
+- smp_mb();
+- while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+- cpu_relax();
+-}
+-
+ #ifndef virt_spin_lock
+ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
+ {
+diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
+index 055a08ddac02..a74c49d7c0fc 100644
+--- a/include/drm/ttm/ttm_bo_api.h
++++ b/include/drm/ttm/ttm_bo_api.h
+@@ -316,6 +316,20 @@ ttm_bo_reference(struct ttm_buffer_object *bo)
+ */
+ extern int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy,
+ bool interruptible, bool no_wait);
++
++/**
++ * ttm_bo_mem_compat - Check if proposed placement is compatible with a bo
++ *
++ * @placement: Return immediately if buffer is busy.
++ * @mem: The struct ttm_mem_reg indicating the region where the bo resides
++ * @new_flags: Describes compatible placement found
++ *
++ * Returns true if the placement is compatible
++ */
++extern bool ttm_bo_mem_compat(struct ttm_placement *placement,
++ struct ttm_mem_reg *mem,
++ uint32_t *new_flags);
++
+ /**
+ * ttm_bo_validate
+ *
+diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
+index 786ad32631a6..07b83d32f66c 100644
+--- a/include/linux/cpuidle.h
++++ b/include/linux/cpuidle.h
+@@ -152,6 +152,8 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev);
+ extern int cpuidle_play_dead(void);
+
+ extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
++static inline struct cpuidle_device *cpuidle_get_device(void)
++{return __this_cpu_read(cpuidle_devices); }
+ #else
+ static inline void disable_cpuidle(void) { }
+ static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
+@@ -187,6 +189,7 @@ static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
+ static inline int cpuidle_play_dead(void) {return -ENODEV; }
+ static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
+ struct cpuidle_device *dev) {return NULL; }
++static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; }
+ #endif
+
+ #if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SUSPEND)
+diff --git a/include/linux/dcache.h b/include/linux/dcache.h
+index 7e9422cb5989..ad5d582f9b14 100644
+--- a/include/linux/dcache.h
++++ b/include/linux/dcache.h
+@@ -576,5 +576,17 @@ static inline struct inode *vfs_select_inode(struct dentry *dentry,
+ return inode;
+ }
+
++/**
++ * d_real_inode - Return the real inode
++ * @dentry: The dentry to query
++ *
++ * If dentry is on a union/overlay, then return the underlying, real inode.
++ * Otherwise return d_inode().
++ */
++static inline struct inode *d_real_inode(struct dentry *dentry)
++{
++ return d_backing_inode(d_real(dentry));
++}
++
+
+ #endif /* __LINUX_DCACHE_H */
+diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
+index 0536524bb9eb..68904469fba1 100644
+--- a/include/linux/jump_label.h
++++ b/include/linux/jump_label.h
+@@ -117,13 +117,18 @@ struct module;
+
+ #include <linux/atomic.h>
+
++#ifdef HAVE_JUMP_LABEL
++
+ static inline int static_key_count(struct static_key *key)
+ {
+- return atomic_read(&key->enabled);
++ /*
++ * -1 means the first static_key_slow_inc() is in progress.
++ * static_key_enabled() must return true, so return 1 here.
++ */
++ int n = atomic_read(&key->enabled);
++ return n >= 0 ? n : 1;
+ }
+
+-#ifdef HAVE_JUMP_LABEL
+-
+ #define JUMP_TYPE_FALSE 0UL
+ #define JUMP_TYPE_TRUE 1UL
+ #define JUMP_TYPE_MASK 1UL
+@@ -162,6 +167,11 @@ extern void jump_label_apply_nops(struct module *mod);
+
+ #else /* !HAVE_JUMP_LABEL */
+
++static inline int static_key_count(struct static_key *key)
++{
++ return atomic_read(&key->enabled);
++}
++
+ static __always_inline void jump_label_init(void)
+ {
+ static_key_initialized = true;
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 15d0df943466..794b924e9669 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1062,6 +1062,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
+ }
+
+ void __skb_get_hash(struct sk_buff *skb);
++u32 __skb_get_hash_symmetric(struct sk_buff *skb);
+ u32 skb_get_poff(const struct sk_buff *skb);
+ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+ const struct flow_keys *keys, int hlen);
+@@ -2860,6 +2861,25 @@ static inline void skb_postpush_rcsum(struct sk_buff *skb,
+ }
+
+ /**
++ * skb_push_rcsum - push skb and update receive checksum
++ * @skb: buffer to update
++ * @len: length of data pulled
++ *
++ * This function performs an skb_push on the packet and updates
++ * the CHECKSUM_COMPLETE checksum. It should be used on
++ * receive path processing instead of skb_push unless you know
++ * that the checksum difference is zero (e.g., a valid IP header)
++ * or you are setting ip_summed to CHECKSUM_NONE.
++ */
++static inline unsigned char *skb_push_rcsum(struct sk_buff *skb,
++ unsigned int len)
++{
++ skb_push(skb, len);
++ skb_postpush_rcsum(skb, skb->data, len);
++ return skb->data;
++}
++
++/**
+ * pskb_trim_rcsum - trim received skb and update checksum
+ * @skb: buffer to trim
+ * @len: new length
+diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
+index 9a7ddbaf116e..14d70f59f0c2 100644
+--- a/include/linux/sunrpc/clnt.h
++++ b/include/linux/sunrpc/clnt.h
+@@ -137,8 +137,6 @@ struct rpc_create_args {
+ #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9)
+
+ struct rpc_clnt *rpc_create(struct rpc_create_args *args);
+-struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+- struct rpc_xprt *xprt);
+ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
+ const struct rpc_program *, u32);
+ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
+diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
+index b7dabc4baafd..79ba50856707 100644
+--- a/include/linux/sunrpc/svc_xprt.h
++++ b/include/linux/sunrpc/svc_xprt.h
+@@ -84,6 +84,7 @@ struct svc_xprt {
+
+ struct net *xpt_net;
+ struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */
++ struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */
+ };
+
+ static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
+diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
+index fb0d212e0d3a..9f51e1df3023 100644
+--- a/include/linux/sunrpc/xprt.h
++++ b/include/linux/sunrpc/xprt.h
+@@ -296,6 +296,7 @@ struct xprt_create {
+ size_t addrlen;
+ const char *servername;
+ struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
++ struct rpc_xprt_switch *bc_xps;
+ unsigned int flags;
+ };
+
+diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
+index 966889a20ea3..e479033bd782 100644
+--- a/include/linux/usb/ehci_def.h
++++ b/include/linux/usb/ehci_def.h
+@@ -180,11 +180,11 @@ struct ehci_regs {
+ * PORTSCx
+ */
+ /* HOSTPC: offset 0x84 */
+- u32 hostpc[1]; /* HOSTPC extension */
++ u32 hostpc[0]; /* HOSTPC extension */
+ #define HOSTPC_PHCD (1<<22) /* Phy clock disable */
+ #define HOSTPC_PSPD (3<<25) /* Port speed detection */
+
+- u32 reserved5[16];
++ u32 reserved5[17];
+
+ /* USBMODE_EX: offset 0xc8 */
+ u32 usbmode_ex; /* USB Device mode extension */
+diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
+index fb2cef4e9747..b8334a637095 100644
+--- a/include/rdma/ib_verbs.h
++++ b/include/rdma/ib_verbs.h
+@@ -217,7 +217,7 @@ enum ib_device_cap_flags {
+ IB_DEVICE_CROSS_CHANNEL = (1 << 27),
+ IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
+ IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
+- IB_DEVICE_ON_DEMAND_PAGING = (1 << 31),
++ IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
+ IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
+ IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33),
+ };
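
[Editorial note] The ib_verbs.h change above widens IB_DEVICE_ON_DEMAND_PAGING from (1 << 31) to (1ULL << 31): shifting the int constant 1 by 31 overflows the signed type, and the typically negative result then sign-extends when it lands in the 64-bit capability mask next to flags such as IB_DEVICE_SG_GAPS_REG. A small standalone C program showing the difference (the exact value of the buggy form is not guaranteed, since signed shift overflow is undefined behaviour):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Undefined behaviour: 1 is an int, so 1 << 31 overflows; on common
     * compilers it yields INT_MIN, which sign-extends into the upper bits. */
    uint64_t buggy = (1 << 31);

    /* Well defined: the shift is done in unsigned 64-bit arithmetic. */
    uint64_t fixed = (1ULL << 31);

    printf("buggy: 0x%016llx\n", (unsigned long long)buggy);
    printf("fixed: 0x%016llx\n", (unsigned long long)fixed);
    return 0;
}
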
+diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
+index a8696551abb1..6ee9d97004d5 100644
+--- a/include/rdma/rdma_vt.h
++++ b/include/rdma/rdma_vt.h
+@@ -203,7 +203,9 @@ struct rvt_driver_provided {
+
+ /*
+ * Allocate a private queue pair data structure for driver specific
+- * information which is opaque to rdmavt.
++ * information which is opaque to rdmavt. Errors are returned via
++ * ERR_PTR(err). The driver is free to return NULL or a valid
++ * pointer.
+ */
+ void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ gfp_t gfp);
+diff --git a/kernel/futex.c b/kernel/futex.c
+index c20f06f38ef3..6555d5459e98 100644
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -469,7 +469,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
+ {
+ unsigned long address = (unsigned long)uaddr;
+ struct mm_struct *mm = current->mm;
+- struct page *page;
++ struct page *page, *tail;
+ struct address_space *mapping;
+ int err, ro = 0;
+
+@@ -530,7 +530,15 @@ again:
+ * considered here and page lock forces unnecessarily serialization
+ * From this point on, mapping will be re-verified if necessary and
+ * page lock will be acquired only if it is unavoidable
+- */
++ *
++ * Mapping checks require the head page for any compound page so the
++ * head page and mapping is looked up now. For anonymous pages, it
++ * does not matter if the page splits in the future as the key is
++ * based on the address. For filesystem-backed pages, the tail is
++ * required as the index of the page determines the key. For
++ * base pages, there is no tail page and tail == page.
++ */
++ tail = page;
+ page = compound_head(page);
+ mapping = READ_ONCE(page->mapping);
+
+@@ -654,7 +662,7 @@ again:
+
+ key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+ key->shared.inode = inode;
+- key->shared.pgoff = basepage_index(page);
++ key->shared.pgoff = basepage_index(tail);
+ rcu_read_unlock();
+ }
+
+diff --git a/kernel/jump_label.c b/kernel/jump_label.c
+index 05254eeb4b4e..4b353e0be121 100644
+--- a/kernel/jump_label.c
++++ b/kernel/jump_label.c
+@@ -58,13 +58,36 @@ static void jump_label_update(struct static_key *key);
+
+ void static_key_slow_inc(struct static_key *key)
+ {
++ int v, v1;
++
+ STATIC_KEY_CHECK_USE();
+- if (atomic_inc_not_zero(&key->enabled))
+- return;
++
++ /*
++ * Careful if we get concurrent static_key_slow_inc() calls;
++ * later calls must wait for the first one to _finish_ the
++ * jump_label_update() process. At the same time, however,
++ * the jump_label_update() call below wants to see
++ * static_key_enabled(&key) for jumps to be updated properly.
++ *
++ * So give a special meaning to negative key->enabled: it sends
++ * static_key_slow_inc() down the slow path, and it is non-zero
++ * so it counts as "enabled" in jump_label_update(). Note that
++ * atomic_inc_unless_negative() checks >= 0, so roll our own.
++ */
++ for (v = atomic_read(&key->enabled); v > 0; v = v1) {
++ v1 = atomic_cmpxchg(&key->enabled, v, v + 1);
++ if (likely(v1 == v))
++ return;
++ }
+
+ jump_label_lock();
+- if (atomic_inc_return(&key->enabled) == 1)
++ if (atomic_read(&key->enabled) == 0) {
++ atomic_set(&key->enabled, -1);
+ jump_label_update(key);
++ atomic_set(&key->enabled, 1);
++ } else {
++ atomic_inc(&key->enabled);
++ }
+ jump_label_unlock();
+ }
+ EXPORT_SYMBOL_GPL(static_key_slow_inc);
+@@ -72,6 +95,13 @@ EXPORT_SYMBOL_GPL(static_key_slow_inc);
+ static void __static_key_slow_dec(struct static_key *key,
+ unsigned long rate_limit, struct delayed_work *work)
+ {
++ /*
++ * The negative count check is valid even when a negative
++ * key->enabled is in use by static_key_slow_inc(); a
++ * __static_key_slow_dec() before the first static_key_slow_inc()
++ * returns is unbalanced, because all other static_key_slow_inc()
++ * instances block while the update is in progress.
++ */
+ if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
+ WARN(atomic_read(&key->enabled) < 0,
+ "jump label: negative count!\n");
+diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
+index e364b424b019..79d2d765a75f 100644
+--- a/kernel/locking/mutex.c
++++ b/kernel/locking/mutex.c
+@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
+ if (!hold_ctx)
+ return 0;
+
+- if (unlikely(ctx == hold_ctx))
+- return -EALREADY;
+-
+ if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
+ (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
+ #ifdef CONFIG_DEBUG_MUTEXES
+@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
+ unsigned long flags;
+ int ret;
+
++ if (use_ww_ctx) {
++ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
++ if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
++ return -EALREADY;
++ }
++
+ preempt_disable();
+ mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
+
+diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
+index ce2f75e32ae1..5fc8c311b8fe 100644
+--- a/kernel/locking/qspinlock.c
++++ b/kernel/locking/qspinlock.c
+@@ -267,6 +267,66 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
+ #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
+ #endif
+
++/*
++ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
++ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
++ *
++ * This means that the store can be delayed, but no later than the
++ * store-release from the unlock. This means that simply observing
++ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
++ *
++ * There are two paths that can issue the unordered store:
++ *
++ * (1) clear_pending_set_locked(): *,1,0 -> *,0,1
++ *
++ * (2) set_locked(): t,0,0 -> t,0,1 ; t != 0
++ * atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1
++ *
++ * However, in both cases we have other !0 state we've set before to queue
++ * ourselves:
++ *
++ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
++ * load is constrained by that ACQUIRE to not pass before that, and thus must
++ * observe the store.
++ *
++ * For (2) we have a more interesting scenario. We enqueue ourselves using
++ * xchg_tail(), which ends up being a RELEASE. This in itself is not
++ * sufficient, however that is followed by an smp_cond_acquire() on the same
++ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
++ * guarantees we must observe that store.
++ *
++ * Therefore both cases have other !0 state that is observable before the
++ * unordered locked byte store comes through. This means we can use that to
++ * wait for the lock store, and then wait for an unlock.
++ */
++#ifndef queued_spin_unlock_wait
++void queued_spin_unlock_wait(struct qspinlock *lock)
++{
++ u32 val;
++
++ for (;;) {
++ val = atomic_read(&lock->val);
++
++ if (!val) /* not locked, we're done */
++ goto done;
++
++ if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
++ break;
++
++ /* not locked, but pending, wait until we observe the lock */
++ cpu_relax();
++ }
++
++ /* any unlock is good */
++ while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
++ cpu_relax();
++
++done:
++ smp_rmb(); /* CTRL + RMB -> ACQUIRE */
++}
++EXPORT_SYMBOL(queued_spin_unlock_wait);
++#endif
++
+ #endif /* _GEN_PV_LOCK_SLOWPATH */
+
+ /**
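
[Editorial note] The qspinlock hunk above turns queued_spin_unlock_wait() into an out-of-line, two-phase wait: first spin until the lock word is either zero or has its locked byte actually set (so the delayed, unordered store from the slowpath cannot be missed), then spin until that locked byte clears, finishing with an acquire barrier. A very rough userspace sketch of that control flow, assuming a lock word whose low byte is the locked byte; it only mirrors the shape, not the kernel's memory-ordering guarantees:

#include <stdatomic.h>
#include <sched.h>

#define Q_LOCKED_MASK 0xffu

static void unlock_wait(atomic_uint *lockword)
{
    unsigned int val;

    for (;;) {
        val = atomic_load_explicit(lockword, memory_order_relaxed);
        if (!val)                       /* not locked at all: done */
            goto done;
        if (val & Q_LOCKED_MASK)        /* locked: go wait for the unlock */
            break;
        sched_yield();                  /* pending, wait for the lock store */
    }

    while (atomic_load_explicit(lockword, memory_order_relaxed) & Q_LOCKED_MASK)
        sched_yield();
done:
    atomic_thread_fence(memory_order_acquire);
}

int main(void)
{
    atomic_uint lockword = 0;

    unlock_wait(&lockword);             /* returns immediately: unlocked */
    return 0;
}
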
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index e7dd0ec169be..eeaf920f46b9 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -2821,6 +2821,23 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
+
+ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
+
++/*
++ * Unsigned subtract and clamp on underflow.
++ *
++ * Explicitly do a load-store to ensure the intermediate value never hits
++ * memory. This allows lockless observations without ever seeing the negative
++ * values.
++ */
++#define sub_positive(_ptr, _val) do { \
++ typeof(_ptr) ptr = (_ptr); \
++ typeof(*ptr) val = (_val); \
++ typeof(*ptr) res, var = READ_ONCE(*ptr); \
++ res = var - val; \
++ if (res > var) \
++ res = 0; \
++ WRITE_ONCE(*ptr, res); \
++} while (0)
++
+ /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
+ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+ {
+@@ -2829,15 +2846,15 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+
+ if (atomic_long_read(&cfs_rq->removed_load_avg)) {
+ s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
+- sa->load_avg = max_t(long, sa->load_avg - r, 0);
+- sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
++ sub_positive(&sa->load_avg, r);
++ sub_positive(&sa->load_sum, r * LOAD_AVG_MAX);
+ removed = 1;
+ }
+
+ if (atomic_long_read(&cfs_rq->removed_util_avg)) {
+ long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
+- sa->util_avg = max_t(long, sa->util_avg - r, 0);
+- sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
++ sub_positive(&sa->util_avg, r);
++ sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
+ }
+
+ decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
+@@ -2927,10 +2944,10 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
+ &se->avg, se->on_rq * scale_load_down(se->load.weight),
+ cfs_rq->curr == se, NULL);
+
+- cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
+- cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
+- cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
+- cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
++ sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
++ sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
++ sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
++ sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+ }
+
+ /* Add the load generated by se into cfs_rq's load average */
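
[Editorial note] sub_positive() in the fair.c hunk replaces the max_t()-based clamping: it loads once, subtracts, clamps on unsigned underflow (detected by the result growing past the original value), and stores with WRITE_ONCE() so lockless readers never see a transient wrapped value. A standalone C version of the same macro, with made-up numbers just to show the clamp:

#include <stdio.h>

#define READ_ONCE(x)     (*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

#define sub_positive(_ptr, _val) do {                        \
    __typeof__(_ptr) ptr = (_ptr);                           \
    __typeof__(*ptr) val = (_val);                           \
    __typeof__(*ptr) res, var = READ_ONCE(*ptr);             \
    res = var - val;                                         \
    if (res > var)          /* unsigned underflow wrapped */ \
        res = 0;                                             \
    WRITE_ONCE(*ptr, res);                                   \
} while (0)

int main(void)
{
    unsigned long load_avg = 100;

    sub_positive(&load_avg, 30UL);      /* 100 - 30 = 70 */
    sub_positive(&load_avg, 500UL);     /* would wrap, clamped to 0 */
    printf("load_avg = %lu\n", load_avg);
    return 0;
}
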
+diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
+index bd12c6c714ec..c5aeedf4e93a 100644
+--- a/kernel/sched/idle.c
++++ b/kernel/sched/idle.c
+@@ -127,7 +127,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ */
+ static void cpuidle_idle_call(void)
+ {
+- struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
++ struct cpuidle_device *dev = cpuidle_get_device();
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+ int next_state, entered_state;
+
+diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
+index f96f0383f6c6..ad1d6164e946 100644
+--- a/kernel/trace/trace_printk.c
++++ b/kernel/trace/trace_printk.c
+@@ -36,6 +36,10 @@ struct trace_bprintk_fmt {
+ static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
+ {
+ struct trace_bprintk_fmt *pos;
++
++ if (!fmt)
++ return ERR_PTR(-EINVAL);
++
+ list_for_each_entry(pos, &trace_bprintk_fmt_list, list) {
+ if (!strcmp(pos->fmt, fmt))
+ return pos;
+@@ -57,7 +61,8 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
+ for (iter = start; iter < end; iter++) {
+ struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
+ if (tb_fmt) {
+- *iter = tb_fmt->fmt;
++ if (!IS_ERR(tb_fmt))
++ *iter = tb_fmt->fmt;
+ continue;
+ }
+
+diff --git a/mm/migrate.c b/mm/migrate.c
+index f9dfb18a4eba..bdf3410bb4fa 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -431,6 +431,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
+
+ return MIGRATEPAGE_SUCCESS;
+ }
++EXPORT_SYMBOL(migrate_page_move_mapping);
+
+ /*
+ * The expected number of remaining references is the same as that
+@@ -586,6 +587,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
+
+ mem_cgroup_migrate(page, newpage);
+ }
++EXPORT_SYMBOL(migrate_page_copy);
+
+ /************************************************************
+ * Migration functions
+diff --git a/mm/page-writeback.c b/mm/page-writeback.c
+index bc5149d5ec38..e389f0a998f1 100644
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -369,8 +369,9 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
+ struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
+ unsigned long bytes = vm_dirty_bytes;
+ unsigned long bg_bytes = dirty_background_bytes;
+- unsigned long ratio = vm_dirty_ratio;
+- unsigned long bg_ratio = dirty_background_ratio;
++ /* convert ratios to per-PAGE_SIZE for higher precision */
++ unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
++ unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
+ unsigned long thresh;
+ unsigned long bg_thresh;
+ struct task_struct *tsk;
+@@ -382,26 +383,28 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
+ /*
+ * The byte settings can't be applied directly to memcg
+ * domains. Convert them to ratios by scaling against
+- * globally available memory.
++ * globally available memory. As the ratios are in
++ * per-PAGE_SIZE, they can be obtained by dividing bytes by
++ * number of pages.
+ */
+ if (bytes)
+- ratio = min(DIV_ROUND_UP(bytes, PAGE_SIZE) * 100 /
+- global_avail, 100UL);
++ ratio = min(DIV_ROUND_UP(bytes, global_avail),
++ PAGE_SIZE);
+ if (bg_bytes)
+- bg_ratio = min(DIV_ROUND_UP(bg_bytes, PAGE_SIZE) * 100 /
+- global_avail, 100UL);
++ bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
++ PAGE_SIZE);
+ bytes = bg_bytes = 0;
+ }
+
+ if (bytes)
+ thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
+ else
+- thresh = (ratio * available_memory) / 100;
++ thresh = (ratio * available_memory) / PAGE_SIZE;
+
+ if (bg_bytes)
+ bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
+ else
+- bg_thresh = (bg_ratio * available_memory) / 100;
++ bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
+
+ if (bg_thresh >= thresh)
+ bg_thresh = thresh / 2;
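
[Editorial note] The page-writeback.c change above stops converting memcg dirty_bytes / dirty_background_bytes to whole percentages and keeps them as per-PAGE_SIZE fractions, which matters because a small byte limit against a large domain used to round down to 0%. A quick standalone check with invented sizes (a 64 GiB domain and dirty_bytes = 64 MiB) that reproduces the rounding problem:

#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    unsigned long long global_avail = (64ULL << 30) / PAGE_SIZE; /* 64 GiB in pages */
    unsigned long long bytes = 64ULL << 20;                      /* dirty_bytes = 64 MiB */
    unsigned long long available_memory = global_avail;

    /* Old conversion: whole percent. 64 MiB of 64 GiB is well under 1%,
     * so the ratio truncates to 0 and the threshold becomes 0 pages. */
    unsigned long long old_ratio = DIV_ROUND_UP(bytes, PAGE_SIZE) * 100 / global_avail;
    unsigned long long old_thresh = old_ratio * available_memory / 100;

    /* New conversion: a per-PAGE_SIZE fraction (4096 steps instead of 100),
     * which preserves the intended 16384-page (64 MiB) threshold. */
    unsigned long long new_ratio = DIV_ROUND_UP(bytes, global_avail);
    unsigned long long new_thresh = new_ratio * available_memory / PAGE_SIZE;

    printf("old threshold: %llu pages, new threshold: %llu pages\n",
           old_thresh, new_thresh);
    return 0;
}
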
+diff --git a/mm/percpu.c b/mm/percpu.c
+index 0c59684f1ff2..9903830aaebb 100644
+--- a/mm/percpu.c
++++ b/mm/percpu.c
+@@ -112,7 +112,7 @@ struct pcpu_chunk {
+ int map_used; /* # of map entries used before the sentry */
+ int map_alloc; /* # of map entries allocated */
+ int *map; /* allocation map */
+- struct work_struct map_extend_work;/* async ->map[] extension */
++ struct list_head map_extend_list;/* on pcpu_map_extend_chunks */
+
+ void *data; /* chunk data */
+ int first_free; /* no free below this */
+@@ -162,10 +162,13 @@ static struct pcpu_chunk *pcpu_reserved_chunk;
+ static int pcpu_reserved_chunk_limit;
+
+ static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */
+-static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */
++static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */
+
+ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
+
++/* chunks which need their map areas extended, protected by pcpu_lock */
++static LIST_HEAD(pcpu_map_extend_chunks);
++
+ /*
+ * The number of empty populated pages, protected by pcpu_lock. The
+ * reserved chunk doesn't contribute to the count.
+@@ -395,13 +398,19 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic)
+ {
+ int margin, new_alloc;
+
++ lockdep_assert_held(&pcpu_lock);
++
+ if (is_atomic) {
+ margin = 3;
+
+ if (chunk->map_alloc <
+- chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW &&
+- pcpu_async_enabled)
+- schedule_work(&chunk->map_extend_work);
++ chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) {
++ if (list_empty(&chunk->map_extend_list)) {
++ list_add_tail(&chunk->map_extend_list,
++ &pcpu_map_extend_chunks);
++ pcpu_schedule_balance_work();
++ }
++ }
+ } else {
+ margin = PCPU_ATOMIC_MAP_MARGIN_HIGH;
+ }
+@@ -435,6 +444,8 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
+ size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
+ unsigned long flags;
+
++ lockdep_assert_held(&pcpu_alloc_mutex);
++
+ new = pcpu_mem_zalloc(new_size);
+ if (!new)
+ return -ENOMEM;
+@@ -467,20 +478,6 @@ out_unlock:
+ return 0;
+ }
+
+-static void pcpu_map_extend_workfn(struct work_struct *work)
+-{
+- struct pcpu_chunk *chunk = container_of(work, struct pcpu_chunk,
+- map_extend_work);
+- int new_alloc;
+-
+- spin_lock_irq(&pcpu_lock);
+- new_alloc = pcpu_need_to_extend(chunk, false);
+- spin_unlock_irq(&pcpu_lock);
+-
+- if (new_alloc)
+- pcpu_extend_area_map(chunk, new_alloc);
+-}
+-
+ /**
+ * pcpu_fit_in_area - try to fit the requested allocation in a candidate area
+ * @chunk: chunk the candidate area belongs to
+@@ -740,7 +737,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
+ chunk->map_used = 1;
+
+ INIT_LIST_HEAD(&chunk->list);
+- INIT_WORK(&chunk->map_extend_work, pcpu_map_extend_workfn);
++ INIT_LIST_HEAD(&chunk->map_extend_list);
+ chunk->free_size = pcpu_unit_size;
+ chunk->contig_hint = pcpu_unit_size;
+
+@@ -895,6 +892,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
+ return NULL;
+ }
+
++ if (!is_atomic)
++ mutex_lock(&pcpu_alloc_mutex);
++
+ spin_lock_irqsave(&pcpu_lock, flags);
+
+ /* serve reserved allocations from the reserved chunk if available */
+@@ -967,12 +967,9 @@ restart:
+ if (is_atomic)
+ goto fail;
+
+- mutex_lock(&pcpu_alloc_mutex);
+-
+ if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
+ chunk = pcpu_create_chunk();
+ if (!chunk) {
+- mutex_unlock(&pcpu_alloc_mutex);
+ err = "failed to allocate new chunk";
+ goto fail;
+ }
+@@ -983,7 +980,6 @@ restart:
+ spin_lock_irqsave(&pcpu_lock, flags);
+ }
+
+- mutex_unlock(&pcpu_alloc_mutex);
+ goto restart;
+
+ area_found:
+@@ -993,8 +989,6 @@ area_found:
+ if (!is_atomic) {
+ int page_start, page_end, rs, re;
+
+- mutex_lock(&pcpu_alloc_mutex);
+-
+ page_start = PFN_DOWN(off);
+ page_end = PFN_UP(off + size);
+
+@@ -1005,7 +999,6 @@ area_found:
+
+ spin_lock_irqsave(&pcpu_lock, flags);
+ if (ret) {
+- mutex_unlock(&pcpu_alloc_mutex);
+ pcpu_free_area(chunk, off, &occ_pages);
+ err = "failed to populate";
+ goto fail_unlock;
+@@ -1045,6 +1038,8 @@ fail:
+ /* see the flag handling in pcpu_blance_workfn() */
+ pcpu_atomic_alloc_failed = true;
+ pcpu_schedule_balance_work();
++ } else {
++ mutex_unlock(&pcpu_alloc_mutex);
+ }
+ return NULL;
+ }
+@@ -1129,6 +1124,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
+ if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
+ continue;
+
++ list_del_init(&chunk->map_extend_list);
+ list_move(&chunk->list, &to_free);
+ }
+
+@@ -1146,6 +1142,25 @@ static void pcpu_balance_workfn(struct work_struct *work)
+ pcpu_destroy_chunk(chunk);
+ }
+
++ /* service chunks which requested async area map extension */
++ do {
++ int new_alloc = 0;
++
++ spin_lock_irq(&pcpu_lock);
++
++ chunk = list_first_entry_or_null(&pcpu_map_extend_chunks,
++ struct pcpu_chunk, map_extend_list);
++ if (chunk) {
++ list_del_init(&chunk->map_extend_list);
++ new_alloc = pcpu_need_to_extend(chunk, false);
++ }
++
++ spin_unlock_irq(&pcpu_lock);
++
++ if (new_alloc)
++ pcpu_extend_area_map(chunk, new_alloc);
++ } while (chunk);
++
+ /*
+ * Ensure there are certain number of free populated pages for
+ * atomic allocs. Fill up from the most packed so that atomic
+@@ -1644,7 +1659,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
+ */
+ schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
+ INIT_LIST_HEAD(&schunk->list);
+- INIT_WORK(&schunk->map_extend_work, pcpu_map_extend_workfn);
++ INIT_LIST_HEAD(&schunk->map_extend_list);
+ schunk->base_addr = base_addr;
+ schunk->map = smap;
+ schunk->map_alloc = ARRAY_SIZE(smap);
+@@ -1673,7 +1688,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
+ if (dyn_size) {
+ dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
+ INIT_LIST_HEAD(&dchunk->list);
+- INIT_WORK(&dchunk->map_extend_work, pcpu_map_extend_workfn);
++ INIT_LIST_HEAD(&dchunk->map_extend_list);
+ dchunk->base_addr = base_addr;
+ dchunk->map = dmap;
+ dchunk->map_alloc = ARRAY_SIZE(dmap);
+diff --git a/mm/shmem.c b/mm/shmem.c
+index 719bd6b88d98..9ca09f52fef5 100644
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2236,9 +2236,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
+ NULL);
+ if (error) {
+ /* Remove the !PageUptodate pages we added */
+- shmem_undo_range(inode,
+- (loff_t)start << PAGE_SHIFT,
+- (loff_t)index << PAGE_SHIFT, true);
++ if (index > start) {
++ shmem_undo_range(inode,
++ (loff_t)start << PAGE_SHIFT,
++ ((loff_t)index << PAGE_SHIFT) - 1, true);
++ }
+ goto undone;
+ }
+
+diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
+index a669dea146c6..61ad43f61c5e 100644
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
+ }
+ EXPORT_SYMBOL(make_flow_keys_digest);
+
++static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
++
++u32 __skb_get_hash_symmetric(struct sk_buff *skb)
++{
++ struct flow_keys keys;
++
++ __flow_hash_secret_init();
++
++ memset(&keys, 0, sizeof(keys));
++ __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
++ NULL, 0, 0, 0,
++ FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
++
++ return __flow_hash_from_keys(&keys, hashrnd);
++}
++EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
++
+ /**
+ * __skb_get_hash: calculate a flow hash
+ * @skb: sk_buff to calculate flow hash from
+@@ -868,6 +885,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
+ },
+ };
+
++static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
++ {
++ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
++ .offset = offsetof(struct flow_keys, control),
++ },
++ {
++ .key_id = FLOW_DISSECTOR_KEY_BASIC,
++ .offset = offsetof(struct flow_keys, basic),
++ },
++ {
++ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
++ .offset = offsetof(struct flow_keys, addrs.v4addrs),
++ },
++ {
++ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
++ .offset = offsetof(struct flow_keys, addrs.v6addrs),
++ },
++ {
++ .key_id = FLOW_DISSECTOR_KEY_PORTS,
++ .offset = offsetof(struct flow_keys, ports),
++ },
++};
++
+ static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+@@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void)
+ skb_flow_dissector_init(&flow_keys_dissector,
+ flow_keys_dissector_keys,
+ ARRAY_SIZE(flow_keys_dissector_keys));
++ skb_flow_dissector_init(&flow_keys_dissector_symmetric,
++ flow_keys_dissector_symmetric_keys,
++ ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
+ skb_flow_dissector_init(&flow_keys_buf_dissector,
+ flow_keys_buf_dissector_keys,
+ ARRAY_SIZE(flow_keys_buf_dissector_keys));
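
[Editorial note] The flow-dissector hunk above adds __skb_get_hash_symmetric(), built on a reduced dissector (control, basic, v4/v6 addresses, ports) so that AF_PACKET's hash fanout mode sends both directions of a flow to the same socket. As a toy illustration of the symmetry property only (the kernel derives the hash from its flow keys and a random seed, not like this), here is a userspace hash that canonically orders the endpoints before mixing:

#include <stdint.h>
#include <stdio.h>

/* Mix step for the toy hash; any reasonable mixer would do here. */
static uint32_t mix(uint32_t h, uint32_t v)
{
    h ^= v;
    h *= 0x9e3779b1u;
    return h ^ (h >> 16);
}

/* Order the two endpoints before hashing so the forward and reverse
 * directions of a flow produce the same value. */
static uint32_t symmetric_flow_hash(uint32_t saddr, uint16_t sport,
                                    uint32_t daddr, uint16_t dport)
{
    uint64_t a = ((uint64_t)saddr << 16) | sport;
    uint64_t b = ((uint64_t)daddr << 16) | dport;
    uint64_t lo = a < b ? a : b;
    uint64_t hi = a < b ? b : a;
    uint32_t h = 0x12345678u;

    h = mix(h, (uint32_t)lo);
    h = mix(h, (uint32_t)(lo >> 32));
    h = mix(h, (uint32_t)hi);
    h = mix(h, (uint32_t)(hi >> 32));
    return h;
}

int main(void)
{
    uint32_t fwd = symmetric_flow_hash(0x0a000001, 40000, 0x0a000002, 80);
    uint32_t rev = symmetric_flow_hash(0x0a000002, 80, 0x0a000001, 40000);

    printf("forward %08x reverse %08x (%s)\n", fwd, rev,
           fwd == rev ? "match" : "mismatch");
    return 0;
}
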
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index e561f9f07d6d..59bf4d77154f 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -3016,24 +3016,6 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
+ EXPORT_SYMBOL_GPL(skb_append_pagefrags);
+
+ /**
+- * skb_push_rcsum - push skb and update receive checksum
+- * @skb: buffer to update
+- * @len: length of data pulled
+- *
+- * This function performs an skb_push on the packet and updates
+- * the CHECKSUM_COMPLETE checksum. It should be used on
+- * receive path processing instead of skb_push unless you know
+- * that the checksum difference is zero (e.g., a valid IP header)
+- * or you are setting ip_summed to CHECKSUM_NONE.
+- */
+-static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
+-{
+- skb_push(skb, len);
+- skb_postpush_rcsum(skb, skb->data, len);
+- return skb->data;
+-}
+-
+-/**
+ * skb_pull_rcsum - pull skb and update receive checksum
+ * @skb: buffer to update
+ * @len: length of data pulled
+diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
+index ea071fad67a0..c26fac26b23c 100644
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+ }
+ }
+
++ free_percpu(non_pcpu_rt->rt6i_pcpu);
+ non_pcpu_rt->rt6i_pcpu = NULL;
+ }
+
+diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
+index d32cefcb63b0..34a5712d467f 100644
+--- a/net/mac80211/mesh.c
++++ b/net/mac80211/mesh.c
+@@ -150,19 +150,26 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
+ void mesh_sta_cleanup(struct sta_info *sta)
+ {
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+- u32 changed;
++ u32 changed = 0;
+
+ /*
+ * maybe userspace handles peer allocation and peering, but in either
+ * case the beacon is still generated by the kernel and we might need
+ * an update.
+ */
+- changed = mesh_accept_plinks_update(sdata);
++ if (sdata->u.mesh.user_mpm &&
++ sta->mesh->plink_state == NL80211_PLINK_ESTAB)
++ changed |= mesh_plink_dec_estab_count(sdata);
++ changed |= mesh_accept_plinks_update(sdata);
+ if (!sdata->u.mesh.user_mpm) {
+ changed |= mesh_plink_deactivate(sta);
+ del_timer_sync(&sta->mesh->plink_timer);
+ }
+
++ /* make sure no readers can access nexthop sta from here on */
++ mesh_path_flush_by_nexthop(sta);
++ synchronize_net();
++
+ if (changed)
+ ieee80211_mbss_info_change_notify(sdata, changed);
+ }
+diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
+index 62193f4bc37b..ba7ce53ec615 100644
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -275,7 +275,7 @@ struct ieee80211_fast_tx {
+ u8 sa_offs, da_offs, pn_offs;
+ u8 band;
+ u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV +
+- sizeof(rfc1042_header)];
++ sizeof(rfc1042_header)] __aligned(2);
+
+ struct rcu_head rcu_head;
+ };
+diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
+index 18d0becbc46d..8012f67ca5ae 100644
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1340,7 +1340,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
+ struct sk_buff *skb,
+ unsigned int num)
+ {
+- return reciprocal_scale(skb_get_hash(skb), num);
++ return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
+ }
+
+ static unsigned int fanout_demux_lb(struct packet_fanout *f,
+diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
+index 8f3948dd38b8..934336e12a65 100644
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -180,7 +180,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
+
+ if (!(at & AT_EGRESS)) {
+ if (m->tcfm_ok_push)
+- skb_push(skb2, skb->mac_len);
++ skb_push_rcsum(skb2, skb->mac_len);
+ }
+
+ /* mirror is always swallowed */
+diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
+index 7e0c9bf22df8..837dd910a252 100644
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -446,16 +446,27 @@ out_no_rpciod:
+ return ERR_PTR(err);
+ }
+
+-struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
++static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+ struct rpc_xprt *xprt)
+ {
+ struct rpc_clnt *clnt = NULL;
+ struct rpc_xprt_switch *xps;
+
+- xps = xprt_switch_alloc(xprt, GFP_KERNEL);
+- if (xps == NULL)
+- return ERR_PTR(-ENOMEM);
+-
++ if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) {
++ WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
++ xps = args->bc_xprt->xpt_bc_xps;
++ xprt_switch_get(xps);
++ } else {
++ xps = xprt_switch_alloc(xprt, GFP_KERNEL);
++ if (xps == NULL) {
++ xprt_put(xprt);
++ return ERR_PTR(-ENOMEM);
++ }
++ if (xprt->bc_xprt) {
++ xprt_switch_get(xps);
++ xprt->bc_xprt->xpt_bc_xps = xps;
++ }
++ }
+ clnt = rpc_new_client(args, xps, xprt, NULL);
+ if (IS_ERR(clnt))
+ return clnt;
+@@ -483,7 +494,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+
+ return clnt;
+ }
+-EXPORT_SYMBOL_GPL(rpc_create_xprt);
+
+ /**
+ * rpc_create - create an RPC client and transport with one call
+@@ -509,6 +519,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
+ };
+ char servername[48];
+
++ if (args->bc_xprt) {
++ WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
++ xprt = args->bc_xprt->xpt_bc_xprt;
++ if (xprt) {
++ xprt_get(xprt);
++ return rpc_create_xprt(args, xprt);
++ }
++ }
++
+ if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
+ xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
+ if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 7422f28818b2..7231cb413a2c 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -136,6 +136,8 @@ static void svc_xprt_free(struct kref *kref)
+ /* See comment on corresponding get in xs_setup_bc_tcp(): */
+ if (xprt->xpt_bc_xprt)
+ xprt_put(xprt->xpt_bc_xprt);
++ if (xprt->xpt_bc_xps)
++ xprt_switch_put(xprt->xpt_bc_xps);
+ xprt->xpt_ops->xpo_free(xprt);
+ module_put(owner);
+ }
+diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
+index 65e759569e48..e9e5dd0dc8f4 100644
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -3050,6 +3050,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
+ return xprt;
+
+ args->bc_xprt->xpt_bc_xprt = NULL;
++ args->bc_xprt->xpt_bc_xps = NULL;
+ xprt_put(xprt);
+ ret = ERR_PTR(-EINVAL);
+ out_err:
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 8269da73e9e5..7748199b3568 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -315,7 +315,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
+ &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
+ struct dentry *dentry = unix_sk(s)->path.dentry;
+
+- if (dentry && d_backing_inode(dentry) == i) {
++ if (dentry && d_real_inode(dentry) == i) {
+ sock_hold(s);
+ goto found;
+ }
+@@ -911,7 +911,7 @@ static struct sock *unix_find_other(struct net *net,
+ err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
+ if (err)
+ goto fail;
+- inode = d_backing_inode(path.dentry);
++ inode = d_real_inode(path.dentry);
+ err = inode_permission(inode, MAY_WRITE);
+ if (err)
+ goto put_fail;
+@@ -1048,7 +1048,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ goto out_up;
+ }
+ addr->hash = UNIX_HASH_SIZE;
+- hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
++ hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+ spin_lock(&unix_table_lock);
+ u->path = u_path;
+ list = &unix_socket_table[hash];
+diff --git a/net/wireless/core.c b/net/wireless/core.c
+index 9f1c4aa851ef..c878045d146a 100644
+--- a/net/wireless/core.c
++++ b/net/wireless/core.c
+@@ -360,8 +360,6 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
+ WARN_ON(ops->remain_on_channel && !ops->cancel_remain_on_channel);
+ WARN_ON(ops->tdls_channel_switch && !ops->tdls_cancel_channel_switch);
+ WARN_ON(ops->add_tx_ts && !ops->del_tx_ts);
+- WARN_ON(ops->set_tx_power && !ops->get_tx_power);
+- WARN_ON(ops->set_antenna && !ops->get_antenna);
+
+ alloc_size = sizeof(*rdev) + sizeof_priv;
+
+diff --git a/net/wireless/util.c b/net/wireless/util.c
+index 9f440a9de63b..47b917841623 100644
+--- a/net/wireless/util.c
++++ b/net/wireless/util.c
+@@ -509,7 +509,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr,
+ * replace EtherType */
+ hdrlen += ETH_ALEN + 2;
+ else
+- tmp.h_proto = htons(skb->len);
++ tmp.h_proto = htons(skb->len - hdrlen);
+
+ pskb_pull(skb, hdrlen);
+
+diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
+index a9155077feef..fec75786f75b 100644
+--- a/scripts/mod/file2alias.c
++++ b/scripts/mod/file2alias.c
+@@ -384,7 +384,7 @@ static void do_of_entry_multi(void *symval, struct module *mod)
+ len = sprintf(alias, "of:N%sT%s", (*name)[0] ? *name : "*",
+ (*type)[0] ? *type : "*");
+
+- if (compatible[0])
++ if ((*compatible)[0])
+ sprintf(&alias[len], "%sC%s", (*type)[0] ? "*" : "",
+ *compatible);
+
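
[Editorial note] The file2alias.c fix above changes "if (compatible[0])" to "if ((*compatible)[0])": compatible here is a pointer to a char array, so compatible[0] is the array itself, which decays to a non-NULL address and makes the test always true; only dereferencing first reaches the first character. A tiny standalone example of that distinction (the variable names are just for the demo):

#include <stdio.h>

int main(void)
{
    char empty[16] = "";
    char (*compatible)[16] = &empty;    /* pointer to an array of 16 chars */

    /* Buggy form: compatible[0] is the whole array; as a condition it
     * decays to its address, so this branch is taken even for "".
     * (Compilers typically warn that the address is always non-NULL.) */
    if (compatible[0])
        printf("buggy test fires for the empty string\n");

    /* Fixed form: (*compatible)[0] is the first character, '\0' here. */
    if ((*compatible)[0])
        printf("fixed test fires\n");
    else
        printf("fixed test correctly skips the empty string\n");

    return 0;
}
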
+diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
+index dec607c17b64..5ee820111027 100644
+--- a/security/apparmor/lsm.c
++++ b/security/apparmor/lsm.c
+@@ -523,34 +523,34 @@ static int apparmor_setprocattr(struct task_struct *task, char *name,
+ {
+ struct common_audit_data sa;
+ struct apparmor_audit_data aad = {0,};
+- char *command, *args = value;
++ char *command, *largs = NULL, *args = value;
+ size_t arg_size;
+ int error;
+
+ if (size == 0)
+ return -EINVAL;
+- /* args points to a PAGE_SIZE buffer, AppArmor requires that
+- * the buffer must be null terminated or have size <= PAGE_SIZE -1
+- * so that AppArmor can null terminate them
+- */
+- if (args[size - 1] != '\0') {
+- if (size == PAGE_SIZE)
+- return -EINVAL;
+- args[size] = '\0';
+- }
+-
+ /* task can only write its own attributes */
+ if (current != task)
+ return -EACCES;
+
+- args = value;
++ /* AppArmor requires that the buffer must be null terminated atm */
++ if (args[size - 1] != '\0') {
++ /* null terminate */
++ largs = args = kmalloc(size + 1, GFP_KERNEL);
++ if (!args)
++ return -ENOMEM;
++ memcpy(args, value, size);
++ args[size] = '\0';
++ }
++
++ error = -EINVAL;
+ args = strim(args);
+ command = strsep(&args, " ");
+ if (!args)
+- return -EINVAL;
++ goto out;
+ args = skip_spaces(args);
+ if (!*args)
+- return -EINVAL;
++ goto out;
+
+ arg_size = size - (args - (char *) value);
+ if (strcmp(name, "current") == 0) {
+@@ -576,10 +576,12 @@ static int apparmor_setprocattr(struct task_struct *task, char *name,
+ goto fail;
+ } else
+ /* only support the "current" and "exec" process attributes */
+- return -EINVAL;
++ goto fail;
+
+ if (!error)
+ error = size;
++out:
++ kfree(largs);
+ return error;
+
+ fail:
+@@ -588,9 +590,9 @@ fail:
+ aad.profile = aa_current_profile();
+ aad.op = OP_SETPROCATTR;
+ aad.info = name;
+- aad.error = -EINVAL;
++ aad.error = error = -EINVAL;
+ aa_audit_msg(AUDIT_APPARMOR_DENIED, &sa, NULL);
+- return -EINVAL;
++ goto out;
+ }
+
+ static int apparmor_task_setrlimit(struct task_struct *task,
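
[Editorial note] The AppArmor change above stops writing a terminating NUL one byte past the caller-supplied buffer (which cannot be done safely when size == PAGE_SIZE) and instead takes a private kmalloc'd copy that it terminates and later frees, routing every early exit through a common out: label. A userspace sketch of just the copy step, with an invented helper name:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Make a NUL-terminated private copy of a buffer that may or may not be
 * terminated, instead of appending '\0' in place and risking a write past
 * the end of the caller's buffer. */
static char *null_terminated_copy(const char *value, size_t size)
{
    char *copy = malloc(size + 1);

    if (!copy)
        return NULL;
    memcpy(copy, value, size);
    copy[size] = '\0';
    return copy;
}

int main(void)
{
    const char raw[7] = { 'c', 'h', 'a', 'n', 'g', 'e', 'p' };  /* no NUL */
    char *args = null_terminated_copy(raw, sizeof(raw));

    if (!args)
        return 1;
    printf("parsed command: %s\n", args);   /* safe: args is terminated */
    free(args);
    return 0;
}
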
+diff --git a/security/keys/key.c b/security/keys/key.c
+index b28755131687..af7f6821d26b 100644
+--- a/security/keys/key.c
++++ b/security/keys/key.c
+@@ -584,7 +584,7 @@ int key_reject_and_link(struct key *key,
+
+ mutex_unlock(&key_construction_mutex);
+
+- if (keyring)
++ if (keyring && link_ret == 0)
+ __key_link_end(keyring, &key->index_key, edit);
+
+ /* wake up anyone waiting for a key to be constructed */
+diff --git a/sound/core/control.c b/sound/core/control.c
+index a85d45595d02..b4fe9b002512 100644
+--- a/sound/core/control.c
++++ b/sound/core/control.c
+@@ -160,6 +160,8 @@ void snd_ctl_notify(struct snd_card *card, unsigned int mask,
+
+ if (snd_BUG_ON(!card || !id))
+ return;
++ if (card->shutdown)
++ return;
+ read_lock(&card->ctl_files_rwlock);
+ #if IS_ENABLED(CONFIG_SND_MIXER_OSS)
+ card->mixer_oss_change_count++;
+diff --git a/sound/core/pcm.c b/sound/core/pcm.c
+index 308c9ecf73db..8e980aa678d0 100644
+--- a/sound/core/pcm.c
++++ b/sound/core/pcm.c
+@@ -849,6 +849,14 @@ int snd_pcm_new_internal(struct snd_card *card, const char *id, int device,
+ }
+ EXPORT_SYMBOL(snd_pcm_new_internal);
+
++static void free_chmap(struct snd_pcm_str *pstr)
++{
++ if (pstr->chmap_kctl) {
++ snd_ctl_remove(pstr->pcm->card, pstr->chmap_kctl);
++ pstr->chmap_kctl = NULL;
++ }
++}
++
+ static void snd_pcm_free_stream(struct snd_pcm_str * pstr)
+ {
+ struct snd_pcm_substream *substream, *substream_next;
+@@ -871,6 +879,7 @@ static void snd_pcm_free_stream(struct snd_pcm_str * pstr)
+ kfree(setup);
+ }
+ #endif
++ free_chmap(pstr);
+ if (pstr->substream_count)
+ put_device(&pstr->dev);
+ }
+@@ -1135,10 +1144,7 @@ static int snd_pcm_dev_disconnect(struct snd_device *device)
+ for (cidx = 0; cidx < 2; cidx++) {
+ if (!pcm->internal)
+ snd_unregister_device(&pcm->streams[cidx].dev);
+- if (pcm->streams[cidx].chmap_kctl) {
+- snd_ctl_remove(pcm->card, pcm->streams[cidx].chmap_kctl);
+- pcm->streams[cidx].chmap_kctl = NULL;
+- }
++ free_chmap(&pcm->streams[cidx]);
+ }
+ mutex_unlock(&pcm->open_mutex);
+ mutex_unlock(&register_mutex);
+diff --git a/sound/core/timer.c b/sound/core/timer.c
+index 6469bedda2f3..23b73f6ac040 100644
+--- a/sound/core/timer.c
++++ b/sound/core/timer.c
+@@ -1954,6 +1954,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
+
+ qhead = tu->qhead++;
+ tu->qhead %= tu->queue_size;
++ tu->qused--;
+ spin_unlock_irq(&tu->qlock);
+
+ if (tu->tread) {
+@@ -1967,7 +1968,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
+ }
+
+ spin_lock_irq(&tu->qlock);
+- tu->qused--;
+ if (err < 0)
+ goto _error;
+ result += unit;
+diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
+index c0f8f613f1f1..172dacd925f5 100644
+--- a/sound/drivers/dummy.c
++++ b/sound/drivers/dummy.c
+@@ -420,6 +420,7 @@ static int dummy_hrtimer_stop(struct snd_pcm_substream *substream)
+
+ static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm)
+ {
++ hrtimer_cancel(&dpcm->timer);
+ tasklet_kill(&dpcm->tasklet);
+ }
+
+diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c
+index 87041ddd29cb..47a358fab132 100644
+--- a/sound/hda/hdac_regmap.c
++++ b/sound/hda/hdac_regmap.c
+@@ -444,7 +444,7 @@ int snd_hdac_regmap_write_raw(struct hdac_device *codec, unsigned int reg,
+ err = reg_raw_write(codec, reg, val);
+ if (err == -EAGAIN) {
+ err = snd_hdac_power_up_pm(codec);
+- if (!err)
++ if (err >= 0)
+ err = reg_raw_write(codec, reg, val);
+ snd_hdac_power_down_pm(codec);
+ }
+@@ -470,7 +470,7 @@ static int __snd_hdac_regmap_read_raw(struct hdac_device *codec,
+ err = reg_raw_read(codec, reg, val, uncached);
+ if (err == -EAGAIN) {
+ err = snd_hdac_power_up_pm(codec);
+- if (!err)
++ if (err >= 0)
+ err = reg_raw_read(codec, reg, val, uncached);
+ snd_hdac_power_down_pm(codec);
+ }
+diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c
+index 4667c3232b7f..74177189063c 100644
+--- a/sound/pci/au88x0/au88x0_core.c
++++ b/sound/pci/au88x0/au88x0_core.c
+@@ -1444,9 +1444,8 @@ static int vortex_wtdma_bufshift(vortex_t * vortex, int wtdma)
+ int page, p, pp, delta, i;
+
+ page =
+- (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2)) &
+- WT_SUBBUF_MASK)
+- >> WT_SUBBUF_SHIFT;
++ (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2))
++ >> WT_SUBBUF_SHIFT) & WT_SUBBUF_MASK;
+ if (dma->nr_periods >= 4)
+ delta = (page - dma->period_real) & 3;
+ else {
+diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c
+index 1cb85aeb0cea..286f5e3686a3 100644
+--- a/sound/pci/echoaudio/echoaudio.c
++++ b/sound/pci/echoaudio/echoaudio.c
+@@ -2200,11 +2200,11 @@ static int snd_echo_resume(struct device *dev)
+ u32 pipe_alloc_mask;
+ int err;
+
+- commpage_bak = kmalloc(sizeof(struct echoaudio), GFP_KERNEL);
++ commpage_bak = kmalloc(sizeof(*commpage), GFP_KERNEL);
+ if (commpage_bak == NULL)
+ return -ENOMEM;
+ commpage = chip->comm_page;
+- memcpy(commpage_bak, commpage, sizeof(struct comm_page));
++ memcpy(commpage_bak, commpage, sizeof(*commpage));
+
+ err = init_hw(chip, chip->pci->device, chip->pci->subsystem_device);
+ if (err < 0) {
+diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
+index dfaf1a93fb8a..d77cc76aadab 100644
+--- a/sound/pci/hda/hda_generic.c
++++ b/sound/pci/hda/hda_generic.c
+@@ -3977,6 +3977,8 @@ static hda_nid_t set_path_power(struct hda_codec *codec, hda_nid_t nid,
+
+ for (n = 0; n < spec->paths.used; n++) {
+ path = snd_array_elem(&spec->paths, n);
++ if (!path->depth)
++ continue;
+ if (path->path[0] == nid ||
+ path->path[path->depth - 1] == nid) {
+ bool pin_old = path->pin_enabled;
+diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
+index 94089fc71884..6f8ea13323c1 100644
+--- a/sound/pci/hda/hda_intel.c
++++ b/sound/pci/hda/hda_intel.c
+@@ -367,9 +367,10 @@ enum {
+ #define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
+ #define IS_KBL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa171)
+ #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
++#define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0)
+ #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+ #define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
+- IS_KBL(pci) || IS_KBL_LP(pci)
++ IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci)
+
+ static char *driver_short_names[] = {
+ [AZX_DRIVER_ICH] = "HDA Intel",
+@@ -1217,8 +1218,10 @@ static int azx_free(struct azx *chip)
+ if (use_vga_switcheroo(hda)) {
+ if (chip->disabled && hda->probe_continued)
+ snd_hda_unlock_devices(&chip->bus);
+- if (hda->vga_switcheroo_registered)
++ if (hda->vga_switcheroo_registered) {
+ vga_switcheroo_unregister_client(chip->pci);
++ vga_switcheroo_fini_domain_pm_ops(chip->card->dev);
++ }
+ }
+
+ if (bus->chip_init) {
+@@ -2190,6 +2193,9 @@ static const struct pci_device_id azx_ids[] = {
+ /* Kabylake-LP */
+ { PCI_DEVICE(0x8086, 0x9d71),
+ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
++ /* Kabylake-H */
++ { PCI_DEVICE(0x8086, 0xa2f0),
++ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+ /* Broxton-P(Apollolake) */
+ { PCI_DEVICE(0x8086, 0x5a98),
+ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
+@@ -2263,6 +2269,8 @@ static const struct pci_device_id azx_ids[] = {
+ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
+ { PCI_DEVICE(0x1002, 0x157a),
+ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
++ { PCI_DEVICE(0x1002, 0x15b3),
++ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
+ { PCI_DEVICE(0x1002, 0x793b),
+ .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI },
+ { PCI_DEVICE(0x1002, 0x7919),
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 0fe18ede3e85..abcb5a6a1cd9 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -5650,6 +5650,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
++ SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460),
++ SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460),
++ SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
+ SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
+@@ -5735,7 +5738,6 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
+ {}
+ };
+ #define ALC225_STANDARD_PINS \
+- {0x12, 0xb7a60130}, \
+ {0x21, 0x04211020}
+
+ #define ALC256_STANDARD_PINS \
+@@ -5760,10 +5762,24 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
+ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+ SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+ ALC225_STANDARD_PINS,
++ {0x12, 0xb7a60130},
+ {0x14, 0x901701a0}),
+ SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+ ALC225_STANDARD_PINS,
++ {0x12, 0xb7a60130},
+ {0x14, 0x901701b0}),
++ SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
++ ALC225_STANDARD_PINS,
++ {0x12, 0xb7a60150},
++ {0x14, 0x901701a0}),
++ SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
++ ALC225_STANDARD_PINS,
++ {0x12, 0xb7a60150},
++ {0x14, 0x901701b0}),
++ SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
++ ALC225_STANDARD_PINS,
++ {0x12, 0xb7a60130},
++ {0x1b, 0x90170110}),
+ SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
+ {0x14, 0x90170110},
+ {0x21, 0x02211020}),
+@@ -5832,6 +5848,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
+ {0x14, 0x90170120},
+ {0x21, 0x02211030}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++ {0x12, 0x90a60170},
++ {0x14, 0x90170120},
++ {0x21, 0x02211030}),
++ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ ALC256_STANDARD_PINS),
+ SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
+ {0x12, 0x90a60130},
+diff --git a/sound/usb/card.c b/sound/usb/card.c
+index 3fc63583a537..2d493501b7f6 100644
+--- a/sound/usb/card.c
++++ b/sound/usb/card.c
+@@ -552,7 +552,6 @@ static int usb_audio_probe(struct usb_interface *intf,
+ goto __error;
+ }
+ chip = usb_chip[i];
+- dev_set_drvdata(&dev->dev, chip);
+ atomic_inc(&chip->active); /* avoid autopm */
+ break;
+ }
+@@ -578,6 +577,7 @@ static int usb_audio_probe(struct usb_interface *intf,
+ goto __error;
+ }
+ }
++ dev_set_drvdata(&dev->dev, chip);
+
+ /*
+ * For devices with more than one control interface, we assume the
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 4fd482fb9260..7cb12249baa5 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -2868,7 +2868,7 @@ static long kvm_vm_ioctl(struct file *filp,
+ if (copy_from_user(&routing, argp, sizeof(routing)))
+ goto out;
+ r = -EINVAL;
+- if (routing.nr >= KVM_MAX_IRQ_ROUTES)
++ if (routing.nr > KVM_MAX_IRQ_ROUTES)
+ goto out;
+ if (routing.flags)
+ goto out;
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-07-27 23:52 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-07-27 23:52 UTC (permalink / raw
To: gentoo-commits
commit: c8839f5a116f5e0d27d587a09b40e1cc668c9885
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Wed Jul 27 23:51:56 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Wed Jul 27 23:51:56 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=c8839f5a
Add BFQ patches for 4.6.X: http://algogroup.unimore.it/people/paolo/disk_sched/patches/4.6.0-v8/
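For reference, applying these patches only adds BFQ to the tree; the scheduler still has to be selected in the kernel configuration. A minimal .config sketch, using the option names introduced by the Kconfig hunks in patches 1 and 2 below (and assuming CONFIG_CGROUPS is already enabled for the group-scheduling option), might look like:
# Hypothetical .config fragment; option names come from the Kconfig.iosched
# changes added by the 5001/5002 BFQ patches in this commit.
CONFIG_IOSCHED_BFQ=y
CONFIG_BFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_BFQ=y
CONFIG_DEFAULT_IOSCHED="bfq"
If IOSCHED_BFQ is built as a module instead, BFQ can also be selected per device at runtime through /sys/block/<dev>/queue/scheduler.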
0000_README | 16 +
...oups-kconfig-build-bits-for-BFQ-v7r11-4.6.patch | 103 +
...ntroduce-the-BFQ-v7r11-I-O-sched-for-4.6.patch1 | 7097 ++++++++++++++++++++
...arly-Queue-Merge-EQM-to-BFQ-v7r11-for-4.6.patch | 1101 +++
...rn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch2 | 6361 ++++++++++++++++++
5 files changed, 14678 insertions(+)
diff --git a/0000_README b/0000_README
index 67da565..9e42d11 100644
--- a/0000_README
+++ b/0000_README
@@ -91,6 +91,22 @@ Patch: 5000_enable-additional-cpu-optimizations-for-gcc.patch
From: https://github.com/graysky2/kernel_gcc_patch/
Desc: Kernel patch enables gcc < v4.9 optimizations for additional CPUs.
+Patch: 5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.6.patch
+From: http://algo.ing.unimo.it/people/paolo/disk_sched/
+Desc: BFQ v7r11 patch 1 for 4.6: Build, cgroups and kconfig bits
+
+Patch: 5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.6.patch1
+From: http://algo.ing.unimo.it/people/paolo/disk_sched/
+Desc: BFQ v7r11 patch 2 for 4.6: BFQ Scheduler
+
+Patch: 5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.6.patch
+From: http://algo.ing.unimo.it/people/paolo/disk_sched/
+Desc: BFQ v7r11 patch 3 for 4.6: Early Queue Merge (EQM)
+
+Patch: 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch2
+From: http://algo.ing.unimo.it/people/paolo/disk_sched/
+Desc: BFQ v8 patch 4 for 4.6: Turn BFQ-v7r11 into BFQ-v8
+
Patch: 5010_enable-additional-cpu-optimizations-for-gcc-4.9.patch
From: https://github.com/graysky2/kernel_gcc_patch/
Desc: Kernel patch enables gcc >= v4.9 optimizations for additional CPUs.
diff --git a/5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.6.patch b/5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.6.patch
new file mode 100644
index 0000000..ee3934f
--- /dev/null
+++ b/5001_block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.6.patch
@@ -0,0 +1,103 @@
+From 4cf5d043709bfe73b4553272706cb5beb8072301 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@unimore.it>
+Date: Tue, 7 Apr 2015 13:39:12 +0200
+Subject: [PATCH 1/4] block: cgroups, kconfig, build bits for BFQ-v7r11-4.6.0
+
+Update Kconfig.iosched and do the related Makefile changes to include
+kernel configuration options for BFQ. Also increase the number of
+policies supported by the blkio controller so that BFQ can add its
+own.
+
+Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
+Signed-off-by: Arianna Avanzini <avanzini@google.com>
+---
+ block/Kconfig.iosched | 32 ++++++++++++++++++++++++++++++++
+ block/Makefile | 1 +
+ include/linux/blkdev.h | 2 +-
+ 3 files changed, 34 insertions(+), 1 deletion(-)
+
+diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
+index 421bef9..0ee5f0f 100644
+--- a/block/Kconfig.iosched
++++ b/block/Kconfig.iosched
+@@ -39,6 +39,27 @@ config CFQ_GROUP_IOSCHED
+ ---help---
+ Enable group IO scheduling in CFQ.
+
++config IOSCHED_BFQ
++ tristate "BFQ I/O scheduler"
++ default n
++ ---help---
++ The BFQ I/O scheduler tries to distribute bandwidth among
++ all processes according to their weights.
++ It aims at distributing the bandwidth as desired, independently of
++ the disk parameters and with any workload. It also tries to
++ guarantee low latency to interactive and soft real-time
++ applications. If compiled built-in (saying Y here), BFQ can
++ be configured to support hierarchical scheduling.
++
++config CGROUP_BFQIO
++ bool "BFQ hierarchical scheduling support"
++ depends on CGROUPS && IOSCHED_BFQ=y
++ default n
++ ---help---
++ Enable hierarchical scheduling in BFQ, using the cgroups
++ filesystem interface. The name of the subsystem will be
++ bfqio.
++
+ choice
+ prompt "Default I/O scheduler"
+ default DEFAULT_CFQ
+@@ -52,6 +73,16 @@ choice
+ config DEFAULT_CFQ
+ bool "CFQ" if IOSCHED_CFQ=y
+
++ config DEFAULT_BFQ
++ bool "BFQ" if IOSCHED_BFQ=y
++ help
++ Selects BFQ as the default I/O scheduler which will be
++ used by default for all block devices.
++ The BFQ I/O scheduler aims at distributing the bandwidth
++ as desired, independently of the disk parameters and with
++ any workload. It also tries to guarantee low latency to
++ interactive and soft real-time applications.
++
+ config DEFAULT_NOOP
+ bool "No-op"
+
+@@ -61,6 +92,7 @@ config DEFAULT_IOSCHED
+ string
+ default "deadline" if DEFAULT_DEADLINE
+ default "cfq" if DEFAULT_CFQ
++ default "bfq" if DEFAULT_BFQ
+ default "noop" if DEFAULT_NOOP
+
+ endmenu
+diff --git a/block/Makefile b/block/Makefile
+index 9eda232..4a36683 100644
+--- a/block/Makefile
++++ b/block/Makefile
+@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
+ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
+ obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
+ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
++obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o
+
+ obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
+ obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 669e419..119be87 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -45,7 +45,7 @@ struct pr_ops;
+ * Maximum number of blkcg policies allowed to be registered concurrently.
+ * Defined here to simplify include dependency.
+ */
+-#define BLKCG_MAX_POLS 2
++#define BLKCG_MAX_POLS 3
+
+ struct request;
+ typedef void (rq_end_io_fn)(struct request *, int);
+--
+1.9.1
+
diff --git a/5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.6.patch1 b/5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.6.patch1
new file mode 100644
index 0000000..c232a83
--- /dev/null
+++ b/5002_block-introduce-the-BFQ-v7r11-I-O-sched-for-4.6.patch1
@@ -0,0 +1,7097 @@
+From 75c0230fa4f82fb77e41cf60c06e22b5e07f7f97 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@unimore.it>
+Date: Thu, 9 May 2013 19:10:02 +0200
+Subject: [PATCH 2/4] block: introduce the BFQ-v7r11 I/O sched for 4.6.0
+
+The general structure is borrowed from CFQ, as much of the code for
+handling I/O contexts. Over time, several useful features have been
+ported from CFQ as well (details in the changelog in README.BFQ). A
+(bfq_)queue is associated to each task doing I/O on a device, and each
+time a scheduling decision has to be made a queue is selected and served
+until it expires.
+
+ - Slices are given in the service domain: tasks are assigned
+ budgets, measured in number of sectors. Once got the disk, a task
+ must however consume its assigned budget within a configurable
+ maximum time (by default, the maximum possible value of the
+ budgets is automatically computed to comply with this timeout).
+ This allows the desired latency vs "throughput boosting" tradeoff
+ to be set.
+
+ - Budgets are scheduled according to a variant of WF2Q+, implemented
+ using an augmented rb-tree to take eligibility into account while
+ preserving an O(log N) overall complexity.
+
+ - A low-latency tunable is provided; if enabled, both interactive
+ and soft real-time applications are guaranteed a very low latency.
+
+ - Latency guarantees are preserved also in the presence of NCQ.
+
+ - Also with flash-based devices, a high throughput is achieved
+ while still preserving latency guarantees.
+
+ - BFQ features Early Queue Merge (EQM), a sort of fusion of the
+ cooperating-queue-merging and the preemption mechanisms present
+ in CFQ. EQM is in fact a unified mechanism that tries to get a
+ sequential read pattern, and hence a high throughput, with any
+ set of processes performing interleaved I/O over a contiguous
+ sequence of sectors.
+
+ - BFQ supports full hierarchical scheduling, exporting a cgroups
+ interface. Since each node has a full scheduler, each group can
+ be assigned its own weight.
+
+ - If the cgroups interface is not used, only I/O priorities can be
+ assigned to processes, with ioprio values mapped to weights
+ with the relation weight = IOPRIO_BE_NR - ioprio.
+
+ - ioprio classes are served in strict priority order, i.e., lower
+ priority queues are not served as long as there are higher
+ priority queues. Among queues in the same class the bandwidth is
+ distributed in proportion to the weight of each queue. A very
+ thin extra bandwidth is however guaranteed to the Idle class, to
+ prevent it from starving.
+
+Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
+Signed-off-by: Arianna Avanzini <avanzini@google.com>
+---
+ block/Kconfig.iosched | 6 +-
+ block/bfq-cgroup.c | 1182 ++++++++++++++++
+ block/bfq-ioc.c | 36 +
+ block/bfq-iosched.c | 3754 +++++++++++++++++++++++++++++++++++++++++++++++++
+ block/bfq-sched.c | 1200 ++++++++++++++++
+ block/bfq.h | 801 +++++++++++
+ 6 files changed, 6975 insertions(+), 4 deletions(-)
+ create mode 100644 block/bfq-cgroup.c
+ create mode 100644 block/bfq-ioc.c
+ create mode 100644 block/bfq-iosched.c
+ create mode 100644 block/bfq-sched.c
+ create mode 100644 block/bfq.h
+
+diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
+index 0ee5f0f..f78cd1a 100644
+--- a/block/Kconfig.iosched
++++ b/block/Kconfig.iosched
+@@ -51,14 +51,12 @@ config IOSCHED_BFQ
+ applications. If compiled built-in (saying Y here), BFQ can
+ be configured to support hierarchical scheduling.
+
+-config CGROUP_BFQIO
++config BFQ_GROUP_IOSCHED
+ bool "BFQ hierarchical scheduling support"
+ depends on CGROUPS && IOSCHED_BFQ=y
+ default n
+ ---help---
+- Enable hierarchical scheduling in BFQ, using the cgroups
+- filesystem interface. The name of the subsystem will be
+- bfqio.
++ Enable hierarchical scheduling in BFQ, using the blkio controller.
+
+ choice
+ prompt "Default I/O scheduler"
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+new file mode 100644
+index 0000000..8610cd6
+--- /dev/null
++++ b/block/bfq-cgroup.c
+@@ -0,0 +1,1182 @@
++/*
++ * BFQ: CGROUPS support.
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
++ *
++ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
++ * Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
++ * file.
++ */
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++
++/* bfqg stats flags */
++enum bfqg_stats_flags {
++ BFQG_stats_waiting = 0,
++ BFQG_stats_idling,
++ BFQG_stats_empty,
++};
++
++#define BFQG_FLAG_FNS(name) \
++static void bfqg_stats_mark_##name(struct bfqg_stats *stats) \
++{ \
++ stats->flags |= (1 << BFQG_stats_##name); \
++} \
++static void bfqg_stats_clear_##name(struct bfqg_stats *stats) \
++{ \
++ stats->flags &= ~(1 << BFQG_stats_##name); \
++} \
++static int bfqg_stats_##name(struct bfqg_stats *stats) \
++{ \
++ return (stats->flags & (1 << BFQG_stats_##name)) != 0; \
++} \
++
++BFQG_FLAG_FNS(waiting)
++BFQG_FLAG_FNS(idling)
++BFQG_FLAG_FNS(empty)
++#undef BFQG_FLAG_FNS
++
++/* This should be called with the queue_lock held. */
++static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
++{
++ unsigned long long now;
++
++ if (!bfqg_stats_waiting(stats))
++ return;
++
++ now = sched_clock();
++ if (time_after64(now, stats->start_group_wait_time))
++ blkg_stat_add(&stats->group_wait_time,
++ now - stats->start_group_wait_time);
++ bfqg_stats_clear_waiting(stats);
++}
++
++/* This should be called with the queue_lock held. */
++static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
++ struct bfq_group *curr_bfqg)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++
++ if (bfqg_stats_waiting(stats))
++ return;
++ if (bfqg == curr_bfqg)
++ return;
++ stats->start_group_wait_time = sched_clock();
++ bfqg_stats_mark_waiting(stats);
++}
++
++/* This should be called with the queue_lock held. */
++static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
++{
++ unsigned long long now;
++
++ if (!bfqg_stats_empty(stats))
++ return;
++
++ now = sched_clock();
++ if (time_after64(now, stats->start_empty_time))
++ blkg_stat_add(&stats->empty_time,
++ now - stats->start_empty_time);
++ bfqg_stats_clear_empty(stats);
++}
++
++static void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
++{
++ blkg_stat_add(&bfqg->stats.dequeue, 1);
++}
++
++static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++
++ if (blkg_rwstat_total(&stats->queued))
++ return;
++
++ /*
++ * group is already marked empty. This can happen if bfqq got new
++ * request in parent group and moved to this group while being added
++ * to service tree. Just ignore the event and move on.
++ */
++ if (bfqg_stats_empty(stats))
++ return;
++
++ stats->start_empty_time = sched_clock();
++ bfqg_stats_mark_empty(stats);
++}
++
++static void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++
++ if (bfqg_stats_idling(stats)) {
++ unsigned long long now = sched_clock();
++
++ if (time_after64(now, stats->start_idle_time))
++ blkg_stat_add(&stats->idle_time,
++ now - stats->start_idle_time);
++ bfqg_stats_clear_idling(stats);
++ }
++}
++
++static void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++
++ stats->start_idle_time = sched_clock();
++ bfqg_stats_mark_idling(stats);
++}
++
++static void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++
++ blkg_stat_add(&stats->avg_queue_size_sum,
++ blkg_rwstat_total(&stats->queued));
++ blkg_stat_add(&stats->avg_queue_size_samples, 1);
++ bfqg_stats_update_group_wait_time(stats);
++}
++
++static struct blkcg_policy blkcg_policy_bfq;
++
++/*
++ * blk-cgroup policy-related handlers
++ * The following functions help in converting between blk-cgroup
++ * internal structures and BFQ-specific structures.
++ */
++
++static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
++{
++ return pd ? container_of(pd, struct bfq_group, pd) : NULL;
++}
++
++static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
++{
++ return pd_to_blkg(&bfqg->pd);
++}
++
++static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
++{
++ struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq);
++ BUG_ON(!pd);
++ return pd_to_bfqg(pd);
++}
++
++/*
++ * bfq_group handlers
++ * The following functions help in navigating the bfq_group hierarchy
++ * by allowing to find the parent of a bfq_group or the bfq_group
++ * associated to a bfq_queue.
++ */
++
++static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
++{
++ struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;
++
++ return pblkg ? blkg_to_bfqg(pblkg) : NULL;
++}
++
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
++{
++ struct bfq_entity *group_entity = bfqq->entity.parent;
++
++ return group_entity ? container_of(group_entity, struct bfq_group,
++ entity) :
++ bfqq->bfqd->root_group;
++}
++
++/*
++ * The following two functions handle get and put of a bfq_group by
++ * wrapping the related blk-cgroup hooks.
++ */
++
++static void bfqg_get(struct bfq_group *bfqg)
++{
++ return blkg_get(bfqg_to_blkg(bfqg));
++}
++
++static void bfqg_put(struct bfq_group *bfqg)
++{
++ return blkg_put(bfqg_to_blkg(bfqg));
++}
++
++static void bfqg_stats_update_io_add(struct bfq_group *bfqg,
++ struct bfq_queue *bfqq,
++ int rw)
++{
++ blkg_rwstat_add(&bfqg->stats.queued, rw, 1);
++ bfqg_stats_end_empty_time(&bfqg->stats);
++ if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
++ bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
++}
++
++static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw)
++{
++ blkg_rwstat_add(&bfqg->stats.queued, rw, -1);
++}
++
++static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw)
++{
++ blkg_rwstat_add(&bfqg->stats.merged, rw, 1);
++}
++
++static void bfqg_stats_update_dispatch(struct bfq_group *bfqg,
++ uint64_t bytes, int rw)
++{
++ blkg_stat_add(&bfqg->stats.sectors, bytes >> 9);
++ blkg_rwstat_add(&bfqg->stats.serviced, rw, 1);
++ blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes);
++}
++
++static void bfqg_stats_update_completion(struct bfq_group *bfqg,
++ uint64_t start_time, uint64_t io_start_time, int rw)
++{
++ struct bfqg_stats *stats = &bfqg->stats;
++ unsigned long long now = sched_clock();
++
++ if (time_after64(now, io_start_time))
++ blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
++ if (time_after64(io_start_time, start_time))
++ blkg_rwstat_add(&stats->wait_time, rw,
++ io_start_time - start_time);
++}
++
++/* @stats = 0 */
++static void bfqg_stats_reset(struct bfqg_stats *stats)
++{
++ if (!stats)
++ return;
++
++ /* queued stats shouldn't be cleared */
++ blkg_rwstat_reset(&stats->service_bytes);
++ blkg_rwstat_reset(&stats->serviced);
++ blkg_rwstat_reset(&stats->merged);
++ blkg_rwstat_reset(&stats->service_time);
++ blkg_rwstat_reset(&stats->wait_time);
++ blkg_stat_reset(&stats->time);
++ blkg_stat_reset(&stats->unaccounted_time);
++ blkg_stat_reset(&stats->avg_queue_size_sum);
++ blkg_stat_reset(&stats->avg_queue_size_samples);
++ blkg_stat_reset(&stats->dequeue);
++ blkg_stat_reset(&stats->group_wait_time);
++ blkg_stat_reset(&stats->idle_time);
++ blkg_stat_reset(&stats->empty_time);
++}
++
++/* @to += @from */
++static void bfqg_stats_merge(struct bfqg_stats *to, struct bfqg_stats *from)
++{
++ if (!to || !from)
++ return;
++
++ /* queued stats shouldn't be cleared */
++ blkg_rwstat_add_aux(&to->service_bytes, &from->service_bytes);
++ blkg_rwstat_add_aux(&to->serviced, &from->serviced);
++ blkg_rwstat_add_aux(&to->merged, &from->merged);
++ blkg_rwstat_add_aux(&to->service_time, &from->service_time);
++ blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
++ blkg_stat_add_aux(&to->time, &from->time);
++ blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time);
++ blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
++ blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
++ blkg_stat_add_aux(&to->dequeue, &from->dequeue);
++ blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
++ blkg_stat_add_aux(&to->idle_time, &from->idle_time);
++ blkg_stat_add_aux(&to->empty_time, &from->empty_time);
++}
++
++/*
++ * Transfer @bfqg's stats to its parent's dead_stats so that the ancestors'
++ * recursive stats can still account for the amount used by this bfqg after
++ * it's gone.
++ */
++static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
++{
++ struct bfq_group *parent;
++
++ if (!bfqg) /* root_group */
++ return;
++
++ parent = bfqg_parent(bfqg);
++
++ lockdep_assert_held(bfqg_to_blkg(bfqg)->q->queue_lock);
++
++ if (unlikely(!parent))
++ return;
++
++ bfqg_stats_merge(&parent->dead_stats, &bfqg->stats);
++ bfqg_stats_merge(&parent->dead_stats, &bfqg->dead_stats);
++ bfqg_stats_reset(&bfqg->stats);
++ bfqg_stats_reset(&bfqg->dead_stats);
++}
++
++static void bfq_init_entity(struct bfq_entity *entity,
++ struct bfq_group *bfqg)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++
++ entity->weight = entity->new_weight;
++ entity->orig_weight = entity->new_weight;
++ if (bfqq) {
++ bfqq->ioprio = bfqq->new_ioprio;
++ bfqq->ioprio_class = bfqq->new_ioprio_class;
++ bfqg_get(bfqg);
++ }
++ entity->parent = bfqg->my_entity;
++ entity->sched_data = &bfqg->sched_data;
++}
++
++static void bfqg_stats_exit(struct bfqg_stats *stats)
++{
++ blkg_rwstat_exit(&stats->service_bytes);
++ blkg_rwstat_exit(&stats->serviced);
++ blkg_rwstat_exit(&stats->merged);
++ blkg_rwstat_exit(&stats->service_time);
++ blkg_rwstat_exit(&stats->wait_time);
++ blkg_rwstat_exit(&stats->queued);
++ blkg_stat_exit(&stats->sectors);
++ blkg_stat_exit(&stats->time);
++ blkg_stat_exit(&stats->unaccounted_time);
++ blkg_stat_exit(&stats->avg_queue_size_sum);
++ blkg_stat_exit(&stats->avg_queue_size_samples);
++ blkg_stat_exit(&stats->dequeue);
++ blkg_stat_exit(&stats->group_wait_time);
++ blkg_stat_exit(&stats->idle_time);
++ blkg_stat_exit(&stats->empty_time);
++}
++
++static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
++{
++ if (blkg_rwstat_init(&stats->service_bytes, gfp) ||
++ blkg_rwstat_init(&stats->serviced, gfp) ||
++ blkg_rwstat_init(&stats->merged, gfp) ||
++ blkg_rwstat_init(&stats->service_time, gfp) ||
++ blkg_rwstat_init(&stats->wait_time, gfp) ||
++ blkg_rwstat_init(&stats->queued, gfp) ||
++ blkg_stat_init(&stats->sectors, gfp) ||
++ blkg_stat_init(&stats->time, gfp) ||
++ blkg_stat_init(&stats->unaccounted_time, gfp) ||
++ blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
++ blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
++ blkg_stat_init(&stats->dequeue, gfp) ||
++ blkg_stat_init(&stats->group_wait_time, gfp) ||
++ blkg_stat_init(&stats->idle_time, gfp) ||
++ blkg_stat_init(&stats->empty_time, gfp)) {
++ bfqg_stats_exit(stats);
++ return -ENOMEM;
++ }
++
++ return 0;
++}
++
++static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
++ {
++ return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
++ }
++
++static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
++{
++ return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
++}
++
++static void bfq_cpd_init(struct blkcg_policy_data *cpd)
++{
++ struct bfq_group_data *d = cpd_to_bfqgd(cpd);
++
++ d->weight = BFQ_DEFAULT_GRP_WEIGHT;
++}
++
++static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
++{
++ struct bfq_group *bfqg;
++
++ bfqg = kzalloc_node(sizeof(*bfqg), gfp, node);
++ if (!bfqg)
++ return NULL;
++
++ if (bfqg_stats_init(&bfqg->stats, gfp) ||
++ bfqg_stats_init(&bfqg->dead_stats, gfp)) {
++ kfree(bfqg);
++ return NULL;
++ }
++
++ return &bfqg->pd;
++}
++
++static void bfq_group_set_parent(struct bfq_group *bfqg,
++ struct bfq_group *parent)
++{
++ struct bfq_entity *entity;
++
++ BUG_ON(!parent);
++ BUG_ON(!bfqg);
++ BUG_ON(bfqg == parent);
++
++ entity = &bfqg->entity;
++ entity->parent = parent->my_entity;
++ entity->sched_data = &parent->sched_data;
++}
++
++static void bfq_pd_init(struct blkg_policy_data *pd)
++{
++ struct blkcg_gq *blkg = pd_to_blkg(pd);
++ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
++ struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
++ struct bfq_entity *entity = &bfqg->entity;
++ struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
++
++ entity->orig_weight = entity->weight = entity->new_weight = d->weight;
++ entity->my_sched_data = &bfqg->sched_data;
++ bfqg->my_entity = entity; /*
++ * the root_group's will be set to NULL
++ * in bfq_init_queue()
++ */
++ bfqg->bfqd = bfqd;
++ bfqg->active_entities = 0;
++}
++
++static void bfq_pd_free(struct blkg_policy_data *pd)
++{
++ struct bfq_group *bfqg = pd_to_bfqg(pd);
++
++ bfqg_stats_exit(&bfqg->stats);
++ bfqg_stats_exit(&bfqg->dead_stats);
++
++ return kfree(bfqg);
++}
++
++/* offset delta from bfqg->stats to bfqg->dead_stats */
++static const int dead_stats_off_delta = offsetof(struct bfq_group, dead_stats) -
++ offsetof(struct bfq_group, stats);
++
++/* to be used by recursive prfill, sums live and dead stats recursively */
++static u64 bfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
++{
++ u64 sum = 0;
++
++ sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
++ sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
++ off + dead_stats_off_delta);
++ return sum;
++}
++
++/* to be used by recursive prfill, sums live and dead rwstats recursively */
++static struct blkg_rwstat bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
++ int off)
++{
++ struct blkg_rwstat a, b;
++
++ a = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
++ b = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
++ off + dead_stats_off_delta);
++ blkg_rwstat_add_aux(&a, &b);
++ return a;
++}
++
++static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
++{
++ struct bfq_group *bfqg = pd_to_bfqg(pd);
++
++ bfqg_stats_reset(&bfqg->stats);
++ bfqg_stats_reset(&bfqg->dead_stats);
++}
++
++static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
++ struct blkcg *blkcg)
++{
++ struct request_queue *q = bfqd->queue;
++ struct bfq_group *bfqg = NULL, *parent;
++ struct bfq_entity *entity = NULL;
++
++ assert_spin_locked(bfqd->queue->queue_lock);
++
++ /* avoid lookup for the common case where there's no blkcg */
++ if (blkcg == &blkcg_root) {
++ bfqg = bfqd->root_group;
++ } else {
++ struct blkcg_gq *blkg;
++
++ blkg = blkg_lookup_create(blkcg, q);
++ if (!IS_ERR(blkg))
++ bfqg = blkg_to_bfqg(blkg);
++ else /* fallback to root_group */
++ bfqg = bfqd->root_group;
++ }
++
++ BUG_ON(!bfqg);
++
++ /*
++ * Update chain of bfq_groups as we might be handling a leaf group
++ * which, along with some of its relatives, has not been hooked yet
++ * to the private hierarchy of BFQ.
++ */
++ entity = &bfqg->entity;
++ for_each_entity(entity) {
++ bfqg = container_of(entity, struct bfq_group, entity);
++ BUG_ON(!bfqg);
++ if (bfqg != bfqd->root_group) {
++ parent = bfqg_parent(bfqg);
++ if (!parent)
++ parent = bfqd->root_group;
++ BUG_ON(!parent);
++ bfq_group_set_parent(bfqg, parent);
++ }
++ }
++
++ return bfqg;
++}
++
++/**
++ * bfq_bfqq_move - migrate @bfqq to @bfqg.
++ * @bfqd: queue descriptor.
++ * @bfqq: the queue to move.
++ * @entity: @bfqq's entity.
++ * @bfqg: the group to move to.
++ *
++ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
++ * it on the new one. Avoid putting the entity on the old group idle tree.
++ *
++ * Must be called under the queue lock; the cgroup owning @bfqg must
++ * not disappear (by now this just means that we are called under
++ * rcu_read_lock()).
++ */
++static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ struct bfq_entity *entity, struct bfq_group *bfqg)
++{
++ int busy, resume;
++
++ busy = bfq_bfqq_busy(bfqq);
++ resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
++
++ BUG_ON(resume && !entity->on_st);
++ BUG_ON(busy && !resume && entity->on_st &&
++ bfqq != bfqd->in_service_queue);
++
++ if (busy) {
++ BUG_ON(atomic_read(&bfqq->ref) < 2);
++
++ if (!resume)
++ bfq_del_bfqq_busy(bfqd, bfqq, 0);
++ else
++ bfq_deactivate_bfqq(bfqd, bfqq, 0);
++ } else if (entity->on_st)
++ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
++ bfqg_put(bfqq_group(bfqq));
++
++ /*
++ * Here we use a reference to bfqg. We don't need a refcounter
++ * as the cgroup reference will not be dropped, so that its
++ * destroy() callback will not be invoked.
++ */
++ entity->parent = bfqg->my_entity;
++ entity->sched_data = &bfqg->sched_data;
++ bfqg_get(bfqg);
++
++ if (busy) {
++ if (resume)
++ bfq_activate_bfqq(bfqd, bfqq);
++ }
++
++ if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
++ bfq_schedule_dispatch(bfqd);
++}
++
++/**
++ * __bfq_bic_change_cgroup - move @bic to @cgroup.
++ * @bfqd: the queue descriptor.
++ * @bic: the bic to move.
++ * @blkcg: the blk-cgroup to move to.
++ *
++ * Move bic to blkcg, assuming that bfqd->queue is locked; the caller
++ * has to make sure that the reference to cgroup is valid across the call.
++ *
++ * NOTE: an alternative approach might have been to store the current
++ * cgroup in bfqq and getting a reference to it, reducing the lookup
++ * time here, at the price of slightly more complex code.
++ */
++static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
++ struct bfq_io_cq *bic,
++ struct blkcg *blkcg)
++{
++ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
++ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
++ struct bfq_group *bfqg;
++ struct bfq_entity *entity;
++
++ lockdep_assert_held(bfqd->queue->queue_lock);
++
++ bfqg = bfq_find_alloc_group(bfqd, blkcg);
++ if (async_bfqq) {
++ entity = &async_bfqq->entity;
++
++ if (entity->sched_data != &bfqg->sched_data) {
++ bic_set_bfqq(bic, NULL, 0);
++ bfq_log_bfqq(bfqd, async_bfqq,
++ "bic_change_group: %p %d",
++ async_bfqq, atomic_read(&async_bfqq->ref));
++ bfq_put_queue(async_bfqq);
++ }
++ }
++
++ if (sync_bfqq) {
++ entity = &sync_bfqq->entity;
++ if (entity->sched_data != &bfqg->sched_data)
++ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
++ }
++
++ return bfqg;
++}
++
++static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
++{
++ struct bfq_data *bfqd = bic_to_bfqd(bic);
++ struct blkcg *blkcg;
++ struct bfq_group *bfqg = NULL;
++ uint64_t id;
++
++ rcu_read_lock();
++ blkcg = bio_blkcg(bio);
++ id = blkcg->css.serial_nr;
++ rcu_read_unlock();
++
++ /*
++ * Check whether blkcg has changed. The condition may trigger
++ * spuriously on a newly created cic but there's no harm.
++ */
++ if (unlikely(!bfqd) || likely(bic->blkcg_id == id))
++ return;
++
++ bfqg = __bfq_bic_change_cgroup(bfqd, bic, blkcg);
++ BUG_ON(!bfqg);
++ bic->blkcg_id = id;
++}
++
++/**
++ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
++ * @st: the service tree being flushed.
++ */
++static void bfq_flush_idle_tree(struct bfq_service_tree *st)
++{
++ struct bfq_entity *entity = st->first_idle;
++
++ for (; entity ; entity = st->first_idle)
++ __bfq_deactivate_entity(entity, 0);
++}
++
++/**
++ * bfq_reparent_leaf_entity - move leaf entity to the root_group.
++ * @bfqd: the device data structure with the root group.
++ * @entity: the entity to move.
++ */
++static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++
++ BUG_ON(!bfqq);
++ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
++ return;
++}
++
++/**
++ * bfq_reparent_active_entities - move to the root group all active
++ * entities.
++ * @bfqd: the device data structure with the root group.
++ * @bfqg: the group to move from.
++ * @st: the service tree with the entities.
++ *
++ * Needs queue_lock to be taken and reference to be valid over the call.
++ */
++static void bfq_reparent_active_entities(struct bfq_data *bfqd,
++ struct bfq_group *bfqg,
++ struct bfq_service_tree *st)
++{
++ struct rb_root *active = &st->active;
++ struct bfq_entity *entity = NULL;
++
++ if (!RB_EMPTY_ROOT(&st->active))
++ entity = bfq_entity_of(rb_first(active));
++
++ for (; entity ; entity = bfq_entity_of(rb_first(active)))
++ bfq_reparent_leaf_entity(bfqd, entity);
++
++ if (bfqg->sched_data.in_service_entity)
++ bfq_reparent_leaf_entity(bfqd,
++ bfqg->sched_data.in_service_entity);
++
++ return;
++}
++
++/**
++ * bfq_destroy_group - destroy @bfqg.
++ * @bfqg: the group being destroyed.
++ *
++ * Destroy @bfqg, making sure that it is not referenced from its parent.
++ * blkio already grabs the queue_lock for us, so no need to use RCU-based magic
++ */
++static void bfq_pd_offline(struct blkg_policy_data *pd)
++{
++ struct bfq_service_tree *st;
++ struct bfq_group *bfqg;
++ struct bfq_data *bfqd;
++ struct bfq_entity *entity;
++ int i;
++
++ BUG_ON(!pd);
++ bfqg = pd_to_bfqg(pd);
++ BUG_ON(!bfqg);
++ bfqd = bfqg->bfqd;
++ BUG_ON(bfqd && !bfqd->root_group);
++
++ entity = bfqg->my_entity;
++
++ if (!entity) /* root group */
++ return;
++
++ /*
++ * Empty all service_trees belonging to this group before
++ * deactivating the group itself.
++ */
++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
++ BUG_ON(!bfqg->sched_data.service_tree);
++ st = bfqg->sched_data.service_tree + i;
++ /*
++ * The idle tree may still contain bfq_queues belonging
++ * to exited task because they never migrated to a different
++ * cgroup from the one being destroyed now. No one else
++ * can access them so it's safe to act without any lock.
++ */
++ bfq_flush_idle_tree(st);
++
++ /*
++ * It may happen that some queues are still active
++ * (busy) upon group destruction (if the corresponding
++ * processes have been forced to terminate). We move
++ * all the leaf entities corresponding to these queues
++ * to the root_group.
++ * Also, it may happen that the group has an entity
++ * in service, which is disconnected from the active
++ * tree: it must be moved, too.
++ * There is no need to put the sync queues, as the
++ * scheduler has taken no reference.
++ */
++ bfq_reparent_active_entities(bfqd, bfqg, st);
++ BUG_ON(!RB_EMPTY_ROOT(&st->active));
++ BUG_ON(!RB_EMPTY_ROOT(&st->idle));
++ }
++ BUG_ON(bfqg->sched_data.next_in_service);
++ BUG_ON(bfqg->sched_data.in_service_entity);
++
++ __bfq_deactivate_entity(entity, 0);
++ bfq_put_async_queues(bfqd, bfqg);
++ BUG_ON(entity->tree);
++
++ bfqg_stats_xfer_dead(bfqg);
++}
++
++static void bfq_end_wr_async(struct bfq_data *bfqd)
++{
++ struct blkcg_gq *blkg;
++
++ list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
++ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
++
++ bfq_end_wr_async_queues(bfqd, bfqg);
++ }
++ bfq_end_wr_async_queues(bfqd, bfqd->root_group);
++}
++
++static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css,
++ struct cftype *cftype)
++{
++ struct blkcg *blkcg = css_to_blkcg(css);
++ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
++ int ret = -EINVAL;
++
++ spin_lock_irq(&blkcg->lock);
++ ret = bfqgd->weight;
++ spin_unlock_irq(&blkcg->lock);
++
++ return ret;
++}
++
++static int bfqio_cgroup_weight_read_dfl(struct seq_file *sf, void *v)
++{
++ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
++ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
++
++ spin_lock_irq(&blkcg->lock);
++ seq_printf(sf, "%u\n", bfqgd->weight);
++ spin_unlock_irq(&blkcg->lock);
++
++ return 0;
++}
++
++static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
++ struct cftype *cftype,
++ u64 val)
++{
++ struct blkcg *blkcg = css_to_blkcg(css);
++ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
++ struct blkcg_gq *blkg;
++ int ret = -EINVAL;
++
++ if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
++ return ret;
++
++ ret = 0;
++ spin_lock_irq(&blkcg->lock);
++ bfqgd->weight = (unsigned short)val;
++ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
++ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
++ if (!bfqg)
++ continue;
++ /*
++ * Setting the prio_changed flag of the entity
++ * to 1 with new_weight == weight would re-set
++ * the value of the weight to its ioprio mapping.
++ * Set the flag only if necessary.
++ */
++ if ((unsigned short)val != bfqg->entity.new_weight) {
++ bfqg->entity.new_weight = (unsigned short)val;
++ /*
++ * Make sure that the above new value has been
++ * stored in bfqg->entity.new_weight before
++ * setting the prio_changed flag. In fact,
++ * this flag may be read asynchronously (in
++ * critical sections protected by a different
++ * lock than that held here), and finding this
++ * flag set may cause the execution of the code
++ * for updating parameters whose value may
++ * depend also on bfqg->entity.new_weight (in
++ * __bfq_entity_update_weight_prio).
++ * This barrier makes sure that the new value
++ * of bfqg->entity.new_weight is correctly
++ * seen in that code.
++ */
++ smp_wmb();
++ bfqg->entity.prio_changed = 1;
++ }
++ }
++ spin_unlock_irq(&blkcg->lock);
++
++ return ret;
++}
++
++static ssize_t bfqio_cgroup_weight_write_dfl(struct kernfs_open_file *of,
++ char *buf, size_t nbytes,
++ loff_t off)
++{
++ /* First unsigned long found in the file is used */
++ return bfqio_cgroup_weight_write(of_css(of), NULL,
++ simple_strtoull(strim(buf), NULL, 0));
++}
++
++static int bfqg_print_stat(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
++ &blkcg_policy_bfq, seq_cft(sf)->private, false);
++ return 0;
++}
++
++static int bfqg_print_rwstat(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
++ &blkcg_policy_bfq, seq_cft(sf)->private, true);
++ return 0;
++}
++
++static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
++ struct blkg_policy_data *pd, int off)
++{
++ u64 sum = bfqg_stat_pd_recursive_sum(pd, off);
++
++ return __blkg_prfill_u64(sf, pd, sum);
++}
++
++static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
++ struct blkg_policy_data *pd, int off)
++{
++ struct blkg_rwstat sum = bfqg_rwstat_pd_recursive_sum(pd, off);
++
++ return __blkg_prfill_rwstat(sf, pd, &sum);
++}
++
++static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
++ bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
++ seq_cft(sf)->private, false);
++ return 0;
++}
++
++static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
++ bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
++ seq_cft(sf)->private, true);
++ return 0;
++}
++
++static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
++ struct blkg_policy_data *pd, int off)
++{
++ struct bfq_group *bfqg = pd_to_bfqg(pd);
++ u64 samples = blkg_stat_read(&bfqg->stats.avg_queue_size_samples);
++ u64 v = 0;
++
++ if (samples) {
++ v = blkg_stat_read(&bfqg->stats.avg_queue_size_sum);
++ v = div64_u64(v, samples);
++ }
++ __blkg_prfill_u64(sf, pd, v);
++ return 0;
++}
++
++/* print avg_queue_size */
++static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
++ bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
++ 0, false);
++ return 0;
++}
++
++static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
++{
++ int ret;
++
++ ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
++ if (ret)
++ return NULL;
++
++ return blkg_to_bfqg(bfqd->queue->root_blkg);
++}
++
++static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
++{
++ struct bfq_group_data *bgd;
++
++ bgd = kzalloc(sizeof(*bgd), GFP_KERNEL);
++ if (!bgd)
++ return NULL;
++ return &bgd->pd;
++}
++
++static void bfq_cpd_free(struct blkcg_policy_data *cpd)
++{
++ kfree(cpd_to_bfqgd(cpd));
++}
++
++static struct cftype bfqio_files_dfl[] = {
++ {
++ .name = "weight",
++ .flags = CFTYPE_NOT_ON_ROOT,
++ .seq_show = bfqio_cgroup_weight_read_dfl,
++ .write = bfqio_cgroup_weight_write_dfl,
++ },
++ {} /* terminate */
++};
++
++static struct cftype bfqio_files[] = {
++ {
++ .name = "bfq.weight",
++ .read_u64 = bfqio_cgroup_weight_read,
++ .write_u64 = bfqio_cgroup_weight_write,
++ },
++ /* statistics, cover only the tasks in the bfqg */
++ {
++ .name = "bfq.time",
++ .private = offsetof(struct bfq_group, stats.time),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.sectors",
++ .private = offsetof(struct bfq_group, stats.sectors),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.io_service_bytes",
++ .private = offsetof(struct bfq_group, stats.service_bytes),
++ .seq_show = bfqg_print_rwstat,
++ },
++ {
++ .name = "bfq.io_serviced",
++ .private = offsetof(struct bfq_group, stats.serviced),
++ .seq_show = bfqg_print_rwstat,
++ },
++ {
++ .name = "bfq.io_service_time",
++ .private = offsetof(struct bfq_group, stats.service_time),
++ .seq_show = bfqg_print_rwstat,
++ },
++ {
++ .name = "bfq.io_wait_time",
++ .private = offsetof(struct bfq_group, stats.wait_time),
++ .seq_show = bfqg_print_rwstat,
++ },
++ {
++ .name = "bfq.io_merged",
++ .private = offsetof(struct bfq_group, stats.merged),
++ .seq_show = bfqg_print_rwstat,
++ },
++ {
++ .name = "bfq.io_queued",
++ .private = offsetof(struct bfq_group, stats.queued),
++ .seq_show = bfqg_print_rwstat,
++ },
++
++ /* the same statictics which cover the bfqg and its descendants */
++ {
++ .name = "bfq.time_recursive",
++ .private = offsetof(struct bfq_group, stats.time),
++ .seq_show = bfqg_print_stat_recursive,
++ },
++ {
++ .name = "bfq.sectors_recursive",
++ .private = offsetof(struct bfq_group, stats.sectors),
++ .seq_show = bfqg_print_stat_recursive,
++ },
++ {
++ .name = "bfq.io_service_bytes_recursive",
++ .private = offsetof(struct bfq_group, stats.service_bytes),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.io_serviced_recursive",
++ .private = offsetof(struct bfq_group, stats.serviced),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.io_service_time_recursive",
++ .private = offsetof(struct bfq_group, stats.service_time),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.io_wait_time_recursive",
++ .private = offsetof(struct bfq_group, stats.wait_time),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.io_merged_recursive",
++ .private = offsetof(struct bfq_group, stats.merged),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.io_queued_recursive",
++ .private = offsetof(struct bfq_group, stats.queued),
++ .seq_show = bfqg_print_rwstat_recursive,
++ },
++ {
++ .name = "bfq.avg_queue_size",
++ .seq_show = bfqg_print_avg_queue_size,
++ },
++ {
++ .name = "bfq.group_wait_time",
++ .private = offsetof(struct bfq_group, stats.group_wait_time),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.idle_time",
++ .private = offsetof(struct bfq_group, stats.idle_time),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.empty_time",
++ .private = offsetof(struct bfq_group, stats.empty_time),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.dequeue",
++ .private = offsetof(struct bfq_group, stats.dequeue),
++ .seq_show = bfqg_print_stat,
++ },
++ {
++ .name = "bfq.unaccounted_time",
++ .private = offsetof(struct bfq_group, stats.unaccounted_time),
++ .seq_show = bfqg_print_stat,
++ },
++ { } /* terminate */
++};
++
++static struct blkcg_policy blkcg_policy_bfq = {
++ .dfl_cftypes = bfqio_files_dfl,
++ .legacy_cftypes = bfqio_files,
++
++ .pd_alloc_fn = bfq_pd_alloc,
++ .pd_init_fn = bfq_pd_init,
++ .pd_offline_fn = bfq_pd_offline,
++ .pd_free_fn = bfq_pd_free,
++ .pd_reset_stats_fn = bfq_pd_reset_stats,
++
++ .cpd_alloc_fn = bfq_cpd_alloc,
++ .cpd_init_fn = bfq_cpd_init,
++ .cpd_bind_fn = bfq_cpd_init,
++ .cpd_free_fn = bfq_cpd_free,
++
++};
++
++#else
++
++static void bfq_init_entity(struct bfq_entity *entity,
++ struct bfq_group *bfqg)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ entity->weight = entity->new_weight;
++ entity->orig_weight = entity->new_weight;
++ if (bfqq) {
++ bfqq->ioprio = bfqq->new_ioprio;
++ bfqq->ioprio_class = bfqq->new_ioprio_class;
++ }
++ entity->sched_data = &bfqg->sched_data;
++}
++
++static struct bfq_group *
++bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
++{
++ struct bfq_data *bfqd = bic_to_bfqd(bic);
++ return bfqd->root_group;
++}
++
++static void bfq_bfqq_move(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ struct bfq_entity *entity,
++ struct bfq_group *bfqg)
++{
++}
++
++static void bfq_end_wr_async(struct bfq_data *bfqd)
++{
++ bfq_end_wr_async_queues(bfqd, bfqd->root_group);
++}
++
++static void bfq_disconnect_groups(struct bfq_data *bfqd)
++{
++ bfq_put_async_queues(bfqd, bfqd->root_group);
++}
++
++static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
++ struct blkcg *blkcg)
++{
++ return bfqd->root_group;
++}
++
++static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
++{
++ struct bfq_group *bfqg;
++ int i;
++
++ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
++ if (!bfqg)
++ return NULL;
++
++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
++ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
++
++ return bfqg;
++}
++#endif
+diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c
+new file mode 100644
+index 0000000..fb7bb8f
+--- /dev/null
++++ b/block/bfq-ioc.c
+@@ -0,0 +1,36 @@
++/*
++ * BFQ: I/O context handling.
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
++ *
++ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
++ * Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ */
++
++/**
++ * icq_to_bic - convert iocontext queue structure to bfq_io_cq.
++ * @icq: the iocontext queue.
++ */
++static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
++{
++ /* bic->icq is the first member, %NULL will convert to %NULL */
++ return container_of(icq, struct bfq_io_cq, icq);
++}
++
++/**
++ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd.
++ * @bfqd: the lookup key.
++ * @ioc: the io_context of the process doing I/O.
++ *
++ * Queue lock must be held.
++ */
++static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
++ struct io_context *ioc)
++{
++ if (ioc)
++ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue));
++ return NULL;
++}
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+new file mode 100644
+index 0000000..f9787a6
+--- /dev/null
++++ b/block/bfq-iosched.c
+@@ -0,0 +1,3754 @@
++/*
++ * Budget Fair Queueing (BFQ) disk scheduler.
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
++ *
++ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
++ * Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
++ * file.
++ *
++ * BFQ is a proportional-share storage-I/O scheduling algorithm based on
++ * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets,
++ * measured in number of sectors, to processes instead of time slices. The
++ * device is not granted to the in-service process for a given time slice,
++ * but until it has exhausted its assigned budget. This change from the time
++ * to the service domain allows BFQ to distribute the device throughput
++ * among processes as desired, without any distortion due to ZBR, workload
++ * fluctuations or other factors. BFQ uses an ad hoc internal scheduler,
++ * called B-WF2Q+, to schedule processes according to their budgets. More
++ * precisely, BFQ schedules queues associated to processes. Thanks to the
++ * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to
++ * I/O-bound processes issuing sequential requests (to boost the
++ * throughput), and yet guarantee a low latency to interactive and soft
++ * real-time applications.
++ *
++ * BFQ is described in [1], where a reference to the initial, more
++ * theoretical paper on BFQ can also be found. The interested reader can find
++ * in the latter paper full details on the main algorithm, as well as
++ * formulas of the guarantees and formal proofs of all the properties.
++ * With respect to the version of BFQ presented in these papers, this
++ * implementation adds a few more heuristics, such as the one that
++ * guarantees a low latency to soft real-time applications, and a
++ * hierarchical extension based on H-WF2Q+.
++ *
++ * B-WF2Q+ is based on WF2Q+, which is described in [2], together with
++ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N)
++ * complexity derives from the one introduced with EEVDF in [3].
++ *
++ * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness
++ * with the BFQ Disk I/O Scheduler'',
++ * Proceedings of the 5th Annual International Systems and Storage
++ * Conference (SYSTOR '12), June 2012.
++ *
++ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf
++ *
++ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing
++ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689,
++ * Oct 1997.
++ *
++ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz
++ *
++ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline
++ * First: A Flexible and Accurate Mechanism for Proportional Share
++ * Resource Allocation,'' technical report.
++ *
++ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf
++ */
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/blkdev.h>
++#include <linux/cgroup.h>
++#include <linux/elevator.h>
++#include <linux/jiffies.h>
++#include <linux/rbtree.h>
++#include <linux/ioprio.h>
++#include "bfq.h"
++#include "blk.h"
++
++/* Expiration time of sync (0) and async (1) requests, in jiffies. */
++static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
++
++/* Maximum backwards seek, in KiB. */
++static const int bfq_back_max = 16 * 1024;
++
++/* Penalty of a backwards seek, in number of sectors. */
++static const int bfq_back_penalty = 2;
++
++/* Idling period duration, in jiffies. */
++static int bfq_slice_idle = HZ / 125;
++
++/* Minimum number of assigned budgets for which stats are safe to compute. */
++static const int bfq_stats_min_budgets = 194;
++
++/* Default maximum budget values, in sectors and number of requests. */
++static const int bfq_default_max_budget = 16 * 1024;
++static const int bfq_max_budget_async_rq = 4;
++
++/*
++ * Async to sync throughput distribution is controlled as follows:
++ * when an async request is served, the entity is charged the number
++ * of sectors of the request, multiplied by the factor below
++ */
++static const int bfq_async_charge_factor = 10;
++
++/* Default timeout values, in jiffies, approximating CFQ defaults. */
++static const int bfq_timeout_sync = HZ / 8;
++static int bfq_timeout_async = HZ / 25;
++
++struct kmem_cache *bfq_pool;
++
++/* Below this threshold (in ms), we consider thinktime immediate. */
++#define BFQ_MIN_TT 2
++
++/* hw_tag detection: parallel requests threshold and min samples needed. */
++#define BFQ_HW_QUEUE_THRESHOLD 4
++#define BFQ_HW_QUEUE_SAMPLES 32
++
++#define BFQQ_SEEK_THR (sector_t)(8 * 1024)
++#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR)
++
++/* Min samples used for peak rate estimation (for autotuning). */
++#define BFQ_PEAK_RATE_SAMPLES 32
++
++/* Shift used for peak rate fixed precision calculations. */
++#define BFQ_RATE_SHIFT 16
++
++/*
++ * By default, BFQ computes the duration of the weight raising for
++ * interactive applications automatically, using the following formula:
++ * duration = (R / r) * T, where r is the peak rate of the device, and
++ * R and T are two reference parameters.
++ * In particular, R is the peak rate of the reference device (see below),
++ * and T is a reference time: given the systems that are likely to be
++ * installed on the reference device according to its speed class, T is
++ * about the maximum time needed, under BFQ and while reading two files in
++ * parallel, to load typical large applications on these systems.
++ * In practice, the slower/faster the device at hand is, the more/less it
++ * takes to load applications with respect to the reference device.
++ * Accordingly, the longer/shorter BFQ grants weight raising to interactive
++ * applications.
++ *
++ * BFQ uses four different reference pairs (R, T), depending on:
++ * . whether the device is rotational or non-rotational;
++ * . whether the device is slow, such as old or portable HDDs, as well as
++ * SD cards, or fast, such as newer HDDs and SSDs.
++ *
++ * The device's speed class is dynamically (re)detected in
++ * bfq_update_peak_rate() every time the estimated peak rate is updated.
++ *
++ * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0]
++ * are the reference values for a slow/fast rotational device, whereas
++ * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for
++ * a slow/fast non-rotational device. Finally, device_speed_thresh are the
++ * thresholds used to switch between speed classes.
++ * Both the reference peak rates and the thresholds are measured in
++ * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
++ */
++static int R_slow[2] = {1536, 10752};
++static int R_fast[2] = {17415, 34791};
++/*
++ * To improve readability, a conversion function is used to initialize the
++ * following arrays, which entails that they can be initialized only in a
++ * function.
++ */
++static int T_slow[2];
++static int T_fast[2];
++static int device_speed_thresh[2];
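++
++/*
++ * For a concrete sense of scale: with BFQ_RATE_SHIFT = 16, R_fast[0] = 17415
++ * corresponds to a reference peak rate of about 17415 / 2^16 ~= 0.27
++ * sectors/usec, i.e., roughly 136 MB/s for a fast rotational device, while
++ * R_slow[0] = 1536 corresponds to roughly 12 MB/s. A device whose measured
++ * peak rate r is half the reference rate R of its class is then
++ * weight-raised for duration = (R / r) * T = 2 * T.
++ */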
++
++#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \
++ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
++
++#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0])
++#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
++
++static void bfq_schedule_dispatch(struct bfq_data *bfqd);
++
++#include "bfq-ioc.c"
++#include "bfq-sched.c"
++#include "bfq-cgroup.c"
++
++#define bfq_class_idle(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
++#define bfq_class_rt(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_RT)
++
++#define bfq_sample_valid(samples) ((samples) > 80)
++
++/*
++ * We regard a request as SYNC if either it's a read or it has the SYNC bit
++ * set (in which case it could also be a direct WRITE).
++ */
++static int bfq_bio_sync(struct bio *bio)
++{
++ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC))
++ return 1;
++
++ return 0;
++}
++
++/*
++ * Scheduler run of queue, if there are requests pending and no one in the
++ * driver that will restart queueing.
++ */
++static void bfq_schedule_dispatch(struct bfq_data *bfqd)
++{
++ if (bfqd->queued != 0) {
++ bfq_log(bfqd, "schedule dispatch");
++ kblockd_schedule_work(&bfqd->unplug_work);
++ }
++}
++
++/*
++ * Lifted from AS - choose which of rq1 and rq2 is best served now.
++ * We choose the request that is closest to the head right now. Distance
++ * behind the head is penalized and only allowed to a certain extent.
++ */
++static struct request *bfq_choose_req(struct bfq_data *bfqd,
++ struct request *rq1,
++ struct request *rq2,
++ sector_t last)
++{
++ sector_t s1, s2, d1 = 0, d2 = 0;
++ unsigned long back_max;
++#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */
++#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */
++ unsigned wrap = 0; /* bit mask: requests behind the disk head? */
++
++ if (!rq1 || rq1 == rq2)
++ return rq2;
++ if (!rq2)
++ return rq1;
++
++ if (rq_is_sync(rq1) && !rq_is_sync(rq2))
++ return rq1;
++ else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
++ return rq2;
++ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
++ return rq1;
++ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META))
++ return rq2;
++
++ s1 = blk_rq_pos(rq1);
++ s2 = blk_rq_pos(rq2);
++
++ /*
++ * By definition, 1KiB is 2 sectors.
++ */
++ back_max = bfqd->bfq_back_max * 2;
++
++ /*
++ * Strict one way elevator _except_ in the case where we allow
++ * short backward seeks which are biased as twice the cost of a
++ * similar forward seek.
++ */
++ if (s1 >= last)
++ d1 = s1 - last;
++ else if (s1 + back_max >= last)
++ d1 = (last - s1) * bfqd->bfq_back_penalty;
++ else
++ wrap |= BFQ_RQ1_WRAP;
++
++ if (s2 >= last)
++ d2 = s2 - last;
++ else if (s2 + back_max >= last)
++ d2 = (last - s2) * bfqd->bfq_back_penalty;
++ else
++ wrap |= BFQ_RQ2_WRAP;
++
++ /* Found required data */
++
++ /*
++ * By doing switch() on the bit mask "wrap" we avoid having to
++ * check two variables for all permutations: --> faster!
++ */
++ switch (wrap) {
++ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
++ if (d1 < d2)
++ return rq1;
++ else if (d2 < d1)
++ return rq2;
++ else {
++ if (s1 >= s2)
++ return rq1;
++ else
++ return rq2;
++ }
++
++ case BFQ_RQ2_WRAP:
++ return rq1;
++ case BFQ_RQ1_WRAP:
++ return rq2;
++ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */
++ default:
++ /*
++ * Since both rqs are wrapped,
++ * start with the one that's further behind head
++ * (--> only *one* back seek required),
++ * since back seek takes more time than forward.
++ */
++ if (s1 <= s2)
++ return rq1;
++ else
++ return rq2;
++ }
++}
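++
++/*
++ * For example, with the default bfq_back_penalty = 2, if the head is at
++ * sector 100000, rq1 starts at sector 100100 and rq2 starts at sector
++ * 99900, then d1 = 100 while d2 = (100000 - 99900) * 2 = 200, so rq1 is
++ * chosen even though both requests are equally distant from the head.
++ */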
++
++/*
++ * Tell whether there are active queues or groups with differentiated weights.
++ */
++static bool bfq_differentiated_weights(struct bfq_data *bfqd)
++{
++ /*
++ * For weights to differ, at least one of the trees must contain
++ * at least two nodes.
++ */
++ return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
++ (bfqd->queue_weights_tree.rb_node->rb_left ||
++ bfqd->queue_weights_tree.rb_node->rb_right)
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ ) ||
++ (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) &&
++ (bfqd->group_weights_tree.rb_node->rb_left ||
++ bfqd->group_weights_tree.rb_node->rb_right)
++#endif
++ );
++}
++
++/*
++ * The following function returns true if every queue must receive the
++ * same share of the throughput (this condition is used when deciding
++ * whether idling may be disabled, see the comments in the function
++ * bfq_bfqq_may_idle()).
++ *
++ * Such a scenario occurs when:
++ * 1) all active queues have the same weight,
++ * 2) all active groups at the same level in the groups tree have the same
++ * weight,
++ * 3) all active groups at the same level in the groups tree have the same
++ * number of children.
++ *
++ * Unfortunately, keeping the necessary state for evaluating exactly the
++ * above symmetry conditions would be quite complex and time-consuming.
++ * Therefore this function evaluates, instead, the following stronger
++ * sub-conditions, for which it is much easier to maintain the needed
++ * state:
++ * 1) all active queues have the same weight,
++ * 2) all active groups have the same weight,
++ * 3) all active groups have at most one active child each.
++ * In particular, the last two conditions are always true if hierarchical
++ * support and the cgroups interface are not enabled, thus no state needs
++ * to be maintained in this case.
++ */
++static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
++{
++ return
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ !bfqd->active_numerous_groups &&
++#endif
++ !bfq_differentiated_weights(bfqd);
++}
++
++/*
++ * If the weight-counter tree passed as input contains no counter for
++ * the weight of the input entity, then add that counter; otherwise just
++ * increment the existing counter.
++ *
++ * Note that weight-counter trees contain few nodes in mostly symmetric
++ * scenarios. For example, if all queues have the same weight, then the
++ * weight-counter tree for the queues may contain at most one node.
++ * This holds even if low_latency is on, because weight-raised queues
++ * are not inserted in the tree.
++ * In most scenarios, the rate at which nodes are created/destroyed
++ * should be low too.
++ */
++static void bfq_weights_tree_add(struct bfq_data *bfqd,
++ struct bfq_entity *entity,
++ struct rb_root *root)
++{
++ struct rb_node **new = &(root->rb_node), *parent = NULL;
++
++ /*
++ * Do not insert if the entity is already associated with a
++ * counter, which happens if:
++ * 1) the entity is associated with a queue,
++ * 2) a request arrival has caused the queue to become both
++ * non-weight-raised, and hence change its weight, and
++ * backlogged; in this respect, each of the two events
++ * causes an invocation of this function,
++ * 3) this is the invocation of this function caused by the
++ * second event. This second invocation is actually useless,
++ * and we handle this fact by exiting immediately. More
++ * efficient or clearer solutions might possibly be adopted.
++ */
++ if (entity->weight_counter)
++ return;
++
++ while (*new) {
++ struct bfq_weight_counter *__counter = container_of(*new,
++ struct bfq_weight_counter,
++ weights_node);
++ parent = *new;
++
++ if (entity->weight == __counter->weight) {
++ entity->weight_counter = __counter;
++ goto inc_counter;
++ }
++ if (entity->weight < __counter->weight)
++ new = &((*new)->rb_left);
++ else
++ new = &((*new)->rb_right);
++ }
++
++	entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
++					 GFP_ATOMIC);
++	if (!entity->weight_counter)
++		return;
++	entity->weight_counter->weight = entity->weight;
++ rb_link_node(&entity->weight_counter->weights_node, parent, new);
++ rb_insert_color(&entity->weight_counter->weights_node, root);
++
++inc_counter:
++ entity->weight_counter->num_active++;
++}
++
++/*
++ * Decrement the weight counter associated with the entity, and, if the
++ * counter reaches 0, remove the counter from the tree.
++ * See the comments to the function bfq_weights_tree_add() for considerations
++ * about overhead.
++ */
++static void bfq_weights_tree_remove(struct bfq_data *bfqd,
++ struct bfq_entity *entity,
++ struct rb_root *root)
++{
++ if (!entity->weight_counter)
++ return;
++
++ BUG_ON(RB_EMPTY_ROOT(root));
++ BUG_ON(entity->weight_counter->weight != entity->weight);
++
++ BUG_ON(!entity->weight_counter->num_active);
++ entity->weight_counter->num_active--;
++ if (entity->weight_counter->num_active > 0)
++ goto reset_entity_pointer;
++
++ rb_erase(&entity->weight_counter->weights_node, root);
++ kfree(entity->weight_counter);
++
++reset_entity_pointer:
++ entity->weight_counter = NULL;
++}
++
++static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ struct request *last)
++{
++ struct rb_node *rbnext = rb_next(&last->rb_node);
++ struct rb_node *rbprev = rb_prev(&last->rb_node);
++ struct request *next = NULL, *prev = NULL;
++
++ BUG_ON(RB_EMPTY_NODE(&last->rb_node));
++
++ if (rbprev)
++ prev = rb_entry_rq(rbprev);
++
++ if (rbnext)
++ next = rb_entry_rq(rbnext);
++ else {
++ rbnext = rb_first(&bfqq->sort_list);
++ if (rbnext && rbnext != &last->rb_node)
++ next = rb_entry_rq(rbnext);
++ }
++
++ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last));
++}
++
++/* see the definition of bfq_async_charge_factor for details */
++static unsigned long bfq_serv_to_charge(struct request *rq,
++ struct bfq_queue *bfqq)
++{
++ return blk_rq_sectors(rq) *
++ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) *
++ bfq_async_charge_factor));
++}
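++
++/*
++ * For example, with bfq_async_charge_factor = 10, bfq_serv_to_charge()
++ * charges a sync request exactly its size in sectors, whereas an 8-sector
++ * async request from a non-weight-raised queue (wr_coeff == 1) is charged
++ * 8 * (1 + 10) = 88 sectors of budget. Async requests of weight-raised
++ * queues are charged their actual size, like sync ones.
++ */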
++
++/**
++ * bfq_updated_next_req - update the queue after a new next_rq selection.
++ * @bfqd: the device data the queue belongs to.
++ * @bfqq: the queue to update.
++ *
++ * If the first request of a queue changes we make sure that the queue
++ * has enough budget to serve at least its first request (if the
++ * request has grown). We do this because if the queue has not enough
++ * budget for its first request, it has to go through two dispatch
++ * rounds to actually get it dispatched.
++ */
++static void bfq_updated_next_req(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
++ struct request *next_rq = bfqq->next_rq;
++ unsigned long new_budget;
++
++ if (!next_rq)
++ return;
++
++ if (bfqq == bfqd->in_service_queue)
++ /*
++ * In order not to break guarantees, budgets cannot be
++ * changed after an entity has been selected.
++ */
++ return;
++
++ BUG_ON(entity->tree != &st->active);
++ BUG_ON(entity == entity->sched_data->in_service_entity);
++
++ new_budget = max_t(unsigned long, bfqq->max_budget,
++ bfq_serv_to_charge(next_rq, bfqq));
++ if (entity->budget != new_budget) {
++ entity->budget = new_budget;
++ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu",
++ new_budget);
++ bfq_activate_bfqq(bfqd, bfqq);
++ }
++}
++
++static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
++{
++ u64 dur;
++
++ if (bfqd->bfq_wr_max_time > 0)
++ return bfqd->bfq_wr_max_time;
++
++ dur = bfqd->RT_prod;
++ do_div(dur, bfqd->peak_rate);
++
++ return dur;
++}
++
++/* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */
++static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ struct bfq_queue *item;
++ struct hlist_node *n;
++
++ hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node)
++ hlist_del_init(&item->burst_list_node);
++ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
++ bfqd->burst_size = 1;
++}
++
++/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */
++static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ /* Increment burst size to take into account also bfqq */
++ bfqd->burst_size++;
++
++ if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) {
++ struct bfq_queue *pos, *bfqq_item;
++ struct hlist_node *n;
++
++ /*
++ * Enough queues have been activated shortly after each
++ * other to consider this burst as large.
++ */
++ bfqd->large_burst = true;
++
++ /*
++ * We can now mark all queues in the burst list as
++ * belonging to a large burst.
++ */
++ hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
++ burst_list_node)
++ bfq_mark_bfqq_in_large_burst(bfqq_item);
++ bfq_mark_bfqq_in_large_burst(bfqq);
++
++ /*
++ * From now on, and until the current burst finishes, any
++ * new queue being activated shortly after the last queue
++ * was inserted in the burst can be immediately marked as
++ * belonging to a large burst. So the burst list is not
++ * needed any more. Remove it.
++ */
++ hlist_for_each_entry_safe(pos, n, &bfqd->burst_list,
++ burst_list_node)
++ hlist_del_init(&pos->burst_list_node);
++ } else /* burst not yet large: add bfqq to the burst list */
++ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
++}
++
++/*
++ * If many queues happen to become active shortly after each other, then,
++ * to help the processes associated to these queues get their job done as
++ * soon as possible, it is usually better to not grant either weight-raising
++ * or device idling to these queues. In this comment we describe, firstly,
++ * the reasons why this fact holds, and, secondly, the next function, which
++ * implements the main steps needed to properly mark these queues so that
++ * they can then be treated in a different way.
++ *
++ * As for the terminology, we say that a queue becomes active, i.e.,
++ * switches from idle to backlogged, either when it is created (as a
++ * consequence of the arrival of an I/O request), or, if already existing,
++ * when a new request for the queue arrives while the queue is idle.
++ * Bursts of activations, i.e., activations of different queues occurring
++ * shortly after each other, are typically caused by services or applications
++ * that spawn or reactivate many parallel threads/processes. Examples are
++ * systemd during boot or git grep.
++ *
++ * These services or applications benefit mostly from a high throughput:
++ * the quicker the requests of the activated queues are cumulatively served,
++ * the sooner the target job of these queues gets completed. As a consequence,
++ * weight-raising any of these queues, which also implies idling the device
++ * for it, is almost always counterproductive: in most cases it just lowers
++ * throughput.
++ *
++ * On the other hand, a burst of activations may also be caused by the start
++ * of an application that does not consist of many parallel I/O-bound
++ * threads. In fact, with a complex application, the burst may be just a
++ * consequence of the fact that several processes need to be executed to
++ * start-up the application. To start an application as quickly as possible,
++ * the best thing to do is to privilege the I/O related to the application
++ * with respect to all other I/O. Therefore, the best strategy to start as
++ * quickly as possible an application that causes a burst of activations is
++ * to weight-raise all the queues activated during the burst. This is the
++ * exact opposite of the best strategy for the other type of bursts.
++ *
++ * In the end, to take the best action for each of the two cases, the two
++ * types of bursts need to be distinguished. Fortunately, this seems
++ * relatively easy to do, by looking at the sizes of the bursts. In
++ * particular, we found a threshold such that bursts with a larger size
++ * than that threshold are apparently caused only by services or commands
++ * such as systemd or git grep. For brevity, hereafter we simply call these
++ * bursts 'large'. BFQ *does not* weight-raise queues whose activations occur
++ * in a large burst. In addition, for each of these queues BFQ performs or
++ * does not perform idling depending on which choice boosts the throughput
++ * most. The exact choice depends on the device and request pattern at
++ * hand.
++ *
++ * Turning back to the next function, it implements all the steps needed
++ * to detect the occurrence of a large burst and to properly mark all the
++ * queues belonging to it (so that they can then be treated in a different
++ * way). This goal is achieved by maintaining a special "burst list" that
++ * holds, temporarily, the queues that belong to the burst in progress. The
++ * list is then used to mark these queues as belonging to a large burst if
++ * the burst does become large. The main steps are the following.
++ *
++ * . when the very first queue is activated, the queue is inserted into the
++ * list (as it could be the first queue in a possible burst)
++ *
++ * . if the current burst has not yet become large, and a queue Q that does
++ * not yet belong to the burst is activated shortly after the last time
++ * at which a new queue entered the burst list, then the function appends
++ * Q to the burst list
++ *
++ * . if, as a consequence of the previous step, the burst size reaches
++ * the large-burst threshold, then
++ *
++ * . all the queues in the burst list are marked as belonging to a
++ * large burst
++ *
++ * . the burst list is deleted; in fact, the burst list already served
++ * its purpose (keeping temporarily track of the queues in a burst,
++ * so as to be able to mark them as belonging to a large burst in the
++ * previous sub-step), and now is not needed any more
++ *
++ * . the device enters a large-burst mode
++ *
++ * . if a queue Q that does not belong to the burst is activated while
++ * the device is in large-burst mode and shortly after the last time
++ * at which a queue either entered the burst list or was marked as
++ * belonging to the current large burst, then Q is immediately marked
++ * as belonging to a large burst.
++ *
++ * . if a queue Q that does not belong to the burst is activated a while
++ *   later, i.e., not shortly after the last time at which a queue
++ * either entered the burst list or was marked as belonging to the
++ * current large burst, then the current burst is deemed as finished and:
++ *
++ * . the large-burst mode is reset if set
++ *
++ * . the burst list is emptied
++ *
++ * . Q is inserted in the burst list, as Q may be the first queue
++ * in a possible new burst (then the burst list contains just Q
++ * after this step).
++ */
++static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ bool idle_for_long_time)
++{
++ /*
++ * If bfqq happened to be activated in a burst, but has been idle
++ * for at least as long as an interactive queue, then we assume
++ * that, in the overall I/O initiated in the burst, the I/O
++ * associated to bfqq is finished. So bfqq does not need to be
++ * treated as a queue belonging to a burst anymore. Accordingly,
++ * we reset bfqq's in_large_burst flag if set, and remove bfqq
++	 * from the burst list if it's there. We do not, however, decrement
++ * burst_size, because the fact that bfqq does not need to belong
++ * to the burst list any more does not invalidate the fact that
++ * bfqq may have been activated during the current burst.
++ */
++ if (idle_for_long_time) {
++ hlist_del_init(&bfqq->burst_list_node);
++ bfq_clear_bfqq_in_large_burst(bfqq);
++ }
++
++ /*
++ * If bfqq is already in the burst list or is part of a large
++ * burst, then there is nothing else to do.
++ */
++ if (!hlist_unhashed(&bfqq->burst_list_node) ||
++ bfq_bfqq_in_large_burst(bfqq))
++ return;
++
++ /*
++ * If bfqq's activation happens late enough, then the current
++ * burst is finished, and related data structures must be reset.
++ *
++ * In this respect, consider the special case where bfqq is the very
++ * first queue being activated. In this case, last_ins_in_burst is
++ * not yet significant when we get here. But it is easy to verify
++ * that, whether or not the following condition is true, bfqq will
++ * end up being inserted into the burst list. In particular the
++ * list will happen to contain only bfqq. And this is exactly what
++ * has to happen, as bfqq may be the first queue in a possible
++ * burst.
++ */
++ if (time_is_before_jiffies(bfqd->last_ins_in_burst +
++ bfqd->bfq_burst_interval)) {
++ bfqd->large_burst = false;
++ bfq_reset_burst_list(bfqd, bfqq);
++ return;
++ }
++
++ /*
++ * If we get here, then bfqq is being activated shortly after the
++ * last queue. So, if the current burst is also large, we can mark
++ * bfqq as belonging to this large burst immediately.
++ */
++ if (bfqd->large_burst) {
++ bfq_mark_bfqq_in_large_burst(bfqq);
++ return;
++ }
++
++ /*
++ * If we get here, then a large-burst state has not yet been
++ * reached, but bfqq is being activated shortly after the last
++ * queue. Then we add bfqq to the burst.
++ */
++ bfq_add_to_burst(bfqd, bfqq);
++}
++
++static void bfq_add_request(struct request *rq)
++{
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++ struct bfq_entity *entity = &bfqq->entity;
++ struct bfq_data *bfqd = bfqq->bfqd;
++ struct request *next_rq, *prev;
++ unsigned long old_wr_coeff = bfqq->wr_coeff;
++ bool interactive = false;
++
++ bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
++ bfqq->queued[rq_is_sync(rq)]++;
++ bfqd->queued++;
++
++ elv_rb_add(&bfqq->sort_list, rq);
++
++ /*
++ * Check if this request is a better next-serve candidate.
++ */
++ prev = bfqq->next_rq;
++ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
++ BUG_ON(!next_rq);
++ bfqq->next_rq = next_rq;
++
++ if (!bfq_bfqq_busy(bfqq)) {
++ bool soft_rt, in_burst,
++ idle_for_long_time = time_is_before_jiffies(
++ bfqq->budget_timeout +
++ bfqd->bfq_wr_min_idle_time);
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq,
++ rq->cmd_flags);
++#endif
++ if (bfq_bfqq_sync(bfqq)) {
++ bool already_in_burst =
++ !hlist_unhashed(&bfqq->burst_list_node) ||
++ bfq_bfqq_in_large_burst(bfqq);
++ bfq_handle_burst(bfqd, bfqq, idle_for_long_time);
++ /*
++ * If bfqq was not already in the current burst,
++ * then, at this point, bfqq either has been
++ * added to the current burst or has caused the
++ * current burst to terminate. In particular, in
++ * the second case, bfqq has become the first
++ * queue in a possible new burst.
++ * In both cases last_ins_in_burst needs to be
++ * moved forward.
++ */
++ if (!already_in_burst)
++ bfqd->last_ins_in_burst = jiffies;
++ }
++
++ in_burst = bfq_bfqq_in_large_burst(bfqq);
++ soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
++ !in_burst &&
++ time_is_before_jiffies(bfqq->soft_rt_next_start);
++ interactive = !in_burst && idle_for_long_time;
++ entity->budget = max_t(unsigned long, bfqq->max_budget,
++ bfq_serv_to_charge(next_rq, bfqq));
++
++ if (!bfq_bfqq_IO_bound(bfqq)) {
++ if (time_before(jiffies,
++ RQ_BIC(rq)->ttime.last_end_request +
++ bfqd->bfq_slice_idle)) {
++ bfqq->requests_within_timer++;
++ if (bfqq->requests_within_timer >=
++ bfqd->bfq_requests_within_timer)
++ bfq_mark_bfqq_IO_bound(bfqq);
++ } else
++ bfqq->requests_within_timer = 0;
++ }
++
++ if (!bfqd->low_latency)
++ goto add_bfqq_busy;
++
++ /*
++ * If the queue:
++ * - is not being boosted,
++ * - has been idle for enough time,
++ * - is not a sync queue or is linked to a bfq_io_cq (it is
++		 *    shared "by nature" or it is not shared and its
++ * requests have not been redirected to a shared queue)
++ * start a weight-raising period.
++ */
++ if (old_wr_coeff == 1 && (interactive || soft_rt) &&
++ (!bfq_bfqq_sync(bfqq) || bfqq->bic)) {
++ bfqq->wr_coeff = bfqd->bfq_wr_coeff;
++ if (interactive)
++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++ else
++ bfqq->wr_cur_max_time =
++ bfqd->bfq_wr_rt_max_time;
++ bfq_log_bfqq(bfqd, bfqq,
++ "wrais starting at %lu, rais_max_time %u",
++ jiffies,
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ } else if (old_wr_coeff > 1) {
++ if (interactive)
++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++ else if (in_burst ||
++ (bfqq->wr_cur_max_time ==
++ bfqd->bfq_wr_rt_max_time &&
++ !soft_rt)) {
++ bfqq->wr_coeff = 1;
++ bfq_log_bfqq(bfqd, bfqq,
++ "wrais ending at %lu, rais_max_time %u",
++ jiffies,
++ jiffies_to_msecs(bfqq->
++ wr_cur_max_time));
++ } else if (time_before(
++ bfqq->last_wr_start_finish +
++ bfqq->wr_cur_max_time,
++ jiffies +
++ bfqd->bfq_wr_rt_max_time) &&
++ soft_rt) {
++ /*
++ *
++ * The remaining weight-raising time is lower
++ * than bfqd->bfq_wr_rt_max_time, which means
++ * that the application is enjoying weight
++ * raising either because deemed soft-rt in
++ * the near past, or because deemed interactive
++			 * long ago.
++ * In both cases, resetting now the current
++ * remaining weight-raising time for the
++ * application to the weight-raising duration
++ * for soft rt applications would not cause any
++ * latency increase for the application (as the
++ * new duration would be higher than the
++ * remaining time).
++ *
++ * In addition, the application is now meeting
++ * the requirements for being deemed soft rt.
++ * In the end we can correctly and safely
++ * (re)charge the weight-raising duration for
++ * the application with the weight-raising
++ * duration for soft rt applications.
++ *
++ * In particular, doing this recharge now, i.e.,
++ * before the weight-raising period for the
++ * application finishes, reduces the probability
++ * of the following negative scenario:
++ * 1) the weight of a soft rt application is
++ * raised at startup (as for any newly
++ * created application),
++ * 2) since the application is not interactive,
++ * at a certain time weight-raising is
++ * stopped for the application,
++ * 3) at that time the application happens to
++ * still have pending requests, and hence
++ * is destined to not have a chance to be
++ * deemed soft rt before these requests are
++ * completed (see the comments to the
++ * function bfq_bfqq_softrt_next_start()
++ * for details on soft rt detection),
++ * 4) these pending requests experience a high
++ * latency because the application is not
++ * weight-raised while they are pending.
++ */
++ bfqq->last_wr_start_finish = jiffies;
++ bfqq->wr_cur_max_time =
++ bfqd->bfq_wr_rt_max_time;
++ }
++ }
++ if (old_wr_coeff != bfqq->wr_coeff)
++ entity->prio_changed = 1;
++add_bfqq_busy:
++ bfqq->last_idle_bklogged = jiffies;
++ bfqq->service_from_backlogged = 0;
++ bfq_clear_bfqq_softrt_update(bfqq);
++ bfq_add_bfqq_busy(bfqd, bfqq);
++ } else {
++ if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
++ time_is_before_jiffies(
++ bfqq->last_wr_start_finish +
++ bfqd->bfq_wr_min_inter_arr_async)) {
++ bfqq->wr_coeff = bfqd->bfq_wr_coeff;
++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++
++ bfqd->wr_busy_queues++;
++ entity->prio_changed = 1;
++ bfq_log_bfqq(bfqd, bfqq,
++ "non-idle wrais starting at %lu, rais_max_time %u",
++ jiffies,
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ }
++ if (prev != bfqq->next_rq)
++ bfq_updated_next_req(bfqd, bfqq);
++ }
++
++ if (bfqd->low_latency &&
++ (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
++ bfqq->last_wr_start_finish = jiffies;
++}
++
++static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
++ struct bio *bio)
++{
++ struct task_struct *tsk = current;
++ struct bfq_io_cq *bic;
++ struct bfq_queue *bfqq;
++
++ bic = bfq_bic_lookup(bfqd, tsk->io_context);
++ if (!bic)
++ return NULL;
++
++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
++ if (bfqq)
++ return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio));
++
++ return NULL;
++}
++
++static void bfq_activate_request(struct request_queue *q, struct request *rq)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++
++ bfqd->rq_in_driver++;
++ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
++ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu",
++ (long long unsigned)bfqd->last_position);
++}
++
++static void bfq_deactivate_request(struct request_queue *q, struct request *rq)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++
++ BUG_ON(bfqd->rq_in_driver == 0);
++ bfqd->rq_in_driver--;
++}
++
++static void bfq_remove_request(struct request *rq)
++{
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++ struct bfq_data *bfqd = bfqq->bfqd;
++ const int sync = rq_is_sync(rq);
++
++ if (bfqq->next_rq == rq) {
++ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
++ bfq_updated_next_req(bfqd, bfqq);
++ }
++
++ if (rq->queuelist.prev != &rq->queuelist)
++ list_del_init(&rq->queuelist);
++ BUG_ON(bfqq->queued[sync] == 0);
++ bfqq->queued[sync]--;
++ bfqd->queued--;
++ elv_rb_del(&bfqq->sort_list, rq);
++
++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
++ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue)
++ bfq_del_bfqq_busy(bfqd, bfqq, 1);
++ /*
++ * Remove queue from request-position tree as it is empty.
++ */
++ if (bfqq->pos_root) {
++ rb_erase(&bfqq->pos_node, bfqq->pos_root);
++ bfqq->pos_root = NULL;
++ }
++ }
++
++ if (rq->cmd_flags & REQ_META) {
++ BUG_ON(bfqq->meta_pending == 0);
++ bfqq->meta_pending--;
++ }
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
++#endif
++}
++
++static int bfq_merge(struct request_queue *q, struct request **req,
++ struct bio *bio)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct request *__rq;
++
++ __rq = bfq_find_rq_fmerge(bfqd, bio);
++ if (__rq && elv_rq_merge_ok(__rq, bio)) {
++ *req = __rq;
++ return ELEVATOR_FRONT_MERGE;
++ }
++
++ return ELEVATOR_NO_MERGE;
++}
++
++static void bfq_merged_request(struct request_queue *q, struct request *req,
++ int type)
++{
++ if (type == ELEVATOR_FRONT_MERGE &&
++ rb_prev(&req->rb_node) &&
++ blk_rq_pos(req) <
++ blk_rq_pos(container_of(rb_prev(&req->rb_node),
++ struct request, rb_node))) {
++ struct bfq_queue *bfqq = RQ_BFQQ(req);
++ struct bfq_data *bfqd = bfqq->bfqd;
++ struct request *prev, *next_rq;
++
++ /* Reposition request in its sort_list */
++ elv_rb_del(&bfqq->sort_list, req);
++ elv_rb_add(&bfqq->sort_list, req);
++ /* Choose next request to be served for bfqq */
++ prev = bfqq->next_rq;
++ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req,
++ bfqd->last_position);
++ BUG_ON(!next_rq);
++ bfqq->next_rq = next_rq;
++ }
++}
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static void bfq_bio_merged(struct request_queue *q, struct request *req,
++ struct bio *bio)
++{
++ bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio->bi_rw);
++}
++#endif
++
++static void bfq_merged_requests(struct request_queue *q, struct request *rq,
++ struct request *next)
++{
++ struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
++
++ /*
++ * If next and rq belong to the same bfq_queue and next is older
++ * than rq, then reposition rq in the fifo (by substituting next
++ * with rq). Otherwise, if next and rq belong to different
++ * bfq_queues, never reposition rq: in fact, we would have to
++ * reposition it with respect to next's position in its own fifo,
++ * which would most certainly be too expensive with respect to
++ * the benefits.
++ */
++ if (bfqq == next_bfqq &&
++ !list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
++ time_before(next->fifo_time, rq->fifo_time)) {
++ list_del_init(&rq->queuelist);
++ list_replace_init(&next->queuelist, &rq->queuelist);
++ rq->fifo_time = next->fifo_time;
++ }
++
++ if (bfqq->next_rq == next)
++ bfqq->next_rq = rq;
++
++ bfq_remove_request(next);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
++#endif
++}
++
++/* Must be called with bfqq != NULL */
++static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
++{
++ BUG_ON(!bfqq);
++ if (bfq_bfqq_busy(bfqq))
++ bfqq->bfqd->wr_busy_queues--;
++ bfqq->wr_coeff = 1;
++ bfqq->wr_cur_max_time = 0;
++ /* Trigger a weight change on the next activation of the queue */
++ bfqq->entity.prio_changed = 1;
++}
++
++static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
++ struct bfq_group *bfqg)
++{
++ int i, j;
++
++ for (i = 0; i < 2; i++)
++ for (j = 0; j < IOPRIO_BE_NR; j++)
++ if (bfqg->async_bfqq[i][j])
++ bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]);
++ if (bfqg->async_idle_bfqq)
++ bfq_bfqq_end_wr(bfqg->async_idle_bfqq);
++}
++
++static void bfq_end_wr(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq;
++
++ spin_lock_irq(bfqd->queue->queue_lock);
++
++ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
++ bfq_bfqq_end_wr(bfqq);
++ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
++ bfq_bfqq_end_wr(bfqq);
++ bfq_end_wr_async(bfqd);
++
++ spin_unlock_irq(bfqd->queue->queue_lock);
++}
++
++static int bfq_allow_merge(struct request_queue *q, struct request *rq,
++ struct bio *bio)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_io_cq *bic;
++
++ /*
++ * Disallow merge of a sync bio into an async request.
++ */
++ if (bfq_bio_sync(bio) && !rq_is_sync(rq))
++ return 0;
++
++ /*
++ * Lookup the bfqq that this bio will be queued with. Allow
++ * merge only if rq is queued there.
++ * Queue lock is held here.
++ */
++ bic = bfq_bic_lookup(bfqd, current->io_context);
++ if (!bic)
++ return 0;
++
++ return bic_to_bfqq(bic, bfq_bio_sync(bio)) == RQ_BFQQ(rq);
++}
++
++static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ if (bfqq) {
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
++#endif
++ bfq_mark_bfqq_must_alloc(bfqq);
++ bfq_mark_bfqq_budget_new(bfqq);
++ bfq_clear_bfqq_fifo_expire(bfqq);
++
++ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "set_in_service_queue, cur-budget = %d",
++ bfqq->entity.budget);
++ }
++
++ bfqd->in_service_queue = bfqq;
++}
++
++/*
++ * Get and set a new queue for service.
++ */
++static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
++
++ __bfq_set_in_service_queue(bfqd, bfqq);
++ return bfqq;
++}
++
++/*
++ * If enough samples have been computed, return the current max budget
++ * stored in bfqd, which is dynamically updated according to the
++ * estimated disk peak rate; otherwise return the default max budget
++ */
++static int bfq_max_budget(struct bfq_data *bfqd)
++{
++ if (bfqd->budgets_assigned < bfq_stats_min_budgets)
++ return bfq_default_max_budget;
++ else
++ return bfqd->bfq_max_budget;
++}
++
++/*
++ * Return min budget, which is a fraction of the current or default
++ * max budget (trying with 1/32)
++ */
++static int bfq_min_budget(struct bfq_data *bfqd)
++{
++ if (bfqd->budgets_assigned < bfq_stats_min_budgets)
++ return bfq_default_max_budget / 32;
++ else
++ return bfqd->bfq_max_budget / 32;
++}
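++
++/*
++ * With the default bfq_default_max_budget = 16 * 1024 sectors, the
++ * minimum budget above amounts to 512 sectors, i.e., 256 KiB.
++ */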
++
++static void bfq_arm_slice_timer(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq = bfqd->in_service_queue;
++ struct bfq_io_cq *bic;
++ unsigned long sl;
++
++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
++
++ /* Processes have exited, don't wait. */
++ bic = bfqd->in_service_bic;
++ if (!bic || atomic_read(&bic->icq.ioc->active_ref) == 0)
++ return;
++
++ bfq_mark_bfqq_wait_request(bfqq);
++
++ /*
++ * We don't want to idle for seeks, but we do want to allow
++ * fair distribution of slice time for a process doing back-to-back
++	 * seeks. So allow a little bit of time for it to submit a new rq.
++ *
++ * To prevent processes with (partly) seeky workloads from
++ * being too ill-treated, grant them a small fraction of the
++ * assigned budget before reducing the waiting time to
++ * BFQ_MIN_TT. This happened to help reduce latency.
++ */
++ sl = bfqd->bfq_slice_idle;
++ /*
++ * Unless the queue is being weight-raised or the scenario is
++ * asymmetric, grant only minimum idle time if the queue either
++ * has been seeky for long enough or has already proved to be
++ * constantly seeky.
++ */
++ if (bfq_sample_valid(bfqq->seek_samples) &&
++ ((BFQQ_SEEKY(bfqq) && bfqq->entity.service >
++ bfq_max_budget(bfqq->bfqd) / 8) ||
++ bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1 &&
++ bfq_symmetric_scenario(bfqd))
++ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
++ else if (bfqq->wr_coeff > 1)
++ sl = sl * 3;
++ bfqd->last_idling_start = ktime_get();
++ mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
++#endif
++ bfq_log(bfqd, "arm idle: %u/%u ms",
++ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
++}
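++
++/*
++ * With the default bfq_slice_idle = HZ / 125 (8 ms at HZ = 1000),
++ * bfq_arm_slice_timer() normally idles for 8 ms, shrinks the wait to
++ * BFQ_MIN_TT = 2 ms for a queue that has proved seeky (once enough seek
++ * samples are available) in a symmetric, non-weight-raised scenario, and
++ * stretches it to 3 * 8 = 24 ms for weight-raised queues.
++ */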
++
++/*
++ * Set the maximum time for the in-service queue to consume its
++ * budget. This prevents seeky processes from lowering the disk
++ * throughput (always guaranteed with a time slice scheme as in CFQ).
++ */
++static void bfq_set_budget_timeout(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq = bfqd->in_service_queue;
++ unsigned int timeout_coeff;
++ if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
++ timeout_coeff = 1;
++ else
++ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
++
++ bfqd->last_budget_start = ktime_get();
++
++ bfq_clear_bfqq_budget_new(bfqq);
++ bfqq->budget_timeout = jiffies +
++ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff;
++
++ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
++ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] *
++ timeout_coeff));
++}
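++
++/*
++ * For example, a queue whose weight is currently raised to three times its
++ * original weight (entity.weight == 3 * entity.orig_weight) is given a
++ * budget timeout three times as long as a non-raised queue, whereas a
++ * queue in the soft real-time weight-raising period keeps the plain
++ * timeout (timeout_coeff == 1).
++ */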
++
++/*
++ * Move request from internal lists to the request queue dispatch list.
++ */
++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++
++ /*
++ * For consistency, the next instruction should have been executed
++ * after removing the request from the queue and dispatching it.
++	 * We instead execute this instruction before bfq_remove_request()
++	 * (and hence introduce a temporary inconsistency), for efficiency.
++	 * In fact, in a forced_dispatch, this prevents two counters related
++	 * to bfqq->dispatched from being uselessly decremented if bfqq
++	 * is not in service, and then incremented again after
++ * incrementing bfqq->dispatched.
++ */
++ bfqq->dispatched++;
++ bfq_remove_request(rq);
++ elv_dispatch_sort(q, rq);
++
++ if (bfq_bfqq_sync(bfqq))
++ bfqd->sync_flight++;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_dispatch(bfqq_group(bfqq), blk_rq_bytes(rq),
++ rq->cmd_flags);
++#endif
++}
++
++/*
++ * Return expired entry, or NULL to just start from scratch in rbtree.
++ */
++static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
++{
++ struct request *rq = NULL;
++
++ if (bfq_bfqq_fifo_expire(bfqq))
++ return NULL;
++
++ bfq_mark_bfqq_fifo_expire(bfqq);
++
++ if (list_empty(&bfqq->fifo))
++ return NULL;
++
++ rq = rq_entry_fifo(bfqq->fifo.next);
++
++ if (time_before(jiffies, rq->fifo_time))
++ return NULL;
++
++ return rq;
++}
++
++static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++ return entity->budget - entity->service;
++}
++
++static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ BUG_ON(bfqq != bfqd->in_service_queue);
++
++ __bfq_bfqd_reset_in_service(bfqd);
++
++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
++ /*
++ * Overloading budget_timeout field to store the time
++ * at which the queue remains with no backlog; used by
++ * the weight-raising mechanism.
++ */
++ bfqq->budget_timeout = jiffies;
++ bfq_del_bfqq_busy(bfqd, bfqq, 1);
++ } else
++ bfq_activate_bfqq(bfqd, bfqq);
++}
++
++/**
++ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior.
++ * @bfqd: device data.
++ * @bfqq: queue to update.
++ * @reason: reason for expiration.
++ *
++ * Handle the feedback on @bfqq budget at queue expiration.
++ * See the body for detailed comments.
++ */
++static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ enum bfqq_expiration reason)
++{
++ struct request *next_rq;
++ int budget, min_budget;
++
++ budget = bfqq->max_budget;
++ min_budget = bfq_min_budget(bfqd);
++
++ BUG_ON(bfqq != bfqd->in_service_queue);
++
++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d",
++ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d",
++ budget, bfq_min_budget(bfqd));
++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
++ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
++
++ if (bfq_bfqq_sync(bfqq)) {
++ switch (reason) {
++ /*
++ * Caveat: in all the following cases we trade latency
++ * for throughput.
++ */
++ case BFQ_BFQQ_TOO_IDLE:
++ /*
++ * This is the only case where we may reduce
++ * the budget: if there is no request of the
++ * process still waiting for completion, then
++ * we assume (tentatively) that the timer has
++ * expired because the batch of requests of
++ * the process could have been served with a
++ * smaller budget. Hence, betting that
++			 * the process will behave in the same way when it
++ * becomes backlogged again, we reduce its
++ * next budget. As long as we guess right,
++ * this budget cut reduces the latency
++ * experienced by the process.
++ *
++ * However, if there are still outstanding
++ * requests, then the process may have not yet
++ * issued its next request just because it is
++ * still waiting for the completion of some of
++ * the still outstanding ones. So in this
++ * subcase we do not reduce its budget, on the
++ * contrary we increase it to possibly boost
++ * the throughput, as discussed in the
++ * comments to the BUDGET_TIMEOUT case.
++ */
++ if (bfqq->dispatched > 0) /* still outstanding reqs */
++ budget = min(budget * 2, bfqd->bfq_max_budget);
++ else {
++ if (budget > 5 * min_budget)
++ budget -= 4 * min_budget;
++ else
++ budget = min_budget;
++ }
++ break;
++ case BFQ_BFQQ_BUDGET_TIMEOUT:
++ /*
++ * We double the budget here because: 1) it
++ * gives the chance to boost the throughput if
++ * this is not a seeky process (which may have
++ * bumped into this timeout because of, e.g.,
++ * ZBR), 2) together with charge_full_budget
++ * it helps give seeky processes higher
++ * timestamps, and hence be served less
++ * frequently.
++ */
++ budget = min(budget * 2, bfqd->bfq_max_budget);
++ break;
++ case BFQ_BFQQ_BUDGET_EXHAUSTED:
++ /*
++ * The process still has backlog, and did not
++ * let either the budget timeout or the disk
++ * idling timeout expire. Hence it is not
++ * seeky, has a short thinktime and may be
++ * happy with a higher budget too. So
++ * definitely increase the budget of this good
++ * candidate to boost the disk throughput.
++ */
++ budget = min(budget * 4, bfqd->bfq_max_budget);
++ break;
++ case BFQ_BFQQ_NO_MORE_REQUESTS:
++ /*
++ * Leave the budget unchanged.
++ */
++ default:
++ return;
++ }
++ } else
++ /*
++		 * Async queues always get the maximum possible budget
++ * (their ability to dispatch is limited by
++ * @bfqd->bfq_max_budget_async_rq).
++ */
++ budget = bfqd->bfq_max_budget;
++
++ bfqq->max_budget = budget;
++
++ if (bfqd->budgets_assigned >= bfq_stats_min_budgets &&
++ !bfqd->bfq_user_max_budget)
++ bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget);
++
++ /*
++ * Make sure that we have enough budget for the next request.
++ * Since the finish time of the bfqq must be kept in sync with
++ * the budget, be sure to call __bfq_bfqq_expire() after the
++ * update.
++ */
++ next_rq = bfqq->next_rq;
++ if (next_rq)
++ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
++ bfq_serv_to_charge(next_rq, bfqq));
++ else
++ bfqq->entity.budget = bfqq->max_budget;
++
++ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d",
++ next_rq ? blk_rq_sectors(next_rq) : 0,
++ bfqq->entity.budget);
++}
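++
++/*
++ * As a concrete illustration of the feedback above, assume
++ * bfq_max_budget = 16384 sectors and min_budget = 512 sectors: a sync
++ * queue that expires as BFQ_BFQQ_TOO_IDLE with no outstanding requests
++ * and a max_budget of 16384 is cut to 16384 - 4 * 512 = 14336, while a
++ * queue that exhausts a 4096-sector budget is raised to
++ * min(4 * 4096, 16384) = 16384.
++ */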
++
++static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
++{
++ unsigned long max_budget;
++
++ /*
++ * The max_budget calculated when autotuning is equal to the
++	 * number of sectors transferred in timeout_sync at the
++ * estimated peak rate.
++ */
++ max_budget = (unsigned long)(peak_rate * 1000 *
++ timeout >> BFQ_RATE_SHIFT);
++
++ return max_budget;
++}
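++
++/*
++ * In the computation above, peak_rate is expressed in sectors/usec,
++ * left-shifted by BFQ_RATE_SHIFT, and timeout (as passed from
++ * bfq_update_peak_rate()) is in milliseconds, so the factor 1000 converts
++ * the timeout to microseconds and the final shift removes the fixed-point
++ * scaling. For example, peak_rate = 17415 (roughly 136 MB/s) and a 125 ms
++ * sync timeout yield 17415 * 1000 * 125 >> 16 ~= 33217 sectors, i.e., a
++ * max budget of about 16 MiB.
++ */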
++
++/*
++ * In addition to updating the peak rate, checks whether the process
++ * is "slow", and returns true if so. This slow flag is used, in addition
++ * to the budget timeout, to reduce the amount of service provided to
++ * seeky processes, and hence reduce their chances to lower the
++ * throughput. See the code for more details.
++ */
++static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ bool compensate, enum bfqq_expiration reason)
++{
++ u64 bw, usecs, expected, timeout;
++ ktime_t delta;
++ int update = 0;
++
++ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
++ return false;
++
++ if (compensate)
++ delta = bfqd->last_idling_start;
++ else
++ delta = ktime_get();
++ delta = ktime_sub(delta, bfqd->last_budget_start);
++ usecs = ktime_to_us(delta);
++
++ /* Don't trust short/unrealistic values. */
++ if (usecs < 100 || usecs >= LONG_MAX)
++ return false;
++
++ /*
++ * Calculate the bandwidth for the last slice. We use a 64 bit
++ * value to store the peak rate, in sectors per usec in fixed
++ * point math. We do so to have enough precision in the estimate
++ * and to avoid overflows.
++ */
++ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
++ do_div(bw, (unsigned long)usecs);
++
++ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
++
++ /*
++ * Use only long (> 20ms) intervals to filter out spikes for
++ * the peak rate estimation.
++ */
++ if (usecs > 20000) {
++ if (bw > bfqd->peak_rate ||
++ (!BFQQ_SEEKY(bfqq) &&
++ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) {
++ bfq_log(bfqd, "measured bw =%llu", bw);
++ /*
++ * To smooth oscillations use a low-pass filter with
++ * alpha=7/8, i.e.,
++ * new_rate = (7/8) * old_rate + (1/8) * bw
++ */
++ do_div(bw, 8);
++ if (bw == 0)
++				return false;
++ bfqd->peak_rate *= 7;
++ do_div(bfqd->peak_rate, 8);
++ bfqd->peak_rate += bw;
++ update = 1;
++ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate);
++ }
++
++ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
++
++ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES)
++ bfqd->peak_rate_samples++;
++
++ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES &&
++ update) {
++ int dev_type = blk_queue_nonrot(bfqd->queue);
++ if (bfqd->bfq_user_max_budget == 0) {
++ bfqd->bfq_max_budget =
++ bfq_calc_max_budget(bfqd->peak_rate,
++ timeout);
++ bfq_log(bfqd, "new max_budget=%d",
++ bfqd->bfq_max_budget);
++ }
++ if (bfqd->device_speed == BFQ_BFQD_FAST &&
++ bfqd->peak_rate < device_speed_thresh[dev_type]) {
++ bfqd->device_speed = BFQ_BFQD_SLOW;
++ bfqd->RT_prod = R_slow[dev_type] *
++ T_slow[dev_type];
++ } else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
++ bfqd->peak_rate > device_speed_thresh[dev_type]) {
++ bfqd->device_speed = BFQ_BFQD_FAST;
++ bfqd->RT_prod = R_fast[dev_type] *
++ T_fast[dev_type];
++ }
++ }
++ }
++
++ /*
++	 * If the process has been served for too short a time
++	 * interval to let its possible sequential accesses prevail over
++	 * the initial seek time needed to move the disk head to the
++ * first sector it requested, then give the process a chance
++ * and for the moment return false.
++ */
++ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8)
++ return false;
++
++ /*
++ * A process is considered ``slow'' (i.e., seeky, so that we
++ * cannot treat it fairly in the service domain, as it would
++	 * slow down the other processes too much) if, when a slice
++ * ends for whatever reason, it has received service at a
++ * rate that would not be high enough to complete the budget
++ * before the budget timeout expiration.
++ */
++ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT;
++
++ /*
++ * Caveat: processes doing IO in the slower disk zones will
++ * tend to be slow(er) even if not seeky. And the estimated
++ * peak rate will actually be an average over the disk
++ * surface. Hence, to not be too harsh with unlucky processes,
++ * we keep a budget/3 margin of safety before declaring a
++ * process slow.
++ */
++ return expected > (4 * bfqq->entity.budget) / 3;
++}
++
++/*
++ * To be deemed as soft real-time, an application must meet two
++ * requirements. First, the application must not require an average
++ * bandwidth higher than the approximate bandwidth required to playback or
++ * record a compressed high-definition video.
++ * The next function is invoked on the completion of the last request of a
++ * batch, to compute the next-start time instant, soft_rt_next_start, such
++ * that, if the next request of the application does not arrive before
++ * soft_rt_next_start, then the above requirement on the bandwidth is met.
++ *
++ * The second requirement is that the request pattern of the application is
++ * isochronous, i.e., that, after issuing a request or a batch of requests,
++ * the application stops issuing new requests until all its pending requests
++ * have been completed. After that, the application may issue a new batch,
++ * and so on.
++ * For this reason the next function is invoked to compute
++ * soft_rt_next_start only for applications that meet this requirement,
++ * whereas soft_rt_next_start is set to infinity for applications that do
++ * not.
++ *
++ * Unfortunately, even a greedy application may happen to behave in an
++ * isochronous way if the CPU load is high. In fact, the application may
++ * stop issuing requests while the CPUs are busy serving other processes,
++ * then restart, then stop again for a while, and so on. In addition, if
++ * the disk achieves a low enough throughput with the request pattern
++ * issued by the application (e.g., because the request pattern is random
++ * and/or the device is slow), then the application may meet the above
++ * bandwidth requirement too. To prevent such a greedy application from
++ * being deemed soft real-time, a further rule is used in the computation of
++ * soft_rt_next_start: soft_rt_next_start must be higher than the current
++ * time plus the maximum time for which the arrival of a request is waited
++ * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle.
++ * This filters out greedy applications, as the latter issue instead their
++ * next request as soon as possible after the last one has been completed
++ * (in contrast, when a batch of requests is completed, a soft real-time
++ * application spends some time processing data).
++ *
++ * Unfortunately, the last filter may easily generate false positives if
++ * only bfqd->bfq_slice_idle is used as a reference time interval and one
++ * or both the following cases occur:
++ * 1) HZ is so low that the duration of a jiffy is comparable to or higher
++ * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with
++ * HZ=100.
++ * 2) jiffies, instead of increasing at a constant rate, may stop increasing
++ * for a while, then suddenly 'jump' by several units to recover the lost
++ * increments. This seems to happen, e.g., inside virtual machines.
++ * To address this issue, we do not use as a reference time interval just
++ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In
++ * particular we add the minimum number of jiffies for which the filter
++ * seems to be quite precise also in embedded systems and KVM/QEMU virtual
++ * machines.
++ */
++static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ return max(bfqq->last_idle_bklogged +
++ HZ * bfqq->service_from_backlogged /
++ bfqd->bfq_wr_max_softrt_rate,
++ jiffies + bfqq->bfqd->bfq_slice_idle + 4);
++}
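++
++/*
++ * Worked example for the formula above, with hypothetical numbers: with
++ * HZ = 1000, bfq_wr_max_softrt_rate = 7000 sectors/sec (the default set
++ * in bfq_init_queue()) and service_from_backlogged = 3500 sectors, the
++ * bandwidth term evaluates to
++ *
++ *   last_idle_bklogged + 1000 * 3500 / 7000 = last_idle_bklogged + 500
++ *
++ * jiffies, i.e., half a second after the queue last became idle while
++ * backlogged. The max() with jiffies + bfq_slice_idle + 4 then enforces
++ * the anti-greedy filter described in the comment above.
++ */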
++
++/*
++ * Return the largest-possible time instant such that, for as long as possible,
++ * the current time will be lower than this time instant according to the macro
++ * time_is_before_jiffies().
++ */
++static unsigned long bfq_infinity_from_now(unsigned long now)
++{
++ return now + ULONG_MAX / 2;
++}
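++
++/*
++ * Note on the ULONG_MAX / 2 offset above: the time_is_before_jiffies()/
++ * time_after() family compares jiffies values through signed arithmetic
++ * on their difference, so two timestamps are ordered correctly only as
++ * long as they are less than ULONG_MAX / 2 apart. now + ULONG_MAX / 2 is
++ * therefore the farthest point in the future that those macros can still
++ * recognize as "after now", which is what "infinity" means here.
++ */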
++
++/**
++ * bfq_bfqq_expire - expire a queue.
++ * @bfqd: device owning the queue.
++ * @bfqq: the queue to expire.
++ * @compensate: if true, compensate for the time spent idling.
++ * @reason: the reason causing the expiration.
++ *
++ *
++ * If the process associated with the queue is slow (i.e., seeky), or in
++ * case of budget timeout, or, finally, if it is async, we
++ * artificially charge it an entire budget (independently of the
++ * actual service it received). As a consequence, the queue will get
++ * higher timestamps than the correct ones upon reactivation, and
++ * hence it will be rescheduled as if it had received more service
++ * than what it actually received. In the end, this class of processes
++ * will receive less service in proportion to how slowly they consume
++ * their budgets (and hence how seriously they tend to lower the
++ * throughput).
++ *
++ * In contrast, when a queue expires because it has been idling for
++ * too long or because it exhausted its budget, we do not touch the
++ * amount of service it has received. Hence when the queue will be
++ * reactivated and its timestamps updated, the latter will be in sync
++ * with the actual service received by the queue until expiration.
++ *
++ * Charging a full budget to the first type of queues and the exact
++ * service to the others has the effect of using the WF2Q+ policy to
++ * schedule the former on a timeslice basis, without violating the
++ * service domain guarantees of the latter.
++ */
++static void bfq_bfqq_expire(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ bool compensate,
++ enum bfqq_expiration reason)
++{
++ bool slow;
++ BUG_ON(bfqq != bfqd->in_service_queue);
++
++ /*
++ * Update disk peak rate for autotuning and check whether the
++ * process is slow (see bfq_update_peak_rate).
++ */
++ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason);
++
++ /*
++ * As above explained, 'punish' slow (i.e., seeky), timed-out
++ * and async queues, to favor sequential sync workloads.
++ *
++ * Processes doing I/O in the slower disk zones will tend to be
++ * slow(er) even if not seeky. Hence, since the estimated peak
++ * rate is actually an average over the disk surface, these
++	 * processes may time out just for bad luck. To avoid punishing
++ * them we do not charge a full budget to a process that
++ * succeeded in consuming at least 2/3 of its budget.
++ */
++ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3))
++ bfq_bfqq_charge_full_budget(bfqq);
++
++ bfqq->service_from_backlogged += bfqq->entity.service;
++
++ if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
++ !bfq_bfqq_constantly_seeky(bfqq)) {
++ bfq_mark_bfqq_constantly_seeky(bfqq);
++ if (!blk_queue_nonrot(bfqd->queue))
++ bfqd->const_seeky_busy_in_flight_queues++;
++ }
++
++ if (reason == BFQ_BFQQ_TOO_IDLE &&
++ bfqq->entity.service <= 2 * bfqq->entity.budget / 10 )
++ bfq_clear_bfqq_IO_bound(bfqq);
++
++ if (bfqd->low_latency && bfqq->wr_coeff == 1)
++ bfqq->last_wr_start_finish = jiffies;
++
++ if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
++ RB_EMPTY_ROOT(&bfqq->sort_list)) {
++ /*
++ * If we get here, and there are no outstanding requests,
++ * then the request pattern is isochronous (see the comments
++ * to the function bfq_bfqq_softrt_next_start()). Hence we
++ * can compute soft_rt_next_start. If, instead, the queue
++ * still has outstanding requests, then we have to wait
++ * for the completion of all the outstanding requests to
++ * discover whether the request pattern is actually
++ * isochronous.
++ */
++ if (bfqq->dispatched == 0)
++ bfqq->soft_rt_next_start =
++ bfq_bfqq_softrt_next_start(bfqd, bfqq);
++ else {
++ /*
++ * The application is still waiting for the
++ * completion of one or more requests:
++ * prevent it from possibly being incorrectly
++ * deemed as soft real-time by setting its
++ * soft_rt_next_start to infinity. In fact,
++ * without this assignment, the application
++ * would be incorrectly deemed as soft
++ * real-time if:
++ * 1) it issued a new request before the
++ * completion of all its in-flight
++ * requests, and
++ * 2) at that time, its soft_rt_next_start
++ * happened to be in the past.
++ */
++ bfqq->soft_rt_next_start =
++ bfq_infinity_from_now(jiffies);
++ /*
++ * Schedule an update of soft_rt_next_start to when
++ * the task may be discovered to be isochronous.
++ */
++ bfq_mark_bfqq_softrt_update(bfqq);
++ }
++ }
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason,
++ slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
++
++ /*
++ * Increase, decrease or leave budget unchanged according to
++ * reason.
++ */
++ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
++ __bfq_bfqq_expire(bfqd, bfqq);
++}
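++
++/*
++ * Sketch of why full-budget charging works (informal, following the
++ * description in the comment above and in [1]): in WF2Q+ the finish
++ * timestamp of a queue grows roughly as
++ *
++ *   finish = start + charged_service / weight.
++ *
++ * Charging a slow or timed-out queue its whole budget instead of the
++ * (smaller) service it actually received inflates its finish timestamp,
++ * so on reactivation it is scheduled as if it had already consumed a
++ * full slice; well-behaved queues, charged only their real service, are
++ * not affected.
++ */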
++
++/*
++ * Budget timeout is not implemented through a dedicated timer, but
++ * just checked on request arrivals and completions, as well as on
++ * idle timer expirations.
++ */
++static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
++{
++ if (bfq_bfqq_budget_new(bfqq) ||
++ time_before(jiffies, bfqq->budget_timeout))
++ return false;
++ return true;
++}
++
++/*
++ * If we expire a queue that is waiting for the arrival of a new
++ * request, we may prevent the fictitious timestamp back-shifting that
++ * allows the guarantees of the queue to be preserved (see [1] for
++ * this tricky aspect). Hence we return true only if this condition
++ * does not hold, or if the queue is slow enough to deserve only to be
++ * kicked off for preserving a high throughput.
++ */
++static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
++{
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "may_budget_timeout: wait_request %d left %d timeout %d",
++ bfq_bfqq_wait_request(bfqq),
++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3,
++ bfq_bfqq_budget_timeout(bfqq));
++
++ return (!bfq_bfqq_wait_request(bfqq) ||
++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)
++ &&
++ bfq_bfqq_budget_timeout(bfqq);
++}
++
++/*
++ * For a queue that becomes empty, device idling is allowed only if
++ * this function returns true for that queue. As a consequence, since
++ * device idling plays a critical role for both throughput boosting
++ * and service guarantees, the return value of this function plays a
++ * critical role as well.
++ *
++ * In a nutshell, this function returns true only if idling is
++ * beneficial for throughput or, even if detrimental for throughput,
++ * idling is however necessary to preserve service guarantees (low
++ * latency, desired throughput distribution, ...). In particular, on
++ * NCQ-capable devices, this function tries to return false, so as to
++ * help keep the drives' internal queues full, whenever this helps the
++ * device boost the throughput without causing any service-guarantee
++ * issue.
++ *
++ * In more detail, the return value of this function is obtained by,
++ * first, computing a number of boolean variables that take into
++ * account throughput and service-guarantee issues, and, then,
++ * combining these variables in a logical expression. Most of the
++ * issues taken into account are not trivial. We discuss these issues
++ * while introducing the variables.
++ */
++static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
++{
++ struct bfq_data *bfqd = bfqq->bfqd;
++ bool idling_boosts_thr, idling_boosts_thr_without_issues,
++ all_queues_seeky, on_hdd_and_not_all_queues_seeky,
++ idling_needed_for_service_guarantees,
++ asymmetric_scenario;
++
++ /*
++ * The next variable takes into account the cases where idling
++ * boosts the throughput.
++ *
++ * The value of the variable is computed considering, first, that
++ * idling is virtually always beneficial for the throughput if:
++ * (a) the device is not NCQ-capable, or
++ * (b) regardless of the presence of NCQ, the device is rotational
++ * and the request pattern for bfqq is I/O-bound and sequential.
++ *
++ * Secondly, and in contrast to the above item (b), idling an
++ * NCQ-capable flash-based device would not boost the
++ * throughput even with sequential I/O; rather it would lower
++ * the throughput in proportion to how fast the device
++ * is. Accordingly, the next variable is true if any of the
++ * above conditions (a) and (b) is true, and, in particular,
++ * happens to be false if bfqd is an NCQ-capable flash-based
++ * device.
++ */
++ idling_boosts_thr = !bfqd->hw_tag ||
++ (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
++ bfq_bfqq_idle_window(bfqq)) ;
++
++ /*
++ * The value of the next variable,
++ * idling_boosts_thr_without_issues, is equal to that of
++ * idling_boosts_thr, unless a special case holds. In this
++ * special case, described below, idling may cause problems to
++ * weight-raised queues.
++ *
++ * When the request pool is saturated (e.g., in the presence
++ * of write hogs), if the processes associated with
++ * non-weight-raised queues ask for requests at a lower rate,
++ * then processes associated with weight-raised queues have a
++ * higher probability to get a request from the pool
++ * immediately (or at least soon) when they need one. Thus
++ * they have a higher probability to actually get a fraction
++ * of the device throughput proportional to their high
++ * weight. This is especially true with NCQ-capable drives,
++ * which enqueue several requests in advance, and further
++ * reorder internally-queued requests.
++ *
++ * For this reason, we force to false the value of
++ * idling_boosts_thr_without_issues if there are weight-raised
++ * busy queues. In this case, and if bfqq is not weight-raised,
++ * this guarantees that the device is not idled for bfqq (if,
++ * instead, bfqq is weight-raised, then idling will be
++ * guaranteed by another variable, see below). Combined with
++ * the timestamping rules of BFQ (see [1] for details), this
++ * behavior causes bfqq, and hence any sync non-weight-raised
++ * queue, to get a lower number of requests served, and thus
++ * to ask for a lower number of requests from the request
++ * pool, before the busy weight-raised queues get served
++ * again. This often mitigates starvation problems in the
++ * presence of heavy write workloads and NCQ, thereby
++ * guaranteeing a higher application and system responsiveness
++ * in these hostile scenarios.
++ */
++ idling_boosts_thr_without_issues = idling_boosts_thr &&
++ bfqd->wr_busy_queues == 0;
++
++ /*
++ * There are then two cases where idling must be performed not
++ * for throughput concerns, but to preserve service
++ * guarantees. In the description of these cases, we say, for
++ * short, that a queue is sequential/random if the process
++	 * associated with the queue issues sequential/random requests
++ * (in the second case the queue may be tagged as seeky or
++ * even constantly_seeky).
++ *
++ * To introduce the first case, we note that, since
++ * bfq_bfqq_idle_window(bfqq) is false if the device is
++ * NCQ-capable and bfqq is random (see
++ * bfq_update_idle_window()), then, from the above two
++ * assignments it follows that
++ * idling_boosts_thr_without_issues is false if the device is
++ * NCQ-capable and bfqq is random. Therefore, for this case,
++ * device idling would never be allowed if we used just
++ * idling_boosts_thr_without_issues to decide whether to allow
++ * it. And, beneficially, this would imply that throughput
++ * would always be boosted also with random I/O on NCQ-capable
++ * HDDs.
++ *
++ * But we must be careful on this point, to avoid an unfair
++ * treatment for bfqq. In fact, because of the same above
++ * assignments, idling_boosts_thr_without_issues is, on the
++ * other hand, true if 1) the device is an HDD and bfqq is
++ * sequential, and 2) there are no busy weight-raised
++ * queues. As a consequence, if we used just
++ * idling_boosts_thr_without_issues to decide whether to idle
++ * the device, then with an HDD we might easily bump into a
++ * scenario where queues that are sequential and I/O-bound
++ * would enjoy idling, whereas random queues would not. The
++ * latter might then get a low share of the device throughput,
++ * simply because the former would get many requests served
++ * after being set as in service, while the latter would not.
++ *
++ * To address this issue, we start by setting to true a
++ * sentinel variable, on_hdd_and_not_all_queues_seeky, if the
++ * device is rotational and not all queues with pending or
++ * in-flight requests are constantly seeky (i.e., there are
++ * active sequential queues, and bfqq might then be mistreated
++ * if it does not enjoy idling because it is random).
++ */
++ all_queues_seeky = bfq_bfqq_constantly_seeky(bfqq) &&
++ bfqd->busy_in_flight_queues ==
++ bfqd->const_seeky_busy_in_flight_queues;
++
++ on_hdd_and_not_all_queues_seeky =
++ !blk_queue_nonrot(bfqd->queue) && !all_queues_seeky;
++
++ /*
++ * To introduce the second case where idling needs to be
++ * performed to preserve service guarantees, we can note that
++ * allowing the drive to enqueue more than one request at a
++ * time, and hence delegating de facto final scheduling
++ * decisions to the drive's internal scheduler, causes loss of
++ * control on the actual request service order. In particular,
++ * the critical situation is when requests from different
++	 * processes happen to be present, at the same time, in the
++ * internal queue(s) of the drive. In such a situation, the
++ * drive, by deciding the service order of the
++	 * internally-queued requests, also determines the actual
++ * throughput distribution among these processes. But the
++ * drive typically has no notion or concern about per-process
++ * throughput distribution, and makes its decisions only on a
++ * per-request basis. Therefore, the service distribution
++ * enforced by the drive's internal scheduler is likely to
++ * coincide with the desired device-throughput distribution
++ * only in a completely symmetric scenario where:
++ * (i) each of these processes must get the same throughput as
++ * the others;
++ * (ii) all these processes have the same I/O pattern
++	 * (either sequential or random).
++ * In fact, in such a scenario, the drive will tend to treat
++ * the requests of each of these processes in about the same
++ * way as the requests of the others, and thus to provide
++ * each of these processes with about the same throughput
++ * (which is exactly the desired throughput distribution). In
++ * contrast, in any asymmetric scenario, device idling is
++ * certainly needed to guarantee that bfqq receives its
++ * assigned fraction of the device throughput (see [1] for
++ * details).
++ *
++ * We address this issue by controlling, actually, only the
++ * symmetry sub-condition (i), i.e., provided that
++ * sub-condition (i) holds, idling is not performed,
++ * regardless of whether sub-condition (ii) holds. In other
++ * words, only if sub-condition (i) holds, then idling is
++ * allowed, and the device tends to be prevented from queueing
++ * many requests, possibly of several processes. The reason
++ * for not controlling also sub-condition (ii) is that, first,
++ * in the case of an HDD, the asymmetry in terms of types of
++	 * I/O patterns is already taken into account in the above
++ * sentinel variable
++ * on_hdd_and_not_all_queues_seeky. Secondly, in the case of a
++ * flash-based device, we prefer however to privilege
++ * throughput (and idling lowers throughput for this type of
++ * devices), for the following reasons:
++ * 1) differently from HDDs, the service time of random
++	 * requests is not orders of magnitude higher than the service
++ * time of sequential requests; thus, even if processes doing
++ * sequential I/O get a preferential treatment with respect to
++ * others doing random I/O, the consequences are not as
++ * dramatic as with HDDs;
++ * 2) if a process doing random I/O does need strong
++ * throughput guarantees, it is hopefully already being
++ * weight-raised, or the user is likely to have assigned it a
++ * higher weight than the other processes (and thus
++ * sub-condition (i) is likely to be false, which triggers
++ * idling).
++ *
++ * According to the above considerations, the next variable is
++ * true (only) if sub-condition (i) holds. To compute the
++ * value of this variable, we not only use the return value of
++ * the function bfq_symmetric_scenario(), but also check
++ * whether bfqq is being weight-raised, because
++ * bfq_symmetric_scenario() does not take into account also
++ * weight-raised queues (see comments to
++ * bfq_weights_tree_add()).
++ *
++ * As a side note, it is worth considering that the above
++ * device-idling countermeasures may however fail in the
++ * following unlucky scenario: if idling is (correctly)
++ * disabled in a time period during which all symmetry
++ * sub-conditions hold, and hence the device is allowed to
++ * enqueue many requests, but at some later point in time some
++	 * sub-condition ceases to hold, then it may become impossible
++ * to let requests be served in the desired order until all
++ * the requests already queued in the device have been served.
++ */
++ asymmetric_scenario = bfqq->wr_coeff > 1 ||
++ !bfq_symmetric_scenario(bfqd);
++
++ /*
++ * Finally, there is a case where maximizing throughput is the
++ * best choice even if it may cause unfairness toward
++ * bfqq. Such a case is when bfqq became active in a burst of
++ * queue activations. Queues that became active during a large
++ * burst benefit only from throughput, as discussed in the
++ * comments to bfq_handle_burst. Thus, if bfqq became active
++ * in a burst and not idling the device maximizes throughput,
++	 * then the device must not be idled, because not idling the
++ * device provides bfqq and all other queues in the burst with
++ * maximum benefit. Combining this and the two cases above, we
++ * can now establish when idling is actually needed to
++ * preserve service guarantees.
++ */
++ idling_needed_for_service_guarantees =
++ (on_hdd_and_not_all_queues_seeky || asymmetric_scenario) &&
++ !bfq_bfqq_in_large_burst(bfqq);
++
++ /*
++ * We have now all the components we need to compute the return
++ * value of the function, which is true only if both the following
++ * conditions hold:
++	 * 1) bfqq is sync, because idling makes sense only for sync queues;
++ * 2) idling either boosts the throughput (without issues), or
++ * is necessary to preserve service guarantees.
++ */
++ return bfq_bfqq_sync(bfqq) &&
++ (idling_boosts_thr_without_issues ||
++ idling_needed_for_service_guarantees);
++}
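++
++/*
++ * Compact recap of the decision above (shorthand for the flags and
++ * helpers used in bfq_bfqq_may_idle(); same logic, just condensed):
++ *
++ *   boosts = (!hw_tag ||
++ *             (rotational && IO_bound && idle_window)) &&
++ *            wr_busy_queues == 0;
++ *   needed = ((rotational && !all_queues_seeky) ||
++ *             wr_coeff > 1 || !bfq_symmetric_scenario(bfqd)) &&
++ *            !in_large_burst;
++ *   idle   = sync && (boosts || needed);
++ *
++ * For instance, a sync weight-raised queue outside a large burst is
++ * idled even on an NCQ-capable SSD (needed holds), whereas a
++ * non-weight-raised queue on the same device, in a fully symmetric
++ * scenario, is not.
++ */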
++
++/*
++ * If the in-service queue is empty but the function bfq_bfqq_may_idle
++ * returns true, then:
++ * 1) the queue must remain in service and cannot be expired, and
++ * 2) the device must be idled to wait for the possible arrival of a new
++ * request for the queue.
++ * See the comments to the function bfq_bfqq_may_idle for the reasons
++ * why performing device idling is the best choice to boost the throughput
++ * and preserve service guarantees when bfq_bfqq_may_idle itself
++ * returns true.
++ */
++static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
++{
++ struct bfq_data *bfqd = bfqq->bfqd;
++
++ return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
++ bfq_bfqq_may_idle(bfqq);
++}
++
++/*
++ * Select a queue for service. If we have a current queue in service,
++ * check whether to continue servicing it, or retrieve and set a new one.
++ */
++static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq;
++ struct request *next_rq;
++ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
++
++ bfqq = bfqd->in_service_queue;
++ if (!bfqq)
++ goto new_queue;
++
++ bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
++
++ if (bfq_may_expire_for_budg_timeout(bfqq) &&
++ !timer_pending(&bfqd->idle_slice_timer) &&
++ !bfq_bfqq_must_idle(bfqq))
++ goto expire;
++
++ next_rq = bfqq->next_rq;
++ /*
++ * If bfqq has requests queued and it has enough budget left to
++ * serve them, keep the queue, otherwise expire it.
++ */
++ if (next_rq) {
++ if (bfq_serv_to_charge(next_rq, bfqq) >
++ bfq_bfqq_budget_left(bfqq)) {
++ reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
++ goto expire;
++ } else {
++ /*
++ * The idle timer may be pending because we may
++ * not disable disk idling even when a new request
++ * arrives.
++ */
++ if (timer_pending(&bfqd->idle_slice_timer)) {
++ /*
++ * If we get here: 1) at least a new request
++ * has arrived but we have not disabled the
++ * timer because the request was too small,
++ * 2) then the block layer has unplugged
++ * the device, causing the dispatch to be
++ * invoked.
++ *
++ * Since the device is unplugged, now the
++ * requests are probably large enough to
++ * provide a reasonable throughput.
++ * So we disable idling.
++ */
++ bfq_clear_bfqq_wait_request(bfqq);
++ del_timer(&bfqd->idle_slice_timer);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_idle_time(bfqq_group(bfqq));
++#endif
++ }
++ goto keep_queue;
++ }
++ }
++
++ /*
++ * No requests pending. However, if the in-service queue is idling
++ * for a new request, or has requests waiting for a completion and
++ * may idle after their completion, then keep it anyway.
++ */
++ if (timer_pending(&bfqd->idle_slice_timer) ||
++ (bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) {
++ bfqq = NULL;
++ goto keep_queue;
++ }
++
++ reason = BFQ_BFQQ_NO_MORE_REQUESTS;
++expire:
++ bfq_bfqq_expire(bfqd, bfqq, false, reason);
++new_queue:
++ bfqq = bfq_set_in_service_queue(bfqd);
++ bfq_log(bfqd, "select_queue: new queue %d returned",
++ bfqq ? bfqq->pid : 0);
++keep_queue:
++ return bfqq;
++}
++
++static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++ if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */
++ bfq_log_bfqq(bfqd, bfqq,
++ "raising period dur %u/%u msec, old coeff %u, w %d(%d)",
++ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
++ jiffies_to_msecs(bfqq->wr_cur_max_time),
++ bfqq->wr_coeff,
++ bfqq->entity.weight, bfqq->entity.orig_weight);
++
++ BUG_ON(bfqq != bfqd->in_service_queue && entity->weight !=
++ entity->orig_weight * bfqq->wr_coeff);
++ if (entity->prio_changed)
++ bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
++
++ /*
++ * If the queue was activated in a burst, or
++ * too much time has elapsed from the beginning
++ * of this weight-raising period, then end weight
++ * raising.
++ */
++ if (bfq_bfqq_in_large_burst(bfqq) ||
++ time_is_before_jiffies(bfqq->last_wr_start_finish +
++ bfqq->wr_cur_max_time)) {
++ bfqq->last_wr_start_finish = jiffies;
++ bfq_log_bfqq(bfqd, bfqq,
++ "wrais ending at %lu, rais_max_time %u",
++ bfqq->last_wr_start_finish,
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ bfq_bfqq_end_wr(bfqq);
++ }
++ }
++ /* Update weight both if it must be raised and if it must be lowered */
++ if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
++ __bfq_entity_update_weight_prio(
++ bfq_entity_service_tree(entity),
++ entity);
++}
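++
++/*
++ * In other words, a weight-raising period started (or refreshed) at
++ * last_wr_start_finish ends either because the queue turns out to belong
++ * to a large burst, or once wr_cur_max_time jiffies have elapsed. For
++ * instance, assuming wr_cur_max_time is set to bfqd->bfq_wr_rt_max_time
++ * for soft real-time queues (as done where weight raising is started,
++ * outside this hunk), with the 300 ms default from bfq_init_queue() such
++ * a queue keeps its raised weight for at most roughly 300 ms per period
++ * before __bfq_entity_update_weight_prio() brings it back to orig_weight.
++ */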
++
++/*
++ * Dispatch one request from bfqq, moving it to the request queue
++ * dispatch list.
++ */
++static int bfq_dispatch_request(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ int dispatched = 0;
++ struct request *rq;
++ unsigned long service_to_charge;
++
++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
++
++ /* Follow expired path, else get first next available. */
++ rq = bfq_check_fifo(bfqq);
++ if (!rq)
++ rq = bfqq->next_rq;
++ service_to_charge = bfq_serv_to_charge(rq, bfqq);
++
++ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) {
++ /*
++ * This may happen if the next rq is chosen in fifo order
++ * instead of sector order. The budget is properly
++ * dimensioned to be always sufficient to serve the next
++ * request only if it is chosen in sector order. The reason
++		 * is that it would be quite inefficient and of little use
++ * to always make sure that the budget is large enough to
++ * serve even the possible next rq in fifo order.
++ * In fact, requests are seldom served in fifo order.
++ *
++ * Expire the queue for budget exhaustion, and make sure
++ * that the next act_budget is enough to serve the next
++ * request, even if it comes from the fifo expired path.
++ */
++ bfqq->next_rq = rq;
++ /*
++		 * Since this dispatch failed, make sure that
++		 * a new one will be performed.
++ */
++ if (!bfqd->rq_in_driver)
++ bfq_schedule_dispatch(bfqd);
++ goto expire;
++ }
++
++ /* Finally, insert request into driver dispatch list. */
++ bfq_bfqq_served(bfqq, service_to_charge);
++ bfq_dispatch_insert(bfqd->queue, rq);
++
++ bfq_update_wr_data(bfqd, bfqq);
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "dispatched %u sec req (%llu), budg left %d",
++ blk_rq_sectors(rq),
++ (long long unsigned)blk_rq_pos(rq),
++ bfq_bfqq_budget_left(bfqq));
++
++ dispatched++;
++
++ if (!bfqd->in_service_bic) {
++ atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount);
++ bfqd->in_service_bic = RQ_BIC(rq);
++ }
++
++ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) &&
++ dispatched >= bfqd->bfq_max_budget_async_rq) ||
++ bfq_class_idle(bfqq)))
++ goto expire;
++
++ return dispatched;
++
++expire:
++ bfq_bfqq_expire(bfqd, bfqq, false, BFQ_BFQQ_BUDGET_EXHAUSTED);
++ return dispatched;
++}
++
++static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq)
++{
++ int dispatched = 0;
++
++ while (bfqq->next_rq) {
++ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq);
++ dispatched++;
++ }
++
++ BUG_ON(!list_empty(&bfqq->fifo));
++ return dispatched;
++}
++
++/*
++ * Drain our current requests.
++ * Used for barriers and when switching io schedulers on-the-fly.
++ */
++static int bfq_forced_dispatch(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq, *n;
++ struct bfq_service_tree *st;
++ int dispatched = 0;
++
++ bfqq = bfqd->in_service_queue;
++ if (bfqq)
++ __bfq_bfqq_expire(bfqd, bfqq);
++
++ /*
++ * Loop through classes, and be careful to leave the scheduler
++ * in a consistent state, as feedback mechanisms and vtime
++ * updates cannot be disabled during the process.
++ */
++ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) {
++ st = bfq_entity_service_tree(&bfqq->entity);
++
++ dispatched += __bfq_forced_dispatch_bfqq(bfqq);
++ bfqq->max_budget = bfq_max_budget(bfqd);
++
++ bfq_forget_idle(st);
++ }
++
++ BUG_ON(bfqd->busy_queues != 0);
++
++ return dispatched;
++}
++
++static int bfq_dispatch_requests(struct request_queue *q, int force)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_queue *bfqq;
++ int max_dispatch;
++
++ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
++ if (bfqd->busy_queues == 0)
++ return 0;
++
++ if (unlikely(force))
++ return bfq_forced_dispatch(bfqd);
++
++ bfqq = bfq_select_queue(bfqd);
++ if (!bfqq)
++ return 0;
++
++ if (bfq_class_idle(bfqq))
++ max_dispatch = 1;
++
++ if (!bfq_bfqq_sync(bfqq))
++ max_dispatch = bfqd->bfq_max_budget_async_rq;
++
++ if (!bfq_bfqq_sync(bfqq) && bfqq->dispatched >= max_dispatch) {
++ if (bfqd->busy_queues > 1)
++ return 0;
++ if (bfqq->dispatched >= 4 * max_dispatch)
++ return 0;
++ }
++
++ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq))
++ return 0;
++
++ bfq_clear_bfqq_wait_request(bfqq);
++ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
++
++ if (!bfq_dispatch_request(bfqd, bfqq))
++ return 0;
++
++ bfq_log_bfqq(bfqd, bfqq, "dispatched %s request",
++ bfq_bfqq_sync(bfqq) ? "sync" : "async");
++
++ return 1;
++}
++
++/*
++ * Task holds one reference to the queue, dropped when task exits. Each rq
++ * in-flight on this queue also holds a reference, dropped when rq is freed.
++ *
++ * Queue lock must be held here.
++ */
++static void bfq_put_queue(struct bfq_queue *bfqq)
++{
++ struct bfq_data *bfqd = bfqq->bfqd;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ struct bfq_group *bfqg = bfqq_group(bfqq);
++#endif
++
++ BUG_ON(atomic_read(&bfqq->ref) <= 0);
++
++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq,
++ atomic_read(&bfqq->ref));
++ if (!atomic_dec_and_test(&bfqq->ref))
++ return;
++
++ BUG_ON(rb_first(&bfqq->sort_list));
++ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
++ BUG_ON(bfqq->entity.tree);
++ BUG_ON(bfq_bfqq_busy(bfqq));
++ BUG_ON(bfqd->in_service_queue == bfqq);
++
++ if (bfq_bfqq_sync(bfqq))
++ /*
++ * The fact that this queue is being destroyed does not
++ * invalidate the fact that this queue may have been
++ * activated during the current burst. As a consequence,
++		 * although the queue does not exist anymore, and hence
++		 * needs to be removed from the burst list if it is there,
++		 * the burst size must not be decremented.
++ */
++ hlist_del_init(&bfqq->burst_list_node);
++
++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq);
++
++ kmem_cache_free(bfq_pool, bfqq);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_put(bfqg);
++#endif
++}
++
++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ if (bfqq == bfqd->in_service_queue) {
++ __bfq_bfqq_expire(bfqd, bfqq);
++ bfq_schedule_dispatch(bfqd);
++ }
++
++ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
++ atomic_read(&bfqq->ref));
++
++ bfq_put_queue(bfqq);
++}
++
++static void bfq_init_icq(struct io_cq *icq)
++{
++ struct bfq_io_cq *bic = icq_to_bic(icq);
++
++ bic->ttime.last_end_request = jiffies;
++}
++
++static void bfq_exit_icq(struct io_cq *icq)
++{
++ struct bfq_io_cq *bic = icq_to_bic(icq);
++ struct bfq_data *bfqd = bic_to_bfqd(bic);
++
++ if (bic->bfqq[BLK_RW_ASYNC]) {
++ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]);
++ bic->bfqq[BLK_RW_ASYNC] = NULL;
++ }
++
++ if (bic->bfqq[BLK_RW_SYNC]) {
++ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
++ bic->bfqq[BLK_RW_SYNC] = NULL;
++ }
++}
++
++/*
++ * Update the entity prio values; note that the new values will not
++ * be used until the next (re)activation.
++ */
++static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
++{
++ struct task_struct *tsk = current;
++ int ioprio_class;
++
++ ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
++ switch (ioprio_class) {
++ default:
++ dev_err(bfqq->bfqd->queue->backing_dev_info.dev,
++ "bfq: bad prio class %d\n", ioprio_class);
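++		/* fall through: treat an unknown class like IOPRIO_CLASS_NONE */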
++ case IOPRIO_CLASS_NONE:
++ /*
++ * No prio set, inherit CPU scheduling settings.
++ */
++ bfqq->new_ioprio = task_nice_ioprio(tsk);
++ bfqq->new_ioprio_class = task_nice_ioclass(tsk);
++ break;
++ case IOPRIO_CLASS_RT:
++ bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
++ bfqq->new_ioprio_class = IOPRIO_CLASS_RT;
++ break;
++ case IOPRIO_CLASS_BE:
++ bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
++ bfqq->new_ioprio_class = IOPRIO_CLASS_BE;
++ break;
++ case IOPRIO_CLASS_IDLE:
++ bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE;
++ bfqq->new_ioprio = 7;
++ bfq_clear_bfqq_idle_window(bfqq);
++ break;
++ }
++
++ if (bfqq->new_ioprio < 0 || bfqq->new_ioprio >= IOPRIO_BE_NR) {
++ printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n",
++ bfqq->new_ioprio);
++ BUG();
++ }
++
++ bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
++ bfqq->entity.prio_changed = 1;
++}
++
++static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
++{
++ struct bfq_data *bfqd;
++ struct bfq_queue *bfqq, *new_bfqq;
++ unsigned long uninitialized_var(flags);
++ int ioprio = bic->icq.ioc->ioprio;
++
++ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
++ &flags);
++ /*
++ * This condition may trigger on a newly created bic, be sure to
++ * drop the lock before returning.
++ */
++ if (unlikely(!bfqd) || likely(bic->ioprio == ioprio))
++ goto out;
++
++ bic->ioprio = ioprio;
++
++ bfqq = bic->bfqq[BLK_RW_ASYNC];
++ if (bfqq) {
++ new_bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic,
++ GFP_ATOMIC);
++ if (new_bfqq) {
++ bic->bfqq[BLK_RW_ASYNC] = new_bfqq;
++ bfq_log_bfqq(bfqd, bfqq,
++ "check_ioprio_change: bfqq %p %d",
++ bfqq, atomic_read(&bfqq->ref));
++ bfq_put_queue(bfqq);
++ }
++ }
++
++ bfqq = bic->bfqq[BLK_RW_SYNC];
++ if (bfqq)
++ bfq_set_next_ioprio_data(bfqq, bic);
++
++out:
++ bfq_put_bfqd_unlock(bfqd, &flags);
++}
++
++static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ struct bfq_io_cq *bic, pid_t pid, int is_sync)
++{
++ RB_CLEAR_NODE(&bfqq->entity.rb_node);
++ INIT_LIST_HEAD(&bfqq->fifo);
++ INIT_HLIST_NODE(&bfqq->burst_list_node);
++
++ atomic_set(&bfqq->ref, 0);
++ bfqq->bfqd = bfqd;
++
++ if (bic)
++ bfq_set_next_ioprio_data(bfqq, bic);
++
++ if (is_sync) {
++ if (!bfq_class_idle(bfqq))
++ bfq_mark_bfqq_idle_window(bfqq);
++ bfq_mark_bfqq_sync(bfqq);
++ } else
++ bfq_clear_bfqq_sync(bfqq);
++ bfq_mark_bfqq_IO_bound(bfqq);
++
++ /* Tentative initial value to trade off between thr and lat */
++ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
++ bfqq->pid = pid;
++
++ bfqq->wr_coeff = 1;
++ bfqq->last_wr_start_finish = 0;
++ /*
++ * Set to the value for which bfqq will not be deemed as
++ * soft rt when it becomes backlogged.
++ */
++ bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies);
++}
++
++static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd,
++ struct bio *bio, int is_sync,
++ struct bfq_io_cq *bic,
++ gfp_t gfp_mask)
++{
++ struct bfq_group *bfqg;
++ struct bfq_queue *bfqq, *new_bfqq = NULL;
++ struct blkcg *blkcg;
++
++retry:
++ rcu_read_lock();
++
++ blkcg = bio_blkcg(bio);
++ bfqg = bfq_find_alloc_group(bfqd, blkcg);
++ /* bic always exists here */
++ bfqq = bic_to_bfqq(bic, is_sync);
++
++ /*
++ * Always try a new alloc if we fall back to the OOM bfqq
++ * originally, since it should just be a temporary situation.
++ */
++ if (!bfqq || bfqq == &bfqd->oom_bfqq) {
++ bfqq = NULL;
++ if (new_bfqq) {
++ bfqq = new_bfqq;
++ new_bfqq = NULL;
++ } else if (gfpflags_allow_blocking(gfp_mask)) {
++ rcu_read_unlock();
++ spin_unlock_irq(bfqd->queue->queue_lock);
++ new_bfqq = kmem_cache_alloc_node(bfq_pool,
++ gfp_mask | __GFP_ZERO,
++ bfqd->queue->node);
++ spin_lock_irq(bfqd->queue->queue_lock);
++ if (new_bfqq)
++ goto retry;
++ } else {
++ bfqq = kmem_cache_alloc_node(bfq_pool,
++ gfp_mask | __GFP_ZERO,
++ bfqd->queue->node);
++ }
++
++ if (bfqq) {
++ bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
++ is_sync);
++ bfq_init_entity(&bfqq->entity, bfqg);
++ bfq_log_bfqq(bfqd, bfqq, "allocated");
++ } else {
++ bfqq = &bfqd->oom_bfqq;
++ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
++ }
++ }
++
++ if (new_bfqq)
++ kmem_cache_free(bfq_pool, new_bfqq);
++
++ rcu_read_unlock();
++
++ return bfqq;
++}
++
++static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
++ struct bfq_group *bfqg,
++ int ioprio_class, int ioprio)
++{
++ switch (ioprio_class) {
++ case IOPRIO_CLASS_RT:
++ return &bfqg->async_bfqq[0][ioprio];
++ case IOPRIO_CLASS_NONE:
++ ioprio = IOPRIO_NORM;
++ /* fall through */
++ case IOPRIO_CLASS_BE:
++ return &bfqg->async_bfqq[1][ioprio];
++ case IOPRIO_CLASS_IDLE:
++ return &bfqg->async_idle_bfqq;
++ default:
++ BUG();
++ }
++}
++
++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
++ struct bio *bio, int is_sync,
++ struct bfq_io_cq *bic, gfp_t gfp_mask)
++{
++ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
++ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
++ struct bfq_queue **async_bfqq = NULL;
++ struct bfq_queue *bfqq = NULL;
++
++ if (!is_sync) {
++ struct blkcg *blkcg;
++ struct bfq_group *bfqg;
++
++ rcu_read_lock();
++ blkcg = bio_blkcg(bio);
++ rcu_read_unlock();
++ bfqg = bfq_find_alloc_group(bfqd, blkcg);
++ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
++ ioprio);
++ bfqq = *async_bfqq;
++ }
++
++ if (!bfqq)
++ bfqq = bfq_find_alloc_queue(bfqd, bio, is_sync, bic, gfp_mask);
++
++ /*
++ * Pin the queue now that it's allocated, scheduler exit will
++ * prune it.
++ */
++ if (!is_sync && !(*async_bfqq)) {
++ atomic_inc(&bfqq->ref);
++ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
++ bfqq, atomic_read(&bfqq->ref));
++ *async_bfqq = bfqq;
++ }
++
++ atomic_inc(&bfqq->ref);
++ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq,
++ atomic_read(&bfqq->ref));
++ return bfqq;
++}
++
++static void bfq_update_io_thinktime(struct bfq_data *bfqd,
++ struct bfq_io_cq *bic)
++{
++ unsigned long elapsed = jiffies - bic->ttime.last_end_request;
++ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle);
++
++ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8;
++ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8;
++ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) /
++ bic->ttime.ttime_samples;
++}
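++
++/*
++ * The three updates above implement an exponentially weighted moving
++ * average with decay factor 7/8, kept in fixed point: ttime_samples
++ * converges to 256 (it gains roughly (256 - samples)/8 per update),
++ * ttime_total accumulates 256 * ttime with the same decay, and
++ * ttime_mean is their rounded quotient. E.g., starting from zero, one
++ * sample of ttime = 8 jiffies gives samples = 32, total = 256,
++ * mean = (256 + 128)/32 = 12; after many identical samples the mean
++ * settles at 8.
++ */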
++
++static void bfq_update_io_seektime(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ struct request *rq)
++{
++ sector_t sdist;
++ u64 total;
++
++ if (bfqq->last_request_pos < blk_rq_pos(rq))
++ sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
++ else
++ sdist = bfqq->last_request_pos - blk_rq_pos(rq);
++
++ /*
++ * Don't allow the seek distance to get too large from the
++ * odd fragment, pagein, etc.
++ */
++ if (bfqq->seek_samples == 0) /* first request, not really a seek */
++ sdist = 0;
++ else if (bfqq->seek_samples <= 60) /* second & third seek */
++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024);
++ else
++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64);
++
++ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8;
++ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8;
++ total = bfqq->seek_total + (bfqq->seek_samples/2);
++ do_div(total, bfqq->seek_samples);
++ bfqq->seek_mean = (sector_t)total;
++
++ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist,
++ (u64)bfqq->seek_mean);
++}
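++
++/*
++ * Same 7/8 fixed-point average as for think time, applied to the seek
++ * distance. The caps limit the influence of a single huge jump while the
++ * sample count is still low: assuming the usual 512-byte sector unit of
++ * blk_rq_pos(), the second and third samples are clamped to
++ * 4 * seek_mean + 2*1024*1024 sectors (~1 GiB), later ones to
++ * 4 * seek_mean + 2*1024*64 sectors (~64 MiB), so one stray pagein far
++ * away from the working set cannot instantly mark the queue as seeky.
++ */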
++
++/*
++ * Disable idle window if the process thinks too long or seeks so much that
++ * it doesn't matter.
++ */
++static void bfq_update_idle_window(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ struct bfq_io_cq *bic)
++{
++ int enable_idle;
++
++ /* Don't idle for async or idle io prio class. */
++ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
++ return;
++
++ enable_idle = bfq_bfqq_idle_window(bfqq);
++
++ if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
++ bfqd->bfq_slice_idle == 0 ||
++ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
++ bfqq->wr_coeff == 1))
++ enable_idle = 0;
++ else if (bfq_sample_valid(bic->ttime.ttime_samples)) {
++ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle &&
++ bfqq->wr_coeff == 1)
++ enable_idle = 0;
++ else
++ enable_idle = 1;
++ }
++ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
++ enable_idle);
++
++ if (enable_idle)
++ bfq_mark_bfqq_idle_window(bfqq);
++ else
++ bfq_clear_bfqq_idle_window(bfqq);
++}
++
++/*
++ * Called when a new fs request (rq) is added to bfqq. Check if there's
++ * something we should do about it.
++ */
++static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ struct request *rq)
++{
++ struct bfq_io_cq *bic = RQ_BIC(rq);
++
++ if (rq->cmd_flags & REQ_META)
++ bfqq->meta_pending++;
++
++ bfq_update_io_thinktime(bfqd, bic);
++ bfq_update_io_seektime(bfqd, bfqq, rq);
++ if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) {
++ bfq_clear_bfqq_constantly_seeky(bfqq);
++ if (!blk_queue_nonrot(bfqd->queue)) {
++ BUG_ON(!bfqd->const_seeky_busy_in_flight_queues);
++ bfqd->const_seeky_busy_in_flight_queues--;
++ }
++ }
++ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
++ !BFQQ_SEEKY(bfqq))
++ bfq_update_idle_window(bfqd, bfqq, bic);
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
++ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq),
++ (long long unsigned)bfqq->seek_mean);
++
++ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
++
++ if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) {
++ bool small_req = bfqq->queued[rq_is_sync(rq)] == 1 &&
++ blk_rq_sectors(rq) < 32;
++ bool budget_timeout = bfq_bfqq_budget_timeout(bfqq);
++
++ /*
++ * There is just this request queued: if the request
++ * is small and the queue is not to be expired, then
++ * just exit.
++ *
++ * In this way, if the disk is being idled to wait for
++ * a new request from the in-service queue, we avoid
++ * unplugging the device and committing the disk to serve
++ * just a small request. On the contrary, we wait for
++ * the block layer to decide when to unplug the device:
++ * hopefully, new requests will be merged to this one
++ * quickly, then the device will be unplugged and
++ * larger requests will be dispatched.
++ */
++ if (small_req && !budget_timeout)
++ return;
++
++ /*
++ * A large enough request arrived, or the queue is to
++ * be expired: in both cases disk idling is to be
++ * stopped, so clear wait_request flag and reset
++ * timer.
++ */
++ bfq_clear_bfqq_wait_request(bfqq);
++ del_timer(&bfqd->idle_slice_timer);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_idle_time(bfqq_group(bfqq));
++#endif
++
++ /*
++ * The queue is not empty, because a new request just
++ * arrived. Hence we can safely expire the queue, in
++ * case of budget timeout, without risking that the
++ * timestamps of the queue are not updated correctly.
++ * See [1] for more details.
++ */
++ if (budget_timeout)
++ bfq_bfqq_expire(bfqd, bfqq, false,
++ BFQ_BFQQ_BUDGET_TIMEOUT);
++
++ /*
++ * Let the request rip immediately, or let a new queue be
++ * selected if bfqq has just been expired.
++ */
++ __blk_run_queue(bfqd->queue);
++ }
++}
++
++static void bfq_insert_request(struct request_queue *q, struct request *rq)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++
++ assert_spin_locked(bfqd->queue->queue_lock);
++
++ bfq_add_request(rq);
++
++ rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
++ list_add_tail(&rq->queuelist, &bfqq->fifo);
++
++ bfq_rq_enqueued(bfqd, bfqq, rq);
++}
++
++static void bfq_update_hw_tag(struct bfq_data *bfqd)
++{
++ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver,
++ bfqd->rq_in_driver);
++
++ if (bfqd->hw_tag == 1)
++ return;
++
++ /*
++ * This sample is valid if the number of outstanding requests
++ * is large enough to allow a queueing behavior. Note that the
++ * sum is not exact, as it's not taking into account deactivated
++ * requests.
++ */
++ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD)
++ return;
++
++ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
++ return;
++
++ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD;
++ bfqd->max_rq_in_driver = 0;
++ bfqd->hw_tag_samples = 0;
++}
++
++static void bfq_completed_request(struct request_queue *q, struct request *rq)
++{
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++ struct bfq_data *bfqd = bfqq->bfqd;
++ bool sync = bfq_bfqq_sync(bfqq);
++
++ bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)",
++ blk_rq_sectors(rq), sync);
++
++ bfq_update_hw_tag(bfqd);
++
++ BUG_ON(!bfqd->rq_in_driver);
++ BUG_ON(!bfqq->dispatched);
++ bfqd->rq_in_driver--;
++ bfqq->dispatched--;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_completion(bfqq_group(bfqq),
++ rq_start_time_ns(rq),
++ rq_io_start_time_ns(rq), rq->cmd_flags);
++#endif
++
++ if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
++ bfq_weights_tree_remove(bfqd, &bfqq->entity,
++ &bfqd->queue_weights_tree);
++ if (!blk_queue_nonrot(bfqd->queue)) {
++ BUG_ON(!bfqd->busy_in_flight_queues);
++ bfqd->busy_in_flight_queues--;
++ if (bfq_bfqq_constantly_seeky(bfqq)) {
++ BUG_ON(!bfqd->
++ const_seeky_busy_in_flight_queues);
++ bfqd->const_seeky_busy_in_flight_queues--;
++ }
++ }
++ }
++
++ if (sync) {
++ bfqd->sync_flight--;
++ RQ_BIC(rq)->ttime.last_end_request = jiffies;
++ }
++
++ /*
++ * If we are waiting to discover whether the request pattern of the
++ * task associated with the queue is actually isochronous, and
++ * both requisites for this condition to hold are satisfied, then
++ * compute soft_rt_next_start (see the comments to the function
++ * bfq_bfqq_softrt_next_start()).
++ */
++ if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
++ RB_EMPTY_ROOT(&bfqq->sort_list))
++ bfqq->soft_rt_next_start =
++ bfq_bfqq_softrt_next_start(bfqd, bfqq);
++
++ /*
++ * If this is the in-service queue, check if it needs to be expired,
++ * or if we want to idle in case it has no pending requests.
++ */
++ if (bfqd->in_service_queue == bfqq) {
++ if (bfq_bfqq_budget_new(bfqq))
++ bfq_set_budget_timeout(bfqd);
++
++ if (bfq_bfqq_must_idle(bfqq)) {
++ bfq_arm_slice_timer(bfqd);
++ goto out;
++ } else if (bfq_may_expire_for_budg_timeout(bfqq))
++ bfq_bfqq_expire(bfqd, bfqq, false,
++ BFQ_BFQQ_BUDGET_TIMEOUT);
++ else if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
++ (bfqq->dispatched == 0 ||
++ !bfq_bfqq_may_idle(bfqq)))
++ bfq_bfqq_expire(bfqd, bfqq, false,
++ BFQ_BFQQ_NO_MORE_REQUESTS);
++ }
++
++ if (!bfqd->rq_in_driver)
++ bfq_schedule_dispatch(bfqd);
++
++out:
++ return;
++}
++
++static int __bfq_may_queue(struct bfq_queue *bfqq)
++{
++ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) {
++ bfq_clear_bfqq_must_alloc(bfqq);
++ return ELV_MQUEUE_MUST;
++ }
++
++ return ELV_MQUEUE_MAY;
++}
++
++static int bfq_may_queue(struct request_queue *q, int rw)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct task_struct *tsk = current;
++ struct bfq_io_cq *bic;
++ struct bfq_queue *bfqq;
++
++ /*
++ * Don't force setup of a queue from here, as a call to may_queue
++ * does not necessarily imply that a request actually will be
++ * queued. So just lookup a possibly existing queue, or return
++ * 'may queue' if that fails.
++ */
++ bic = bfq_bic_lookup(bfqd, tsk->io_context);
++ if (!bic)
++ return ELV_MQUEUE_MAY;
++
++ bfqq = bic_to_bfqq(bic, rw_is_sync(rw));
++ if (bfqq)
++ return __bfq_may_queue(bfqq);
++
++ return ELV_MQUEUE_MAY;
++}
++
++/*
++ * Queue lock held here.
++ */
++static void bfq_put_request(struct request *rq)
++{
++ struct bfq_queue *bfqq = RQ_BFQQ(rq);
++
++ if (bfqq) {
++ const int rw = rq_data_dir(rq);
++
++ BUG_ON(!bfqq->allocated[rw]);
++ bfqq->allocated[rw]--;
++
++ rq->elv.priv[0] = NULL;
++ rq->elv.priv[1] = NULL;
++
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d",
++ bfqq, atomic_read(&bfqq->ref));
++ bfq_put_queue(bfqq);
++ }
++}
++
++/*
++ * Allocate bfq data structures associated with this request.
++ */
++static int bfq_set_request(struct request_queue *q, struct request *rq,
++ struct bio *bio, gfp_t gfp_mask)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
++ const int rw = rq_data_dir(rq);
++ const int is_sync = rq_is_sync(rq);
++ struct bfq_queue *bfqq;
++ unsigned long flags;
++
++ might_sleep_if(gfpflags_allow_blocking(gfp_mask));
++
++ bfq_check_ioprio_change(bic, bio);
++
++ spin_lock_irqsave(q->queue_lock, flags);
++
++ if (!bic)
++ goto queue_fail;
++
++ bfq_bic_update_cgroup(bic, bio);
++
++ bfqq = bic_to_bfqq(bic, is_sync);
++ if (!bfqq || bfqq == &bfqd->oom_bfqq) {
++ bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, gfp_mask);
++ bic_set_bfqq(bic, bfqq, is_sync);
++ if (is_sync) {
++ if (bfqd->large_burst)
++ bfq_mark_bfqq_in_large_burst(bfqq);
++ else
++ bfq_clear_bfqq_in_large_burst(bfqq);
++ }
++ }
++
++ bfqq->allocated[rw]++;
++ atomic_inc(&bfqq->ref);
++ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq,
++ atomic_read(&bfqq->ref));
++
++ rq->elv.priv[0] = bic;
++ rq->elv.priv[1] = bfqq;
++
++ spin_unlock_irqrestore(q->queue_lock, flags);
++
++ return 0;
++
++queue_fail:
++ bfq_schedule_dispatch(bfqd);
++ spin_unlock_irqrestore(q->queue_lock, flags);
++
++ return 1;
++}
++
++static void bfq_kick_queue(struct work_struct *work)
++{
++ struct bfq_data *bfqd =
++ container_of(work, struct bfq_data, unplug_work);
++ struct request_queue *q = bfqd->queue;
++
++ spin_lock_irq(q->queue_lock);
++ __blk_run_queue(q);
++ spin_unlock_irq(q->queue_lock);
++}
++
++/*
++ * Handler of the expiration of the timer running if the in-service queue
++ * is idling inside its time slice.
++ */
++static void bfq_idle_slice_timer(unsigned long data)
++{
++ struct bfq_data *bfqd = (struct bfq_data *)data;
++ struct bfq_queue *bfqq;
++ unsigned long flags;
++ enum bfqq_expiration reason;
++
++ spin_lock_irqsave(bfqd->queue->queue_lock, flags);
++
++ bfqq = bfqd->in_service_queue;
++ /*
++ * Theoretical race here: the in-service queue can be NULL or
++ * different from the queue that was idling if the timer handler
++ * spins on the queue_lock and a new request arrives for the
++ * current queue and there is a full dispatch cycle that changes
++ * the in-service queue. This can hardly happen, but in the worst
++ * case we just expire a queue too early.
++ */
++ if (bfqq) {
++ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired");
++ if (bfq_bfqq_budget_timeout(bfqq))
++ /*
++ * Also here the queue can be safely expired
++ * for budget timeout without wasting
++ * guarantees
++ */
++ reason = BFQ_BFQQ_BUDGET_TIMEOUT;
++ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0)
++ /*
++ * The queue may not be empty upon timer expiration,
++ * because we may not disable the timer when the
++ * first request of the in-service queue arrives
++ * during disk idling.
++ */
++ reason = BFQ_BFQQ_TOO_IDLE;
++ else
++ goto schedule_dispatch;
++
++ bfq_bfqq_expire(bfqd, bfqq, true, reason);
++ }
++
++schedule_dispatch:
++ bfq_schedule_dispatch(bfqd);
++
++ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags);
++}
++
++static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
++{
++ del_timer_sync(&bfqd->idle_slice_timer);
++ cancel_work_sync(&bfqd->unplug_work);
++}
++
++static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
++ struct bfq_queue **bfqq_ptr)
++{
++ struct bfq_group *root_group = bfqd->root_group;
++ struct bfq_queue *bfqq = *bfqq_ptr;
++
++ bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
++ if (bfqq) {
++ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group);
++ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d",
++ bfqq, atomic_read(&bfqq->ref));
++ bfq_put_queue(bfqq);
++ *bfqq_ptr = NULL;
++ }
++}
++
++/*
++ * Release all the bfqg references to its async queues. If we are
++ * deallocating the group, these queues may still contain requests, so
++ * we reparent them to the root cgroup (i.e., the only one that will
++ * exist for sure until all the requests on a device are gone).
++ */
++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
++{
++ int i, j;
++
++ for (i = 0; i < 2; i++)
++ for (j = 0; j < IOPRIO_BE_NR; j++)
++ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]);
++
++ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
++}
++
++static void bfq_exit_queue(struct elevator_queue *e)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ struct request_queue *q = bfqd->queue;
++ struct bfq_queue *bfqq, *n;
++
++ bfq_shutdown_timer_wq(bfqd);
++
++ spin_lock_irq(q->queue_lock);
++
++ BUG_ON(bfqd->in_service_queue);
++ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
++ bfq_deactivate_bfqq(bfqd, bfqq, 0);
++
++ spin_unlock_irq(q->queue_lock);
++
++ bfq_shutdown_timer_wq(bfqd);
++
++ synchronize_rcu();
++
++ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ blkcg_deactivate_policy(q, &blkcg_policy_bfq);
++#else
++ kfree(bfqd->root_group);
++#endif
++
++ kfree(bfqd);
++}
++
++static void bfq_init_root_group(struct bfq_group *root_group,
++ struct bfq_data *bfqd)
++{
++ int i;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ root_group->entity.parent = NULL;
++ root_group->my_entity = NULL;
++ root_group->bfqd = bfqd;
++#endif
++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
++ root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
++}
++
++static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
++{
++ struct bfq_data *bfqd;
++ struct elevator_queue *eq;
++
++ eq = elevator_alloc(q, e);
++ if (!eq)
++ return -ENOMEM;
++
++ bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
++ if (!bfqd) {
++ kobject_put(&eq->kobj);
++ return -ENOMEM;
++ }
++ eq->elevator_data = bfqd;
++
++ /*
++ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
++ * Grab a permanent reference to it, so that the normal code flow
++ * will not attempt to free it.
++ */
++ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
++ atomic_inc(&bfqd->oom_bfqq.ref);
++ bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
++ bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
++ bfqd->oom_bfqq.entity.new_weight =
++ bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
++ /*
++ * Trigger weight initialization, according to ioprio, at the
++ * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
++ * class won't be changed any more.
++ */
++ bfqd->oom_bfqq.entity.prio_changed = 1;
++
++ bfqd->queue = q;
++
++ spin_lock_irq(q->queue_lock);
++ q->elevator = eq;
++ spin_unlock_irq(q->queue_lock);
++
++ bfqd->root_group = bfq_create_group_hierarchy(bfqd, q->node);
++ if (!bfqd->root_group)
++ goto out_free;
++ bfq_init_root_group(bfqd->root_group, bfqd);
++ bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqd->active_numerous_groups = 0;
++#endif
++
++ init_timer(&bfqd->idle_slice_timer);
++ bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
++ bfqd->idle_slice_timer.data = (unsigned long)bfqd;
++
++ bfqd->queue_weights_tree = RB_ROOT;
++ bfqd->group_weights_tree = RB_ROOT;
++
++ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue);
++
++ INIT_LIST_HEAD(&bfqd->active_list);
++ INIT_LIST_HEAD(&bfqd->idle_list);
++ INIT_HLIST_HEAD(&bfqd->burst_list);
++
++ bfqd->hw_tag = -1;
++
++ bfqd->bfq_max_budget = bfq_default_max_budget;
++
++ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0];
++ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1];
++ bfqd->bfq_back_max = bfq_back_max;
++ bfqd->bfq_back_penalty = bfq_back_penalty;
++ bfqd->bfq_slice_idle = bfq_slice_idle;
++ bfqd->bfq_class_idle_last_service = 0;
++ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq;
++ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async;
++ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync;
++
++ bfqd->bfq_requests_within_timer = 120;
++
++ bfqd->bfq_large_burst_thresh = 11;
++ bfqd->bfq_burst_interval = msecs_to_jiffies(500);
++
++ bfqd->low_latency = true;
++
++ bfqd->bfq_wr_coeff = 20;
++ bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300);
++ bfqd->bfq_wr_max_time = 0;
++ bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
++ bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500);
++ bfqd->bfq_wr_max_softrt_rate = 7000; /*
++ * Approximate rate required
++ * to playback or record a
++ * high-definition compressed
++ * video.
++ */
++ bfqd->wr_busy_queues = 0;
++ bfqd->busy_in_flight_queues = 0;
++ bfqd->const_seeky_busy_in_flight_queues = 0;
++
++ /*
++ * Begin by assuming, optimistically, that the device peak rate is
++ * equal to the highest reference rate.
++ */
++ bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
++ T_fast[blk_queue_nonrot(bfqd->queue)];
++ bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)];
++ bfqd->device_speed = BFQ_BFQD_FAST;
++
++ return 0;
++
++out_free:
++ kfree(bfqd);
++ kobject_put(&eq->kobj);
++ return -ENOMEM;
++}
++
++static void bfq_slab_kill(void)
++{
++ if (bfq_pool)
++ kmem_cache_destroy(bfq_pool);
++}
++
++static int __init bfq_slab_setup(void)
++{
++ bfq_pool = KMEM_CACHE(bfq_queue, 0);
++ if (!bfq_pool)
++ return -ENOMEM;
++ return 0;
++}
++
++static ssize_t bfq_var_show(unsigned int var, char *page)
++{
++ return sprintf(page, "%d\n", var);
++}
++
++static ssize_t bfq_var_store(unsigned long *var, const char *page,
++ size_t count)
++{
++ unsigned long new_val;
++ int ret = kstrtoul(page, 10, &new_val);
++
++ if (ret == 0)
++ *var = new_val;
++
++ return count;
++}
++
++static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ?
++ jiffies_to_msecs(bfqd->bfq_wr_max_time) :
++ jiffies_to_msecs(bfq_wr_duration(bfqd)));
++}
++
++static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
++{
++ struct bfq_queue *bfqq;
++ struct bfq_data *bfqd = e->elevator_data;
++ ssize_t num_char = 0;
++
++ num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n",
++ bfqd->queued);
++
++ spin_lock_irq(bfqd->queue->queue_lock);
++
++ num_char += sprintf(page + num_char, "Active:\n");
++ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) {
++ num_char += sprintf(page + num_char,
++ "pid%d: weight %hu, nr_queued %d %d, dur %d/%u\n",
++ bfqq->pid,
++ bfqq->entity.weight,
++ bfqq->queued[0],
++ bfqq->queued[1],
++ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ }
++
++ num_char += sprintf(page + num_char, "Idle:\n");
++ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) {
++ num_char += sprintf(page + num_char,
++ "pid%d: weight %hu, dur %d/%u\n",
++ bfqq->pid,
++ bfqq->entity.weight,
++ jiffies_to_msecs(jiffies -
++ bfqq->last_wr_start_finish),
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ }
++
++ spin_unlock_irq(bfqd->queue->queue_lock);
++
++ return num_char;
++}
++
++#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
++static ssize_t __FUNC(struct elevator_queue *e, char *page) \
++{ \
++ struct bfq_data *bfqd = e->elevator_data; \
++ unsigned int __data = __VAR; \
++ if (__CONV) \
++ __data = jiffies_to_msecs(__data); \
++ return bfq_var_show(__data, (page)); \
++}
++SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1);
++SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1);
++SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
++SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
++SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
++SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
++SHOW_FUNCTION(bfq_max_budget_async_rq_show,
++ bfqd->bfq_max_budget_async_rq, 0);
++SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1);
++SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1);
++SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
++SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0);
++SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1);
++SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, 1);
++SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async,
++ 1);
++SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0);
++#undef SHOW_FUNCTION
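As an illustration (a sketch of the preprocessor output, not an addition to the patch), SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1) expands to roughly the handler below; a non-zero __CONV argument makes the handler report the jiffies-based value in milliseconds:

	static ssize_t bfq_slice_idle_show(struct elevator_queue *e, char *page)
	{
		struct bfq_data *bfqd = e->elevator_data;
		unsigned int __data = bfqd->bfq_slice_idle;	/* __VAR */

		if (1)						/* __CONV */
			__data = jiffies_to_msecs(__data);
		return bfq_var_show(__data, (page));
	}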
++
++#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
++static ssize_t \
++__FUNC(struct elevator_queue *e, const char *page, size_t count) \
++{ \
++ struct bfq_data *bfqd = e->elevator_data; \
++ unsigned long uninitialized_var(__data); \
++ int ret = bfq_var_store(&__data, (page), count); \
++ if (__data < (MIN)) \
++ __data = (MIN); \
++ else if (__data > (MAX)) \
++ __data = (MAX); \
++ if (__CONV) \
++ *(__PTR) = msecs_to_jiffies(__data); \
++ else \
++ *(__PTR) = __data; \
++ return ret; \
++}
++STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
++ INT_MAX, 1);
++STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
++ INT_MAX, 1);
++STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
++STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
++ INT_MAX, 0);
++STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
++STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq,
++ 1, INT_MAX, 0);
++STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0,
++ INT_MAX, 1);
++STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0);
++STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1);
++STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX,
++ 1);
++STORE_FUNCTION(bfq_wr_min_idle_time_store, &bfqd->bfq_wr_min_idle_time, 0,
++ INT_MAX, 1);
++STORE_FUNCTION(bfq_wr_min_inter_arr_async_store,
++ &bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1);
++STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0,
++ INT_MAX, 0);
++#undef STORE_FUNCTION
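Likewise, each STORE_FUNCTION invocation generates the matching store handler, which parses the written value with bfq_var_store(), clamps it to [MIN, MAX] and converts milliseconds to jiffies when __CONV is set. A rough, simplified sketch of the expansion for bfq_slice_idle_store (illustrative only):

	static ssize_t
	bfq_slice_idle_store(struct elevator_queue *e, const char *page, size_t count)
	{
		struct bfq_data *bfqd = e->elevator_data;
		unsigned long uninitialized_var(__data);
		int ret = bfq_var_store(&__data, (page), count);

		if (__data > INT_MAX)		/* MAX == INT_MAX; MIN is 0 here */
			__data = INT_MAX;
		*(&bfqd->bfq_slice_idle) = msecs_to_jiffies(__data);	/* __CONV */
		return ret;
	}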
++
++/* do nothing for the moment */
++static ssize_t bfq_weights_store(struct elevator_queue *e,
++ const char *page, size_t count)
++{
++ return count;
++}
++
++static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
++{
++ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
++
++ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES)
++ return bfq_calc_max_budget(bfqd->peak_rate, timeout);
++ else
++ return bfq_default_max_budget;
++}
++
++static ssize_t bfq_max_budget_store(struct elevator_queue *e,
++ const char *page, size_t count)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ unsigned long uninitialized_var(__data);
++ int ret = bfq_var_store(&__data, (page), count);
++
++ if (__data == 0)
++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
++ else {
++ if (__data > INT_MAX)
++ __data = INT_MAX;
++ bfqd->bfq_max_budget = __data;
++ }
++
++ bfqd->bfq_user_max_budget = __data;
++
++ return ret;
++}
++
++static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
++ const char *page, size_t count)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ unsigned long uninitialized_var(__data);
++ int ret = bfq_var_store(&__data, (page), count);
++
++ if (__data < 1)
++ __data = 1;
++ else if (__data > INT_MAX)
++ __data = INT_MAX;
++
++ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data);
++ if (bfqd->bfq_user_max_budget == 0)
++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
++
++ return ret;
++}
++
++static ssize_t bfq_low_latency_store(struct elevator_queue *e,
++ const char *page, size_t count)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ unsigned long uninitialized_var(__data);
++ int ret = bfq_var_store(&__data, (page), count);
++
++ if (__data > 1)
++ __data = 1;
++ if (__data == 0 && bfqd->low_latency != 0)
++ bfq_end_wr(bfqd);
++ bfqd->low_latency = __data;
++
++ return ret;
++}
++
++#define BFQ_ATTR(name) \
++ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store)
++
++static struct elv_fs_entry bfq_attrs[] = {
++ BFQ_ATTR(fifo_expire_sync),
++ BFQ_ATTR(fifo_expire_async),
++ BFQ_ATTR(back_seek_max),
++ BFQ_ATTR(back_seek_penalty),
++ BFQ_ATTR(slice_idle),
++ BFQ_ATTR(max_budget),
++ BFQ_ATTR(max_budget_async_rq),
++ BFQ_ATTR(timeout_sync),
++ BFQ_ATTR(timeout_async),
++ BFQ_ATTR(low_latency),
++ BFQ_ATTR(wr_coeff),
++ BFQ_ATTR(wr_max_time),
++ BFQ_ATTR(wr_rt_max_time),
++ BFQ_ATTR(wr_min_idle_time),
++ BFQ_ATTR(wr_min_inter_arr_async),
++ BFQ_ATTR(wr_max_softrt_rate),
++ BFQ_ATTR(weights),
++ __ATTR_NULL
++};
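Each BFQ_ATTR(name) entry above ties a sysfs file to the bfq_<name>_show/_store pair generated by the macros earlier in this file. Based on the standard __ATTR helper in include/linux/sysfs.h (details vary slightly across kernel versions), BFQ_ATTR(low_latency) amounts to roughly:

	{
		.attr  = { .name = "low_latency", .mode = S_IRUGO | S_IWUSR },
		.show  = bfq_low_latency_show,
		.store = bfq_low_latency_store,
	}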
++
++static struct elevator_type iosched_bfq = {
++ .ops = {
++ .elevator_merge_fn = bfq_merge,
++ .elevator_merged_fn = bfq_merged_request,
++ .elevator_merge_req_fn = bfq_merged_requests,
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ .elevator_bio_merged_fn = bfq_bio_merged,
++#endif
++ .elevator_allow_merge_fn = bfq_allow_merge,
++ .elevator_dispatch_fn = bfq_dispatch_requests,
++ .elevator_add_req_fn = bfq_insert_request,
++ .elevator_activate_req_fn = bfq_activate_request,
++ .elevator_deactivate_req_fn = bfq_deactivate_request,
++ .elevator_completed_req_fn = bfq_completed_request,
++ .elevator_former_req_fn = elv_rb_former_request,
++ .elevator_latter_req_fn = elv_rb_latter_request,
++ .elevator_init_icq_fn = bfq_init_icq,
++ .elevator_exit_icq_fn = bfq_exit_icq,
++ .elevator_set_req_fn = bfq_set_request,
++ .elevator_put_req_fn = bfq_put_request,
++ .elevator_may_queue_fn = bfq_may_queue,
++ .elevator_init_fn = bfq_init_queue,
++ .elevator_exit_fn = bfq_exit_queue,
++ },
++ .icq_size = sizeof(struct bfq_io_cq),
++ .icq_align = __alignof__(struct bfq_io_cq),
++ .elevator_attrs = bfq_attrs,
++ .elevator_name = "bfq",
++ .elevator_owner = THIS_MODULE,
++};
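Once the elevator_type above is registered, the scheduler can be selected per device through the usual sysfs interface of the legacy request path this code plugs into. A minimal userspace sketch (sdX is a placeholder device name, and root privileges are assumed):

	#include <stdio.h>

	int main(void)
	{
		/* Replace sdX with a real block device using the legacy I/O path. */
		FILE *f = fopen("/sys/block/sdX/queue/scheduler", "w");

		if (!f)
			return 1;
		/* Write the name the elevator registered under (.elevator_name). */
		fputs("bfq", f);
		fclose(f);
		return 0;
	}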
++
++static int __init bfq_init(void)
++{
++ int ret;
++
++ /*
++ * Can be 0 on HZ < 1000 setups.
++ */
++ if (bfq_slice_idle == 0)
++ bfq_slice_idle = 1;
++
++ if (bfq_timeout_async == 0)
++ bfq_timeout_async = 1;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ ret = blkcg_policy_register(&blkcg_policy_bfq);
++ if (ret)
++ return ret;
++#endif
++
++ ret = -ENOMEM;
++ if (bfq_slab_setup())
++ goto err_pol_unreg;
++
++ /*
++ * Times to load large popular applications for the typical systems
++ * installed on the reference devices (see the comments before the
++ * definitions of the two arrays).
++ */
++ T_slow[0] = msecs_to_jiffies(2600);
++ T_slow[1] = msecs_to_jiffies(1000);
++ T_fast[0] = msecs_to_jiffies(5500);
++ T_fast[1] = msecs_to_jiffies(2000);
++
++ /*
++ * Thresholds that determine the switch between speed classes (see
++ * the comments before the definition of the array).
++ */
++ device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2;
++ device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2;
++
++ ret = elv_register(&iosched_bfq);
++ if (ret)
++ goto err_pol_unreg;
++
++ pr_info("BFQ I/O-scheduler: v7r11");
++
++ return 0;
++
++err_pol_unreg:
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ blkcg_policy_unregister(&blkcg_policy_bfq);
++#endif
++ return ret;
++}
++
++static void __exit bfq_exit(void)
++{
++ elv_unregister(&iosched_bfq);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ blkcg_policy_unregister(&blkcg_policy_bfq);
++#endif
++ bfq_slab_kill();
++}
++
++module_init(bfq_init);
++module_exit(bfq_exit);
++
++MODULE_AUTHOR("Arianna Avanzini, Fabio Checconi, Paolo Valente");
++MODULE_LICENSE("GPL");
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+new file mode 100644
+index 0000000..a64fec1
+--- /dev/null
++++ b/block/bfq-sched.c
+@@ -0,0 +1,1200 @@
++/*
++ * BFQ: Hierarchical B-WF2Q+ scheduler.
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
++ *
++ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
++ * Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ */
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++#define for_each_entity(entity) \
++ for (; entity ; entity = entity->parent)
++
++#define for_each_entity_safe(entity, parent) \
++ for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
++
++
++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
++ int extract,
++ struct bfq_data *bfqd);
++
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
++
++static void bfq_update_budget(struct bfq_entity *next_in_service)
++{
++ struct bfq_entity *bfqg_entity;
++ struct bfq_group *bfqg;
++ struct bfq_sched_data *group_sd;
++
++ BUG_ON(!next_in_service);
++
++ group_sd = next_in_service->sched_data;
++
++ bfqg = container_of(group_sd, struct bfq_group, sched_data);
++ /*
++ * bfq_group's my_entity field is not NULL only if the group
++ * is not the root group. We must not touch the root entity
++ * as it must never become an in-service entity.
++ */
++ bfqg_entity = bfqg->my_entity;
++ if (bfqg_entity)
++ bfqg_entity->budget = next_in_service->budget;
++}
++
++static int bfq_update_next_in_service(struct bfq_sched_data *sd)
++{
++ struct bfq_entity *next_in_service;
++
++ if (sd->in_service_entity)
++ /* will update/requeue at the end of service */
++ return 0;
++
++ /*
++ * NOTE: this can be improved in many ways, such as returning
++ * 1 (and thus propagating upwards the update) only when the
++ * budget changes, or caching the bfqq that will be scheduled
++	 * next from this subtree. For now we worry more about
++ * correctness than about performance...
++ */
++ next_in_service = bfq_lookup_next_entity(sd, 0, NULL);
++ sd->next_in_service = next_in_service;
++
++ if (next_in_service)
++ bfq_update_budget(next_in_service);
++
++ return 1;
++}
++
++static void bfq_check_next_in_service(struct bfq_sched_data *sd,
++ struct bfq_entity *entity)
++{
++ BUG_ON(sd->next_in_service != entity);
++}
++#else
++#define for_each_entity(entity) \
++ for (; entity ; entity = NULL)
++
++#define for_each_entity_safe(entity, parent) \
++ for (parent = NULL; entity ; entity = parent)
++
++static int bfq_update_next_in_service(struct bfq_sched_data *sd)
++{
++ return 0;
++}
++
++static void bfq_check_next_in_service(struct bfq_sched_data *sd,
++ struct bfq_entity *entity)
++{
++}
++
++static void bfq_update_budget(struct bfq_entity *next_in_service)
++{
++}
++#endif
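The two definitions of for_each_entity above give the same call sites different walking behaviour: with CONFIG_BFQ_GROUP_IOSCHED the loop climbs the parent pointers up to the root group, while in the flat build the body runs exactly once for the queue's own entity. A usage sketch (bfqq stands for any struct bfq_queue pointer):

	struct bfq_entity *entity = &bfqq->entity;	/* start at the leaf */

	for_each_entity(entity) {
		/*
		 * Hierarchical build: visits the queue's entity, then each
		 * ancestor group entity up to the root. Flat build:
		 * entity->parent is never followed, so only the queue's
		 * entity is visited.
		 */
		pr_debug("bfq: entity %p, weight %u\n",
			 entity, (unsigned int)entity->weight);
	}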
++
++/*
++ * Shift for timestamp calculations. This actually limits the maximum
++ * service allowed in one timestamp delta (small shift values increase it),
++ * the maximum total weight that can be used for the queues in the system
++ * (big shift values increase it), and the period of virtual time
++ * wraparounds.
++ */
++#define WFQ_SERVICE_SHIFT 22
++
++/**
++ * bfq_gt - compare two timestamps.
++ * @a: first ts.
++ * @b: second ts.
++ *
++ * Return @a > @b, dealing with wrapping correctly.
++ */
++static int bfq_gt(u64 a, u64 b)
++{
++ return (s64)(a - b) > 0;
++}
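The signed-difference comparison above is what keeps the ordering correct across 64-bit wraparound of the virtual time. A small worked example (values picked only for illustration):

	u64 b = (u64)-10;	/* timestamp taken just before the counter wraps */
	u64 a = 5;		/* logically later timestamp, after the wrap */

	/*
	 * As plain unsigned values a < b, but a - b == 15, so
	 * (s64)(a - b) > 0 and bfq_gt(a, b) correctly reports a as the
	 * more recent timestamp.
	 */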
++
++static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = NULL;
++
++ BUG_ON(!entity);
++
++ if (!entity->my_sched_data)
++ bfqq = container_of(entity, struct bfq_queue, entity);
++
++ return bfqq;
++}
++
++
++/**
++ * bfq_delta - map service into the virtual time domain.
++ * @service: amount of service.
++ * @weight: scale factor (weight of an entity or weight sum).
++ */
++static u64 bfq_delta(unsigned long service, unsigned long weight)
++{
++ u64 d = (u64)service << WFQ_SERVICE_SHIFT;
++
++ do_div(d, weight);
++ return d;
++}
++
++/**
++ * bfq_calc_finish - assign the finish time to an entity.
++ * @entity: the entity to act upon.
++ * @service: the service to be charged to the entity.
++ */
++static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++
++ BUG_ON(entity->weight == 0);
++
++ entity->finish = entity->start +
++ bfq_delta(service, entity->weight);
++
++ if (bfqq) {
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "calc_finish: serv %lu, w %d",
++ service, entity->weight);
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "calc_finish: start %llu, finish %llu, delta %llu",
++ entity->start, entity->finish,
++ bfq_delta(service, entity->weight));
++ }
++}
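To make the timestamp arithmetic concrete, here is a worked example with made-up numbers (real budgets and weights depend on the device and on the queue's ioprio). With WFQ_SERVICE_SHIFT equal to 22:

	u64 start = 0;
	unsigned long service = 8192, weight = 100;
	u64 finish = start + bfq_delta(service, weight);

	/*
	 * bfq_delta(8192, 100) = (8192 << 22) / 100
	 *                      = 34359738368 / 100
	 *                      = 343597383 (integer division),
	 * so finish == 343597383 virtual-time units. Doubling the weight
	 * would halve this increment, which is how heavier entities obtain
	 * a proportionally larger share of service.
	 */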
++
++/**
++ * bfq_entity_of - get an entity from a node.
++ * @node: the node field of the entity.
++ *
++ * Convert a node pointer to the relative entity. This is used only
++ * to simplify the logic of some functions and not as the generic
++ * conversion mechanism because, e.g., in the tree walking functions,
++ * the check for a %NULL value would be redundant.
++ */
++static struct bfq_entity *bfq_entity_of(struct rb_node *node)
++{
++ struct bfq_entity *entity = NULL;
++
++ if (node)
++ entity = rb_entry(node, struct bfq_entity, rb_node);
++
++ return entity;
++}
++
++/**
++ * bfq_extract - remove an entity from a tree.
++ * @root: the tree root.
++ * @entity: the entity to remove.
++ */
++static void bfq_extract(struct rb_root *root, struct bfq_entity *entity)
++{
++ BUG_ON(entity->tree != root);
++
++ entity->tree = NULL;
++ rb_erase(&entity->rb_node, root);
++}
++
++/**
++ * bfq_idle_extract - extract an entity from the idle tree.
++ * @st: the service tree of the owning @entity.
++ * @entity: the entity being removed.
++ */
++static void bfq_idle_extract(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ struct rb_node *next;
++
++ BUG_ON(entity->tree != &st->idle);
++
++ if (entity == st->first_idle) {
++ next = rb_next(&entity->rb_node);
++ st->first_idle = bfq_entity_of(next);
++ }
++
++ if (entity == st->last_idle) {
++ next = rb_prev(&entity->rb_node);
++ st->last_idle = bfq_entity_of(next);
++ }
++
++ bfq_extract(&st->idle, entity);
++
++ if (bfqq)
++ list_del(&bfqq->bfqq_list);
++}
++
++/**
++ * bfq_insert - generic tree insertion.
++ * @root: tree root.
++ * @entity: entity to insert.
++ *
++ * This is used for the idle and the active tree, since they are both
++ * ordered by finish time.
++ */
++static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
++{
++ struct bfq_entity *entry;
++ struct rb_node **node = &root->rb_node;
++ struct rb_node *parent = NULL;
++
++ BUG_ON(entity->tree);
++
++ while (*node) {
++ parent = *node;
++ entry = rb_entry(parent, struct bfq_entity, rb_node);
++
++ if (bfq_gt(entry->finish, entity->finish))
++ node = &parent->rb_left;
++ else
++ node = &parent->rb_right;
++ }
++
++ rb_link_node(&entity->rb_node, parent, node);
++ rb_insert_color(&entity->rb_node, root);
++
++ entity->tree = root;
++}
++
++/**
++ * bfq_update_min - update the min_start field of an entity.
++ * @entity: the entity to update.
++ * @node: one of its children.
++ *
++ * This function is called when @entity may store an invalid value for
++ * min_start due to updates to the active tree. The function assumes
++ * that the subtree rooted at @node (which may be its left or its right
++ * child) has a valid min_start value.
++ */
++static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node)
++{
++ struct bfq_entity *child;
++
++ if (node) {
++ child = rb_entry(node, struct bfq_entity, rb_node);
++ if (bfq_gt(entity->min_start, child->min_start))
++ entity->min_start = child->min_start;
++ }
++}
++
++/**
++ * bfq_update_active_node - recalculate min_start.
++ * @node: the node to update.
++ *
++ * @node may have changed position or one of its children may have moved;
++ * this function updates its min_start value. The left and right subtrees
++ * are assumed to hold a correct min_start value.
++ */
++static void bfq_update_active_node(struct rb_node *node)
++{
++ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);
++
++ entity->min_start = entity->start;
++ bfq_update_min(entity, node->rb_right);
++ bfq_update_min(entity, node->rb_left);
++}
++
++/**
++ * bfq_update_active_tree - update min_start for the whole active tree.
++ * @node: the starting node.
++ *
++ * @node must be the deepest modified node after an update. This function
++ * updates its min_start using the values held by its children, assuming
++ * that they did not change, and then updates all the nodes that may have
++ * changed in the path to the root. The only nodes that may have changed
++ * are the ones in the path or their siblings.
++ */
++static void bfq_update_active_tree(struct rb_node *node)
++{
++ struct rb_node *parent;
++
++up:
++ bfq_update_active_node(node);
++
++ parent = rb_parent(node);
++ if (!parent)
++ return;
++
++ if (node == parent->rb_left && parent->rb_right)
++ bfq_update_active_node(parent->rb_right);
++ else if (parent->rb_left)
++ bfq_update_active_node(parent->rb_left);
++
++ node = parent;
++ goto up;
++}
++
++static void bfq_weights_tree_add(struct bfq_data *bfqd,
++ struct bfq_entity *entity,
++ struct rb_root *root);
++
++static void bfq_weights_tree_remove(struct bfq_data *bfqd,
++ struct bfq_entity *entity,
++ struct rb_root *root);
++
++
++/**
++ * bfq_active_insert - insert an entity in the active tree of its
++ * group/device.
++ * @st: the service tree of the entity.
++ * @entity: the entity being inserted.
++ *
++ * The active tree is ordered by finish time, but an extra key is kept
++ * for each node, containing the minimum value for the start times of
++ * its children (and the node itself), so it's possible to search for
++ * the eligible node with the lowest finish time in logarithmic time.
++ */
++static void bfq_active_insert(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ struct rb_node *node = &entity->rb_node;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ struct bfq_sched_data *sd = NULL;
++ struct bfq_group *bfqg = NULL;
++ struct bfq_data *bfqd = NULL;
++#endif
++
++ bfq_insert(&st->active, entity);
++
++ if (node->rb_left)
++ node = node->rb_left;
++ else if (node->rb_right)
++ node = node->rb_right;
++
++ bfq_update_active_tree(node);
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ sd = entity->sched_data;
++ bfqg = container_of(sd, struct bfq_group, sched_data);
++ BUG_ON(!bfqg);
++ bfqd = (struct bfq_data *)bfqg->bfqd;
++#endif
++ if (bfqq)
++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else { /* bfq_group */
++ BUG_ON(!bfqd);
++ bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree);
++ }
++ if (bfqg != bfqd->root_group) {
++ BUG_ON(!bfqg);
++ BUG_ON(!bfqd);
++ bfqg->active_entities++;
++ if (bfqg->active_entities == 2)
++ bfqd->active_numerous_groups++;
++ }
++#endif
++}
++
++/**
++ * bfq_ioprio_to_weight - calc a weight from an ioprio.
++ * @ioprio: the ioprio value to convert.
++ */
++static unsigned short bfq_ioprio_to_weight(int ioprio)
++{
++ BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
++ return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio;
++}
++
++/**
++ * bfq_weight_to_ioprio - calc an ioprio from a weight.
++ * @weight: the weight value to convert.
++ *
++ * To preserve as much as possible the old only-ioprio user interface,
++ * 0 is used as an escape ioprio value for weights (numerically) equal to or
++ * larger than IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF.
++ */
++static unsigned short bfq_weight_to_ioprio(int weight)
++{
++ BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT);
++ return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight < 0 ?
++ 0 : IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight;
++}
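Assuming the usual kernel value IOPRIO_BE_NR == 8 and the BFQ_WEIGHT_CONVERSION_COEFF of 10 defined in bfq.h below, the two conversions work out as follows (worked examples only, no new behaviour):

	/*
	 * bfq_ioprio_to_weight(0) == 8 * 10 - 0 == 80  (highest BE priority)
	 * bfq_ioprio_to_weight(4) == 8 * 10 - 4 == 76  (BFQ_DEFAULT_QUEUE_IOPRIO)
	 * bfq_ioprio_to_weight(7) == 8 * 10 - 7 == 73  (lowest BE priority)
	 *
	 * bfq_weight_to_ioprio(76) == 80 - 76 == 4
	 * bfq_weight_to_ioprio(80) == 0, and any weight above 80 also maps
	 * to the escape value 0, as described in the comment above.
	 */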
++
++static void bfq_get_entity(struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++
++ if (bfqq) {
++ atomic_inc(&bfqq->ref);
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
++ bfqq, atomic_read(&bfqq->ref));
++ }
++}
++
++/**
++ * bfq_find_deepest - find the deepest node that an extraction can modify.
++ * @node: the node being removed.
++ *
++ * Do the first step of an extraction in an rb tree, looking for the
++ * node that will replace @node, and returning the deepest node that
++ * the following modifications to the tree can touch. If @node is the
++ * last node in the tree return %NULL.
++ */
++static struct rb_node *bfq_find_deepest(struct rb_node *node)
++{
++ struct rb_node *deepest;
++
++ if (!node->rb_right && !node->rb_left)
++ deepest = rb_parent(node);
++ else if (!node->rb_right)
++ deepest = node->rb_left;
++ else if (!node->rb_left)
++ deepest = node->rb_right;
++ else {
++ deepest = rb_next(node);
++ if (deepest->rb_right)
++ deepest = deepest->rb_right;
++ else if (rb_parent(deepest) != node)
++ deepest = rb_parent(deepest);
++ }
++
++ return deepest;
++}
++
++/**
++ * bfq_active_extract - remove an entity from the active tree.
++ * @st: the service_tree containing the tree.
++ * @entity: the entity being removed.
++ */
++static void bfq_active_extract(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ struct rb_node *node;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ struct bfq_sched_data *sd = NULL;
++ struct bfq_group *bfqg = NULL;
++ struct bfq_data *bfqd = NULL;
++#endif
++
++ node = bfq_find_deepest(&entity->rb_node);
++ bfq_extract(&st->active, entity);
++
++ if (node)
++ bfq_update_active_tree(node);
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ sd = entity->sched_data;
++ bfqg = container_of(sd, struct bfq_group, sched_data);
++ BUG_ON(!bfqg);
++ bfqd = (struct bfq_data *)bfqg->bfqd;
++#endif
++ if (bfqq)
++ list_del(&bfqq->bfqq_list);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else { /* bfq_group */
++ BUG_ON(!bfqd);
++ bfq_weights_tree_remove(bfqd, entity,
++ &bfqd->group_weights_tree);
++ }
++ if (bfqg != bfqd->root_group) {
++ BUG_ON(!bfqg);
++ BUG_ON(!bfqd);
++ BUG_ON(!bfqg->active_entities);
++ bfqg->active_entities--;
++ if (bfqg->active_entities == 1) {
++ BUG_ON(!bfqd->active_numerous_groups);
++ bfqd->active_numerous_groups--;
++ }
++ }
++#endif
++}
++
++/**
++ * bfq_idle_insert - insert an entity into the idle tree.
++ * @st: the service tree containing the tree.
++ * @entity: the entity to insert.
++ */
++static void bfq_idle_insert(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ struct bfq_entity *first_idle = st->first_idle;
++ struct bfq_entity *last_idle = st->last_idle;
++
++ if (!first_idle || bfq_gt(first_idle->finish, entity->finish))
++ st->first_idle = entity;
++ if (!last_idle || bfq_gt(entity->finish, last_idle->finish))
++ st->last_idle = entity;
++
++ bfq_insert(&st->idle, entity);
++
++ if (bfqq)
++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
++}
++
++/**
++ * bfq_forget_entity - remove an entity from the wfq trees.
++ * @st: the service tree.
++ * @entity: the entity being removed.
++ *
++ * Update the device status and forget everything about @entity, putting
++ * the device reference to it, if it is a queue. Entities belonging to
++ * groups are not refcounted.
++ */
++static void bfq_forget_entity(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ struct bfq_sched_data *sd;
++
++ BUG_ON(!entity->on_st);
++
++ entity->on_st = 0;
++ st->wsum -= entity->weight;
++ if (bfqq) {
++ sd = entity->sched_data;
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
++ bfqq, atomic_read(&bfqq->ref));
++ bfq_put_queue(bfqq);
++ }
++}
++
++/**
++ * bfq_put_idle_entity - release the idle tree ref of an entity.
++ * @st: service tree for the entity.
++ * @entity: the entity being released.
++ */
++static void bfq_put_idle_entity(struct bfq_service_tree *st,
++ struct bfq_entity *entity)
++{
++ bfq_idle_extract(st, entity);
++ bfq_forget_entity(st, entity);
++}
++
++/**
++ * bfq_forget_idle - update the idle tree if necessary.
++ * @st: the service tree to act upon.
++ *
++ * To preserve the global O(log N) complexity we only remove one entry here;
++ * as the idle tree will not grow indefinitely this can be done safely.
++ */
++static void bfq_forget_idle(struct bfq_service_tree *st)
++{
++ struct bfq_entity *first_idle = st->first_idle;
++ struct bfq_entity *last_idle = st->last_idle;
++
++ if (RB_EMPTY_ROOT(&st->active) && last_idle &&
++ !bfq_gt(last_idle->finish, st->vtime)) {
++ /*
++ * Forget the whole idle tree, increasing the vtime past
++ * the last finish time of idle entities.
++ */
++ st->vtime = last_idle->finish;
++ }
++
++ if (first_idle && !bfq_gt(first_idle->finish, st->vtime))
++ bfq_put_idle_entity(st, first_idle);
++}
++
++static struct bfq_service_tree *
++__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
++ struct bfq_entity *entity)
++{
++ struct bfq_service_tree *new_st = old_st;
++
++ if (entity->prio_changed) {
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ unsigned short prev_weight, new_weight;
++ struct bfq_data *bfqd = NULL;
++ struct rb_root *root;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ struct bfq_sched_data *sd;
++ struct bfq_group *bfqg;
++#endif
++
++ if (bfqq)
++ bfqd = bfqq->bfqd;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ sd = entity->my_sched_data;
++ bfqg = container_of(sd, struct bfq_group, sched_data);
++ BUG_ON(!bfqg);
++ bfqd = (struct bfq_data *)bfqg->bfqd;
++ BUG_ON(!bfqd);
++ }
++#endif
++
++ BUG_ON(old_st->wsum < entity->weight);
++ old_st->wsum -= entity->weight;
++
++ if (entity->new_weight != entity->orig_weight) {
++ if (entity->new_weight < BFQ_MIN_WEIGHT ||
++ entity->new_weight > BFQ_MAX_WEIGHT) {
++ printk(KERN_CRIT "update_weight_prio: "
++ "new_weight %d\n",
++ entity->new_weight);
++ BUG();
++ }
++ entity->orig_weight = entity->new_weight;
++ if (bfqq)
++ bfqq->ioprio =
++ bfq_weight_to_ioprio(entity->orig_weight);
++ }
++
++ if (bfqq)
++ bfqq->ioprio_class = bfqq->new_ioprio_class;
++ entity->prio_changed = 0;
++
++ /*
++ * NOTE: here we may be changing the weight too early,
++ * this will cause unfairness. The correct approach
++ * would have required additional complexity to defer
++ * weight changes to the proper time instants (i.e.,
++ * when entity->finish <= old_st->vtime).
++ */
++ new_st = bfq_entity_service_tree(entity);
++
++ prev_weight = entity->weight;
++ new_weight = entity->orig_weight *
++ (bfqq ? bfqq->wr_coeff : 1);
++ /*
++ * If the weight of the entity changes, remove the entity
++ * from its old weight counter (if there is a counter
++ * associated with the entity), and add it to the counter
++ * associated with its new weight.
++ */
++ if (prev_weight != new_weight) {
++ root = bfqq ? &bfqd->queue_weights_tree :
++ &bfqd->group_weights_tree;
++ bfq_weights_tree_remove(bfqd, entity, root);
++ }
++ entity->weight = new_weight;
++ /*
++ * Add the entity to its weights tree only if it is
++ * not associated with a weight-raised queue.
++ */
++ if (prev_weight != new_weight &&
++ (bfqq ? bfqq->wr_coeff == 1 : 1))
++ /* If we get here, root has been initialized. */
++ bfq_weights_tree_add(bfqd, entity, root);
++
++ new_st->wsum += entity->weight;
++
++ if (new_st != old_st)
++ entity->start = new_st->vtime;
++ }
++
++ return new_st;
++}
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
++#endif
++
++/**
++ * bfq_bfqq_served - update the scheduler status after selection for
++ * service.
++ * @bfqq: the queue being served.
++ * @served: bytes to transfer.
++ *
++ * NOTE: this can be optimized, as the timestamps of upper level entities
++ * are synchronized every time a new bfqq is selected for service. For now,
++ * we keep it to better check consistency.
++ */
++static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++ struct bfq_service_tree *st;
++
++ for_each_entity(entity) {
++ st = bfq_entity_service_tree(entity);
++
++ entity->service += served;
++ BUG_ON(entity->service > entity->budget);
++ BUG_ON(st->wsum == 0);
++
++ st->vtime += bfq_delta(served, st->wsum);
++ bfq_forget_idle(st);
++ }
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
++#endif
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
++}
++
++/**
++ * bfq_bfqq_charge_full_budget - set the service to the entity budget.
++ * @bfqq: the queue that needs a service update.
++ *
++ * When it's not possible to be fair in the service domain, because
++ * a queue is not consuming its budget fast enough (the meaning of
++ * fast depends on the timeout parameter), we charge it a full
++ * budget. In this way we should obtain a sort of time-domain
++ * fairness among all the seeky/slow queues.
++ */
++static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
++
++ bfq_bfqq_served(bfqq, entity->budget - entity->service);
++}
++
++/**
++ * __bfq_activate_entity - activate an entity.
++ * @entity: the entity being activated.
++ *
++ * Called whenever an entity is activated, i.e., it is not active and one
++ * of its children receives a new request, or has to be reactivated due to
++ * budget exhaustion. It uses the current budget of the entity (and the
++ * service received if @entity is active) of the queue to calculate its
++ * timestamps.
++ */
++static void __bfq_activate_entity(struct bfq_entity *entity)
++{
++ struct bfq_sched_data *sd = entity->sched_data;
++ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
++
++ if (entity == sd->in_service_entity) {
++ BUG_ON(entity->tree);
++ /*
++		 * If we are requeueing the current entity, we must
++		 * take care not to charge it for service it has
++		 * not received.
++ */
++ bfq_calc_finish(entity, entity->service);
++ entity->start = entity->finish;
++ sd->in_service_entity = NULL;
++ } else if (entity->tree == &st->active) {
++ /*
++ * Requeueing an entity due to a change of some
++ * next_in_service entity below it. We reuse the
++ * old start time.
++ */
++ bfq_active_extract(st, entity);
++ } else if (entity->tree == &st->idle) {
++ /*
++ * Must be on the idle tree, bfq_idle_extract() will
++ * check for that.
++ */
++ bfq_idle_extract(st, entity);
++ entity->start = bfq_gt(st->vtime, entity->finish) ?
++ st->vtime : entity->finish;
++ } else {
++ /*
++ * The finish time of the entity may be invalid, and
++ * it is in the past for sure, otherwise the queue
++ * would have been on the idle tree.
++ */
++ entity->start = st->vtime;
++ st->wsum += entity->weight;
++ bfq_get_entity(entity);
++
++ BUG_ON(entity->on_st);
++ entity->on_st = 1;
++ }
++
++ st = __bfq_entity_update_weight_prio(st, entity);
++ bfq_calc_finish(entity, entity->budget);
++ bfq_active_insert(st, entity);
++}
++
++/**
++ * bfq_activate_entity - activate an entity and its ancestors if necessary.
++ * @entity: the entity to activate.
++ *
++ * Activate @entity and all the entities on the path from it to the root.
++ */
++static void bfq_activate_entity(struct bfq_entity *entity)
++{
++ struct bfq_sched_data *sd;
++
++ for_each_entity(entity) {
++ __bfq_activate_entity(entity);
++
++ sd = entity->sched_data;
++ if (!bfq_update_next_in_service(sd))
++ /*
++ * No need to propagate the activation to the
++ * upper entities, as they will be updated when
++ * the in-service entity is rescheduled.
++ */
++ break;
++ }
++}
++
++/**
++ * __bfq_deactivate_entity - deactivate an entity from its service tree.
++ * @entity: the entity to deactivate.
++ * @requeue: if false, the entity will not be put into the idle tree.
++ *
++ * Deactivate an entity, independently from its previous state. If the
++ * entity was not on a service tree just return, otherwise if it is on
++ * any scheduler tree, extract it from that tree, and if necessary
++ * and if the caller did not specify @requeue, put it on the idle tree.
++ *
++ * Return %1 if the caller should update the entity hierarchy, i.e.,
++ * if the entity was in service or if it was the next_in_service for
++ * its sched_data; return %0 otherwise.
++ */
++static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
++{
++ struct bfq_sched_data *sd = entity->sched_data;
++ struct bfq_service_tree *st;
++ int was_in_service;
++ int ret = 0;
++
++ if (sd == NULL || !entity->on_st) /* never activated, or inactive */
++ return 0;
++
++ st = bfq_entity_service_tree(entity);
++ was_in_service = entity == sd->in_service_entity;
++
++ BUG_ON(was_in_service && entity->tree);
++
++ if (was_in_service) {
++ bfq_calc_finish(entity, entity->service);
++ sd->in_service_entity = NULL;
++ } else if (entity->tree == &st->active)
++ bfq_active_extract(st, entity);
++ else if (entity->tree == &st->idle)
++ bfq_idle_extract(st, entity);
++ else if (entity->tree)
++ BUG();
++
++ if (was_in_service || sd->next_in_service == entity)
++ ret = bfq_update_next_in_service(sd);
++
++ if (!requeue || !bfq_gt(entity->finish, st->vtime))
++ bfq_forget_entity(st, entity);
++ else
++ bfq_idle_insert(st, entity);
++
++ BUG_ON(sd->in_service_entity == entity);
++ BUG_ON(sd->next_in_service == entity);
++
++ return ret;
++}
++
++/**
++ * bfq_deactivate_entity - deactivate an entity.
++ * @entity: the entity to deactivate.
++ * @requeue: true if the entity can be put on the idle tree
++ */
++static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
++{
++ struct bfq_sched_data *sd;
++ struct bfq_entity *parent;
++
++ for_each_entity_safe(entity, parent) {
++ sd = entity->sched_data;
++
++ if (!__bfq_deactivate_entity(entity, requeue))
++ /*
++ * The parent entity is still backlogged, and
++ * we don't need to update it as it is still
++ * in service.
++ */
++ break;
++
++ if (sd->next_in_service)
++ /*
++ * The parent entity is still backlogged and
++ * the budgets on the path towards the root
++ * need to be updated.
++ */
++ goto update;
++
++ /*
++		 * If we reach this point, the parent is no longer backlogged and
++ * we want to propagate the dequeue upwards.
++ */
++ requeue = 1;
++ }
++
++ return;
++
++update:
++ entity = parent;
++ for_each_entity(entity) {
++ __bfq_activate_entity(entity);
++
++ sd = entity->sched_data;
++ if (!bfq_update_next_in_service(sd))
++ break;
++ }
++}
++
++/**
++ * bfq_update_vtime - update vtime if necessary.
++ * @st: the service tree to act upon.
++ *
++ * If necessary update the service tree vtime to have at least one
++ * eligible entity, skipping to its start time. Assumes that the
++ * active tree of the device is not empty.
++ *
++ * NOTE: this hierarchical implementation updates vtimes quite often,
++ * we may end up with reactivated processes getting timestamps after a
++ * vtime skip done because we needed a ->first_active entity on some
++ * intermediate node.
++ */
++static void bfq_update_vtime(struct bfq_service_tree *st)
++{
++ struct bfq_entity *entry;
++ struct rb_node *node = st->active.rb_node;
++
++ entry = rb_entry(node, struct bfq_entity, rb_node);
++ if (bfq_gt(entry->min_start, st->vtime)) {
++ st->vtime = entry->min_start;
++ bfq_forget_idle(st);
++ }
++}
++
++/**
++ * bfq_first_active_entity - find the eligible entity with
++ * the smallest finish time
++ * @st: the service tree to select from.
++ *
++ * This function searches the first schedulable entity, starting from the
++ * root of the tree and going left whenever the left subtree contains at
++ * least one eligible (start >= vtime) entity. The path on
++ * the right is followed only if a) the left subtree contains no eligible
++ * entities and b) no eligible entity has been found yet.
++ */
++static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
++{
++ struct bfq_entity *entry, *first = NULL;
++ struct rb_node *node = st->active.rb_node;
++
++ while (node) {
++ entry = rb_entry(node, struct bfq_entity, rb_node);
++left:
++ if (!bfq_gt(entry->start, st->vtime))
++ first = entry;
++
++ BUG_ON(bfq_gt(entry->min_start, st->vtime));
++
++ if (node->rb_left) {
++ entry = rb_entry(node->rb_left,
++ struct bfq_entity, rb_node);
++ if (!bfq_gt(entry->min_start, st->vtime)) {
++ node = node->rb_left;
++ goto left;
++ }
++ }
++ if (first)
++ break;
++ node = node->rb_right;
++ }
++
++ BUG_ON(!first && !RB_EMPTY_ROOT(&st->active));
++ return first;
++}
++
++/**
++ * __bfq_lookup_next_entity - return the first eligible entity in @st.
++ * @st: the service tree.
++ *
++ * Update the virtual time in @st and return the first eligible entity
++ * it contains.
++ */
++static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
++ bool force)
++{
++ struct bfq_entity *entity, *new_next_in_service = NULL;
++
++ if (RB_EMPTY_ROOT(&st->active))
++ return NULL;
++
++ bfq_update_vtime(st);
++ entity = bfq_first_active_entity(st);
++ BUG_ON(bfq_gt(entity->start, st->vtime));
++
++ /*
++	 * If the chosen entity does not match the sched_data's
++	 * next_in_service and we are forcibly serving the IDLE priority
++	 * class tree, bubble the budget update up the hierarchy.
++ */
++ if (unlikely(force && entity != entity->sched_data->next_in_service)) {
++ new_next_in_service = entity;
++ for_each_entity(new_next_in_service)
++ bfq_update_budget(new_next_in_service);
++ }
++
++ return entity;
++}
++
++/**
++ * bfq_lookup_next_entity - return the first eligible entity in @sd.
++ * @sd: the sched_data.
++ * @extract: if true, the returned entity will also be extracted from @sd.
++ *
++ * NOTE: since we cache the next_in_service entity at each level of the
++ * hierarchy, the complexity of the lookup can be decreased with
++ * absolutely no effort just returning the cached next_in_service value;
++ * we prefer to do full lookups to test the consistency of the data
++ * structures.
++ */
++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
++ int extract,
++ struct bfq_data *bfqd)
++{
++ struct bfq_service_tree *st = sd->service_tree;
++ struct bfq_entity *entity;
++ int i = 0;
++
++ BUG_ON(sd->in_service_entity);
++
++ if (bfqd &&
++ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
++ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
++ true);
++ if (entity) {
++ i = BFQ_IOPRIO_CLASSES - 1;
++ bfqd->bfq_class_idle_last_service = jiffies;
++ sd->next_in_service = entity;
++ }
++ }
++ for (; i < BFQ_IOPRIO_CLASSES; i++) {
++ entity = __bfq_lookup_next_entity(st + i, false);
++ if (entity) {
++ if (extract) {
++ bfq_check_next_in_service(sd, entity);
++ bfq_active_extract(st + i, entity);
++ sd->in_service_entity = entity;
++ sd->next_in_service = NULL;
++ }
++ break;
++ }
++ }
++
++ return entity;
++}
++
++/*
++ * Get next queue for service.
++ */
++static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
++{
++ struct bfq_entity *entity = NULL;
++ struct bfq_sched_data *sd;
++ struct bfq_queue *bfqq;
++
++ BUG_ON(bfqd->in_service_queue);
++
++ if (bfqd->busy_queues == 0)
++ return NULL;
++
++ sd = &bfqd->root_group->sched_data;
++ for (; sd ; sd = entity->my_sched_data) {
++ entity = bfq_lookup_next_entity(sd, 1, bfqd);
++ BUG_ON(!entity);
++ entity->service = 0;
++ }
++
++ bfqq = bfq_entity_to_bfqq(entity);
++ BUG_ON(!bfqq);
++
++ return bfqq;
++}
++
++static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
++{
++ if (bfqd->in_service_bic) {
++ put_io_context(bfqd->in_service_bic->icq.ioc);
++ bfqd->in_service_bic = NULL;
++ }
++
++ bfqd->in_service_queue = NULL;
++ del_timer(&bfqd->idle_slice_timer);
++}
++
++static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ int requeue)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++
++ if (bfqq == bfqd->in_service_queue)
++ __bfq_bfqd_reset_in_service(bfqd);
++
++ bfq_deactivate_entity(entity, requeue);
++}
++
++static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++
++ bfq_activate_entity(entity);
++}
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
++#endif
++
++/*
++ * Called when the bfqq no longer has requests pending, remove it from
++ * the service tree.
++ */
++static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ int requeue)
++{
++ BUG_ON(!bfq_bfqq_busy(bfqq));
++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
++
++ bfq_log_bfqq(bfqd, bfqq, "del from busy");
++
++ bfq_clear_bfqq_busy(bfqq);
++
++ BUG_ON(bfqd->busy_queues == 0);
++ bfqd->busy_queues--;
++
++ if (!bfqq->dispatched) {
++ bfq_weights_tree_remove(bfqd, &bfqq->entity,
++ &bfqd->queue_weights_tree);
++ if (!blk_queue_nonrot(bfqd->queue)) {
++ BUG_ON(!bfqd->busy_in_flight_queues);
++ bfqd->busy_in_flight_queues--;
++ if (bfq_bfqq_constantly_seeky(bfqq)) {
++ BUG_ON(!bfqd->
++ const_seeky_busy_in_flight_queues);
++ bfqd->const_seeky_busy_in_flight_queues--;
++ }
++ }
++ }
++ if (bfqq->wr_coeff > 1)
++ bfqd->wr_busy_queues--;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ bfqg_stats_update_dequeue(bfqq_group(bfqq));
++#endif
++
++ bfq_deactivate_bfqq(bfqd, bfqq, requeue);
++}
++
++/*
++ * Called when an inactive queue receives a new request.
++ */
++static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ BUG_ON(bfq_bfqq_busy(bfqq));
++ BUG_ON(bfqq == bfqd->in_service_queue);
++
++ bfq_log_bfqq(bfqd, bfqq, "add to busy");
++
++ bfq_activate_bfqq(bfqd, bfqq);
++
++ bfq_mark_bfqq_busy(bfqq);
++ bfqd->busy_queues++;
++
++ if (!bfqq->dispatched) {
++ if (bfqq->wr_coeff == 1)
++ bfq_weights_tree_add(bfqd, &bfqq->entity,
++ &bfqd->queue_weights_tree);
++ if (!blk_queue_nonrot(bfqd->queue)) {
++ bfqd->busy_in_flight_queues++;
++ if (bfq_bfqq_constantly_seeky(bfqq))
++ bfqd->const_seeky_busy_in_flight_queues++;
++ }
++ }
++ if (bfqq->wr_coeff > 1)
++ bfqd->wr_busy_queues++;
++}
+diff --git a/block/bfq.h b/block/bfq.h
+new file mode 100644
+index 0000000..485d0c9
+--- /dev/null
++++ b/block/bfq.h
+@@ -0,0 +1,801 @@
++/*
++ * BFQ-v7r11 for 4.5.0: data structures and common functions prototypes.
++ *
++ * Based on ideas and code from CFQ:
++ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
++ *
++ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
++ * Paolo Valente <paolo.valente@unimore.it>
++ *
++ * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ */
++
++#ifndef _BFQ_H
++#define _BFQ_H
++
++#include <linux/blktrace_api.h>
++#include <linux/hrtimer.h>
++#include <linux/ioprio.h>
++#include <linux/rbtree.h>
++#include <linux/blk-cgroup.h>
++
++#define BFQ_IOPRIO_CLASSES 3
++#define BFQ_CL_IDLE_TIMEOUT (HZ/5)
++
++#define BFQ_MIN_WEIGHT 1
++#define BFQ_MAX_WEIGHT 1000
++#define BFQ_WEIGHT_CONVERSION_COEFF 10
++
++#define BFQ_DEFAULT_QUEUE_IOPRIO 4
++
++#define BFQ_DEFAULT_GRP_WEIGHT 10
++#define BFQ_DEFAULT_GRP_IOPRIO 0
++#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
++
++struct bfq_entity;
++
++/**
++ * struct bfq_service_tree - per ioprio_class service tree.
++ * @active: tree for active entities (i.e., those backlogged).
++ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i).
++ * @first_idle: idle entity with minimum F_i.
++ * @last_idle: idle entity with maximum F_i.
++ * @vtime: scheduler virtual time.
++ * @wsum: scheduler weight sum; active and idle entities contribute to it.
++ *
++ * Each service tree represents a B-WF2Q+ scheduler on its own. Each
++ * ioprio_class has its own independent scheduler, and so its own
++ * bfq_service_tree. All the fields are protected by the queue lock
++ * of the containing bfqd.
++ */
++struct bfq_service_tree {
++ struct rb_root active;
++ struct rb_root idle;
++
++ struct bfq_entity *first_idle;
++ struct bfq_entity *last_idle;
++
++ u64 vtime;
++ unsigned long wsum;
++};
++
++/**
++ * struct bfq_sched_data - multi-class scheduler.
++ * @in_service_entity: entity in service.
++ * @next_in_service: head-of-the-line entity in the scheduler.
++ * @service_tree: array of service trees, one per ioprio_class.
++ *
++ * bfq_sched_data is the basic scheduler queue. It supports three
++ * ioprio_classes, and can be used either as a toplevel queue or as
++ * an intermediate queue on a hierarchical setup.
++ * @next_in_service points to the active entity of the sched_data
++ * service trees that will be scheduled next.
++ *
++ * The supported ioprio_classes are the same as in CFQ, in descending
++ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
++ * Requests from higher priority queues are served before all the
++ * requests from lower priority queues; among requests of the same
++ * queue, requests are served according to B-WF2Q+.
++ * All the fields are protected by the queue lock of the containing bfqd.
++ */
++struct bfq_sched_data {
++ struct bfq_entity *in_service_entity;
++ struct bfq_entity *next_in_service;
++ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
++};
++
++/**
++ * struct bfq_weight_counter - counter of the number of all active entities
++ * with a given weight.
++ * @weight: weight of the entities that this counter refers to.
++ * @num_active: number of active entities with this weight.
++ * @weights_node: weights tree member (see bfq_data's @queue_weights_tree
++ * and @group_weights_tree).
++ */
++struct bfq_weight_counter {
++ short int weight;
++ unsigned int num_active;
++ struct rb_node weights_node;
++};
++
++/**
++ * struct bfq_entity - schedulable entity.
++ * @rb_node: service_tree member.
++ * @weight_counter: pointer to the weight counter associated with this entity.
++ * @on_st: flag, true if the entity is on a tree (either the active or
++ * the idle one of its service_tree).
++ * @finish: B-WF2Q+ finish timestamp (aka F_i).
++ * @start: B-WF2Q+ start timestamp (aka S_i).
++ * @tree: tree the entity is enqueued into; %NULL if not on a tree.
++ * @min_start: minimum start time of the (active) subtree rooted at
++ * this entity; used for O(log N) lookups into active trees.
++ * @service: service received during the last round of service.
++ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight.
++ * @weight: weight of the queue
++ * @parent: parent entity, for hierarchical scheduling.
++ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the
++ * associated scheduler queue, %NULL on leaf nodes.
++ * @sched_data: the scheduler queue this entity belongs to.
++ * @ioprio: the ioprio in use.
++ * @new_weight: when a weight change is requested, the new weight value.
++ * @orig_weight: original weight, used to implement weight boosting
++ * @prio_changed: flag, true when the user requested a weight, ioprio or
++ * ioprio_class change.
++ *
++ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
++ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each
++ * entity belongs to the sched_data of the parent group in the cgroup
++ * hierarchy. Non-leaf entities have also their own sched_data, stored
++ * in @my_sched_data.
++ *
++ * Each entity stores independently its priority values; this would
++ * allow different weights on different devices, but this
++ * functionality is not yet exported to userspace. Priorities and
++ * weights are updated lazily, first storing the new values into the
++ * new_* fields, then setting the @prio_changed flag. As soon as
++ * there is a transition in the entity state that allows the priority
++ * update to take place the effective and the requested priority
++ * values are synchronized.
++ *
++ * Unless cgroups are used, the weight value is calculated from the
++ * ioprio to export the same interface as CFQ. When dealing with
++ * ``well-behaved'' queues (i.e., queues that do not spend too much
++ * time to consume their budget and have true sequential behavior, and
++ * when there are no external factors breaking anticipation) the
++ * relative weights at each level of the cgroups hierarchy should be
++ * guaranteed. All the fields are protected by the queue lock of the
++ * containing bfqd.
++ */
++struct bfq_entity {
++ struct rb_node rb_node;
++ struct bfq_weight_counter *weight_counter;
++
++ int on_st;
++
++ u64 finish;
++ u64 start;
++
++ struct rb_root *tree;
++
++ u64 min_start;
++
++ int service, budget;
++ unsigned short weight, new_weight;
++ unsigned short orig_weight;
++
++ struct bfq_entity *parent;
++
++ struct bfq_sched_data *my_sched_data;
++ struct bfq_sched_data *sched_data;
++
++ int prio_changed;
++};
++
++struct bfq_group;
++
++/**
++ * struct bfq_queue - leaf schedulable entity.
++ * @ref: reference counter.
++ * @bfqd: parent bfq_data.
++ * @new_ioprio: when an ioprio change is requested, the new ioprio value.
++ * @ioprio_class: the ioprio_class in use.
++ * @new_ioprio_class: when an ioprio_class change is requested, the new
++ * ioprio_class value.
++ * @new_bfqq: shared bfq_queue if queue is cooperating with
++ * one or more other queues.
++ * @sort_list: sorted list of pending requests.
++ * @next_rq: if fifo isn't expired, next request to serve.
++ * @queued: nr of requests queued in @sort_list.
++ * @allocated: currently allocated requests.
++ * @meta_pending: pending metadata requests.
++ * @fifo: fifo list of requests in sort_list.
++ * @entity: entity representing this queue in the scheduler.
++ * @max_budget: maximum budget allowed from the feedback mechanism.
++ * @budget_timeout: budget expiration (in jiffies).
++ * @dispatched: number of requests on the dispatch list or inside driver.
++ * @flags: status flags.
++ * @bfqq_list: node for active/idle bfqq list inside our bfqd.
++ * @burst_list_node: node for the device's burst list.
++ * @seek_samples: number of seeks sampled
++ * @seek_total: sum of the distances of the seeks sampled
++ * @seek_mean: mean seek distance
++ * @last_request_pos: position of the last request enqueued
++ * @requests_within_timer: number of consecutive pairs of request completion
++ * and arrival, such that the queue becomes idle
++ * after the completion, but the next request arrives
++ * within an idle time slice; used only if the queue's
++ * IO_bound has been cleared.
++ * @pid: pid of the process owning the queue, used for logging purposes.
++ * @last_wr_start_finish: start time of the current weight-raising period if
++ * the @bfq-queue is being weight-raised, otherwise
++ * finish time of the last weight-raising period
++ * @wr_cur_max_time: current max raising time for this queue
++ * @soft_rt_next_start: minimum time instant such that, only if a new
++ * request is enqueued after this time instant in an
++ * idle @bfq_queue with no outstanding requests, then
++ * the task associated with the queue it is deemed as
++ * soft real-time (see the comments to the function
++ * bfq_bfqq_softrt_next_start())
++ * @last_idle_bklogged: time of the last transition of the @bfq_queue from
++ * idle to backlogged
++ * @service_from_backlogged: cumulative service received from the @bfq_queue
++ * since the last transition from idle to
++ * backlogged
++ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
++ * queue is shared
++ *
++ * A bfq_queue is a leaf request queue; it can be associated with one
++ * or more io_contexts, if it is async or shared between cooperating
++ * processes. @cgroup holds a reference to the cgroup, to be sure that it
++ * does not disappear while a bfqq still references it (mostly to avoid
++ * races between request issuing and task migration followed by cgroup
++ * destruction).
++ * All the fields are protected by the queue lock of the containing bfqd.
++ */
++struct bfq_queue {
++ atomic_t ref;
++ struct bfq_data *bfqd;
++
++ unsigned short ioprio, new_ioprio;
++ unsigned short ioprio_class, new_ioprio_class;
++
++ /* fields for cooperating queues handling */
++ struct bfq_queue *new_bfqq;
++ struct rb_node pos_node;
++ struct rb_root *pos_root;
++
++ struct rb_root sort_list;
++ struct request *next_rq;
++ int queued[2];
++ int allocated[2];
++ int meta_pending;
++ struct list_head fifo;
++
++ struct bfq_entity entity;
++
++ int max_budget;
++ unsigned long budget_timeout;
++
++ int dispatched;
++
++ unsigned int flags;
++
++ struct list_head bfqq_list;
++
++ struct hlist_node burst_list_node;
++
++ unsigned int seek_samples;
++ u64 seek_total;
++ sector_t seek_mean;
++ sector_t last_request_pos;
++
++ unsigned int requests_within_timer;
++
++ pid_t pid;
++ struct bfq_io_cq *bic;
++
++ /* weight-raising fields */
++ unsigned long wr_cur_max_time;
++ unsigned long soft_rt_next_start;
++ unsigned long last_wr_start_finish;
++ unsigned int wr_coeff;
++ unsigned long last_idle_bklogged;
++ unsigned long service_from_backlogged;
++};
++
++/**
++ * struct bfq_ttime - per process thinktime stats.
++ * @ttime_total: total process thinktime
++ * @ttime_samples: number of thinktime samples
++ * @ttime_mean: average process thinktime
++ */
++struct bfq_ttime {
++ unsigned long last_end_request;
++
++ unsigned long ttime_total;
++ unsigned long ttime_samples;
++ unsigned long ttime_mean;
++};
++
++/**
++ * struct bfq_io_cq - per (request_queue, io_context) structure.
++ * @icq: associated io_cq structure
++ * @bfqq: array of two process queues, the sync and the async
++ * @ttime: associated @bfq_ttime struct
++ * @ioprio: per (request_queue, blkcg) ioprio.
++ * @blkcg_id: id of the blkcg the related io_cq belongs to.
++ */
++struct bfq_io_cq {
++ struct io_cq icq; /* must be the first member */
++ struct bfq_queue *bfqq[2];
++ struct bfq_ttime ttime;
++ int ioprio;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ uint64_t blkcg_id; /* the current blkcg ID */
++#endif
++};
++
++enum bfq_device_speed {
++ BFQ_BFQD_FAST,
++ BFQ_BFQD_SLOW,
++};
++
++/**
++ * struct bfq_data - per device data structure.
++ * @queue: request queue for the managed device.
++ * @root_group: root bfq_group for the device.
++ * @active_numerous_groups: number of bfq_groups containing more than one
++ * active @bfq_entity.
++ * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by
++ * weight. Used to keep track of whether all @bfq_queues
++ * have the same weight. The tree contains one counter
++ * for each distinct weight associated to some active
++ * and not weight-raised @bfq_queue (see the comments to
++ * the functions bfq_weights_tree_[add|remove] for
++ * further details).
++ * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted
++ * by weight. Used to keep track of whether all
++ * @bfq_groups have the same weight. The tree contains
++ * one counter for each distinct weight associated to
++ * some active @bfq_group (see the comments to the
++ * functions bfq_weights_tree_[add|remove] for further
++ * details).
++ * @busy_queues: number of bfq_queues containing requests (including the
++ * queue in service, even if it is idling).
++ * @busy_in_flight_queues: number of @bfq_queues containing pending or
++ * in-flight requests, plus the @bfq_queue in
++ * service, even if idle but waiting for the
++ * possible arrival of its next sync request. This
++ * field is updated only if the device is rotational,
++ * but used only if the device is also NCQ-capable.
++ * The reason why the field is updated also for non-
++ * NCQ-capable rotational devices is related to the
++ * fact that the value of @hw_tag may be set also
++ * later than when busy_in_flight_queues may need to
++ * be incremented for the first time(s). Taking also
++ * this possibility into account, to avoid unbalanced
++ * increments/decrements, would imply more overhead
++ * than just updating busy_in_flight_queues
++ * regardless of the value of @hw_tag.
++ * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues
++ * (that is, seeky queues that expired
++ * for budget timeout at least once)
++ * containing pending or in-flight
++ * requests, including the in-service
++ * @bfq_queue if constantly seeky. This
++ * field is updated only if the device
++ * is rotational, but used only if the
++ * device is also NCQ-capable (see the
++ * comments to @busy_in_flight_queues).
++ * @wr_busy_queues: number of weight-raised busy @bfq_queues.
++ * @queued: number of queued requests.
++ * @rq_in_driver: number of requests dispatched and waiting for completion.
++ * @sync_flight: number of sync requests in the driver.
++ * @max_rq_in_driver: max number of reqs in driver in the last
++ * @hw_tag_samples completed requests.
++ * @hw_tag_samples: nr of samples used to calculate hw_tag.
++ * @hw_tag: flag set to one if the driver is showing a queueing behavior.
++ * @budgets_assigned: number of budgets assigned.
++ * @idle_slice_timer: timer set when idling for the next sequential request
++ * from the queue in service.
++ * @unplug_work: delayed work to restart dispatching on the request queue.
++ * @in_service_queue: bfq_queue in service.
++ * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue.
++ * @last_position: on-disk position of the last served request.
++ * @last_budget_start: beginning of the last budget.
++ * @last_idling_start: beginning of the last idle slice.
++ * @peak_rate: peak transfer rate observed for a budget.
++ * @peak_rate_samples: number of samples used to calculate @peak_rate.
++ * @bfq_max_budget: maximum budget allotted to a bfq_queue before
++ * rescheduling.
++ * @active_list: list of all the bfq_queues active on the device.
++ * @idle_list: list of all the bfq_queues idle on the device.
++ * @bfq_fifo_expire: timeout for async/sync requests; when it expires
++ * requests are served in fifo order.
++ * @bfq_back_penalty: weight of backward seeks wrt forward ones.
++ * @bfq_back_max: maximum allowed backward seek.
++ * @bfq_slice_idle: maximum idling time.
++ * @bfq_user_max_budget: user-configured max budget value
++ * (0 for auto-tuning).
++ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to
++ * async queues.
++ * @bfq_timeout: timeout for bfq_queues to consume their budget; used
++ * to prevent seeky queues from imposing long latencies on well-
++ * behaved ones (this also implies that seeky queues cannot
++ * receive guarantees in the service domain; after a timeout
++ * they are charged for the whole allocated budget, to try
++ * to preserve a behavior reasonably fair among them, but
++ * without service-domain guarantees).
++ * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is
++ * no more granted any weight-raising.
++ * @bfq_failed_cooperations: number of consecutive failed cooperation
++ * chances after which weight-raising is restored
++ * to a queue subject to more than bfq_coop_thresh
++ * queue merges.
++ * @bfq_requests_within_timer: number of consecutive requests that must be
++ * issued within the idle time slice to set
++ * again idling to a queue which was marked as
++ * non-I/O-bound (see the definition of the
++ * IO_bound flag for further details).
++ * @last_ins_in_burst: last time at which a queue entered the current
++ * burst of queues being activated shortly after
++ * each other; for more details about this and the
++ * following parameters related to a burst of
++ * activations, see the comments to the function
++ * @bfq_handle_burst.
++ * @bfq_burst_interval: reference time interval used to decide whether a
++ * queue has been activated shortly after
++ * @last_ins_in_burst.
++ * @burst_size: number of queues in the current burst of queue activations.
++ * @bfq_large_burst_thresh: maximum burst size above which the current
++ * queue-activation burst is deemed as 'large'.
++ * @large_burst: true if a large queue-activation burst is in progress.
++ * @burst_list: head of the burst list (as for the above fields, more details
++ * in the comments to the function bfq_handle_burst).
++ * @low_latency: if set to true, low-latency heuristics are enabled.
++ * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised
++ * queue is multiplied.
++ * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies).
++ * @bfq_wr_rt_max_time: maximum duration for soft real-time processes.
++ * @bfq_wr_min_idle_time: minimum idle period after which weight-raising
++ * may be reactivated for a queue (in jiffies).
++ * @bfq_wr_min_inter_arr_async: minimum period between request arrivals
++ * after which weight-raising may be
++ * reactivated for an already busy queue
++ * (in jiffies).
++ * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue,
++ * sectors per seconds.
++ * @RT_prod: cached value of the product R*T used for computing the maximum
++ * duration of the weight raising automatically.
++ * @device_speed: device-speed class for the low-latency heuristic.
++ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions.
++ *
++ * All the fields are protected by the @queue lock.
++ */
++struct bfq_data {
++ struct request_queue *queue;
++
++ struct bfq_group *root_group;
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ int active_numerous_groups;
++#endif
++
++ struct rb_root queue_weights_tree;
++ struct rb_root group_weights_tree;
++
++ int busy_queues;
++ int busy_in_flight_queues;
++ int const_seeky_busy_in_flight_queues;
++ int wr_busy_queues;
++ int queued;
++ int rq_in_driver;
++ int sync_flight;
++
++ int max_rq_in_driver;
++ int hw_tag_samples;
++ int hw_tag;
++
++ int budgets_assigned;
++
++ struct timer_list idle_slice_timer;
++ struct work_struct unplug_work;
++
++ struct bfq_queue *in_service_queue;
++ struct bfq_io_cq *in_service_bic;
++
++ sector_t last_position;
++
++ ktime_t last_budget_start;
++ ktime_t last_idling_start;
++ int peak_rate_samples;
++ u64 peak_rate;
++ int bfq_max_budget;
++
++ struct list_head active_list;
++ struct list_head idle_list;
++
++ unsigned int bfq_fifo_expire[2];
++ unsigned int bfq_back_penalty;
++ unsigned int bfq_back_max;
++ unsigned int bfq_slice_idle;
++ u64 bfq_class_idle_last_service;
++
++ int bfq_user_max_budget;
++ int bfq_max_budget_async_rq;
++ unsigned int bfq_timeout[2];
++
++ unsigned int bfq_coop_thresh;
++ unsigned int bfq_failed_cooperations;
++ unsigned int bfq_requests_within_timer;
++
++ unsigned long last_ins_in_burst;
++ unsigned long bfq_burst_interval;
++ int burst_size;
++ unsigned long bfq_large_burst_thresh;
++ bool large_burst;
++ struct hlist_head burst_list;
++
++ bool low_latency;
++
++ /* parameters of the low_latency heuristics */
++ unsigned int bfq_wr_coeff;
++ unsigned int bfq_wr_max_time;
++ unsigned int bfq_wr_rt_max_time;
++ unsigned int bfq_wr_min_idle_time;
++ unsigned long bfq_wr_min_inter_arr_async;
++ unsigned int bfq_wr_max_softrt_rate;
++ u64 RT_prod;
++ enum bfq_device_speed device_speed;
++
++ struct bfq_queue oom_bfqq;
++};
++
++enum bfqq_state_flags {
++ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */
++ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */
++ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
++ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
++ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */
++ BFQ_BFQQ_FLAG_sync, /* synchronous queue */
++ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
++ BFQ_BFQQ_FLAG_IO_bound, /*
++ * bfqq has timed-out at least once
++ * having consumed at most 2/10 of
++ * its budget
++ */
++ BFQ_BFQQ_FLAG_in_large_burst, /*
++ * bfqq activated in a large burst,
++ * see comments to bfq_handle_burst.
++ */
++ BFQ_BFQQ_FLAG_constantly_seeky, /*
++ * bfqq has proved to be slow and
++ * seeky until budget timeout
++ */
++ BFQ_BFQQ_FLAG_softrt_update, /*
++ * may need softrt-next-start
++ * update
++ */
++};
++
++#define BFQ_BFQQ_FNS(name) \
++static void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \
++{ \
++ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \
++} \
++static void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \
++{ \
++ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \
++} \
++static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
++{ \
++ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \
++}
++
++BFQ_BFQQ_FNS(busy);
++BFQ_BFQQ_FNS(wait_request);
++BFQ_BFQQ_FNS(must_alloc);
++BFQ_BFQQ_FNS(fifo_expire);
++BFQ_BFQQ_FNS(idle_window);
++BFQ_BFQQ_FNS(sync);
++BFQ_BFQQ_FNS(budget_new);
++BFQ_BFQQ_FNS(IO_bound);
++BFQ_BFQQ_FNS(in_large_burst);
++BFQ_BFQQ_FNS(constantly_seeky);
++BFQ_BFQQ_FNS(softrt_update);
++#undef BFQ_BFQQ_FNS
++
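Each BFQ_BFQQ_FNS(name) invocation above expands into three small helpers, bfq_mark_bfqq_<name>(), bfq_clear_bfqq_<name>() and bfq_bfqq_<name>(). A minimal usage sketch, assuming a valid bfqq pointer obtained elsewhere:

        /* Illustrative only: set, test and clear the idle_window flag. */
        bfq_mark_bfqq_idle_window(bfqq);
        if (bfq_bfqq_idle_window(bfqq))
                bfq_clear_bfqq_idle_window(bfqq);
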
++/* Logging facilities. */
++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
++ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args)
++
++#define bfq_log(bfqd, fmt, args...) \
++ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
++
++/* Expiration reasons. */
++enum bfqq_expiration {
++ BFQ_BFQQ_TOO_IDLE = 0, /*
++ * queue has been idling for
++ * too long
++ */
++ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */
++ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */
++ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */
++};
++
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++
++struct bfqg_stats {
++ /* total bytes transferred */
++ struct blkg_rwstat service_bytes;
++ /* total IOs serviced, post merge */
++ struct blkg_rwstat serviced;
++ /* number of ios merged */
++ struct blkg_rwstat merged;
++ /* total time spent on device in ns, may not be accurate w/ queueing */
++ struct blkg_rwstat service_time;
++ /* total time spent waiting in scheduler queue in ns */
++ struct blkg_rwstat wait_time;
++ /* number of IOs queued up */
++ struct blkg_rwstat queued;
++ /* total sectors transferred */
++ struct blkg_stat sectors;
++ /* total disk time and nr sectors dispatched by this group */
++ struct blkg_stat time;
++ /* time not charged to this cgroup */
++ struct blkg_stat unaccounted_time;
++ /* sum of number of ios queued across all samples */
++ struct blkg_stat avg_queue_size_sum;
++ /* count of samples taken for average */
++ struct blkg_stat avg_queue_size_samples;
++ /* how many times this group has been removed from service tree */
++ struct blkg_stat dequeue;
++ /* total time spent waiting for it to be assigned a timeslice. */
++ struct blkg_stat group_wait_time;
++ /* time spent idling for this blkcg_gq */
++ struct blkg_stat idle_time;
++ /* total time with empty current active q with other requests queued */
++ struct blkg_stat empty_time;
++ /* fields after this shouldn't be cleared on stat reset */
++ uint64_t start_group_wait_time;
++ uint64_t start_idle_time;
++ uint64_t start_empty_time;
++ uint16_t flags;
++};
++
++/*
++ * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
++ *
++ * @pd: the blkcg_policy_data this structure inherits; must be the first member
++ * @weight: weight of the bfq_group
++ */
++struct bfq_group_data {
++ /* must be the first member */
++ struct blkcg_policy_data pd;
++
++ unsigned short weight;
++};
++
++/**
++ * struct bfq_group - per (device, cgroup) data structure.
++ * @entity: schedulable entity to insert into the parent group sched_data.
++ * @sched_data: own sched_data, to contain child entities (they may be
++ * both bfq_queues and bfq_groups).
++ * @bfqd: the bfq_data for the device this group acts upon.
++ * @async_bfqq: array of async queues for all the tasks belonging to
++ * the group, one queue per ioprio value per ioprio_class,
++ * except for the idle class that has only one queue.
++ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
++ * @my_entity: pointer to @entity, %NULL for the toplevel group; used
++ * to avoid too many special cases during group creation/
++ * migration.
++ * @active_entities: number of active entities belonging to the group;
++ * unused for the root group. Used to know whether there
++ * are groups with more than one active @bfq_entity
++ * (see the comments to the function
++ * bfq_bfqq_must_not_expire()).
++ *
++ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
++ * there is a set of bfq_groups, each one collecting the lower-level
++ * entities belonging to the group that are acting on the same device.
++ *
++ * Locking works as follows:
++ * o @bfqd is protected by the queue lock, RCU is used to access it
++ * from the readers.
++ * o All the other fields are protected by the @bfqd queue lock.
++ */
++struct bfq_group {
++ /* must be the first member */
++ struct blkg_policy_data pd;
++
++ struct bfq_entity entity;
++ struct bfq_sched_data sched_data;
++
++ void *bfqd;
++
++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
++ struct bfq_queue *async_idle_bfqq;
++
++ struct bfq_entity *my_entity;
++
++ int active_entities;
++
++ struct bfqg_stats stats;
++ struct bfqg_stats dead_stats; /* stats pushed from dead children */
++};
++
++#else
++struct bfq_group {
++ struct bfq_sched_data sched_data;
++
++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
++ struct bfq_queue *async_idle_bfqq;
++};
++#endif
++
++static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
++
++static struct bfq_service_tree *
++bfq_entity_service_tree(struct bfq_entity *entity)
++{
++ struct bfq_sched_data *sched_data = entity->sched_data;
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ unsigned int idx = bfqq ? bfqq->ioprio_class - 1 :
++ BFQ_DEFAULT_GRP_CLASS;
++
++ BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
++ BUG_ON(sched_data == NULL);
++
++ return sched_data->service_tree + idx;
++}
++
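As a concrete example of the mapping above, assuming the standard Linux ioprio class values (IOPRIO_CLASS_RT == 1, IOPRIO_CLASS_BE == 2, IOPRIO_CLASS_IDLE == 3): an RT bfq_queue lands on service_tree[0], a best-effort queue on service_tree[1], an idle-class queue on service_tree[2], while a group entity, having no associated bfqq, uses BFQ_DEFAULT_GRP_CLASS as its index.
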
++static struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
++{
++ return bic->bfqq[is_sync];
++}
++
++static void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq,
++ bool is_sync)
++{
++ bic->bfqq[is_sync] = bfqq;
++}
++
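A purely illustrative sketch of how the sync/async pair in a bfq_io_cq is addressed through the two accessors above (the bic pointer is assumed to come from elsewhere):

        struct bfq_queue *sync_bfqq  = bic_to_bfqq(bic, true);  /* bic->bfqq[1] */
        struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false); /* bic->bfqq[0] */

        /* Drop the sync association, as done for instance when a queue is split. */
        bic_set_bfqq(bic, NULL, true);
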
++static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
++{
++ return bic->icq.q->elevator->elevator_data;
++}
++
++/**
++ * bfq_get_bfqd_locked - get a lock on a bfqd using an RCU-protected pointer.
++ * @ptr: a pointer to a bfqd.
++ * @flags: storage for the flags to be saved.
++ *
++ * This function allows bfqg->bfqd to be protected by the
++ * queue lock of the bfqd it references; the pointer is dereferenced
++ * under RCU, so the storage for bfqd is guaranteed to be safe as long
++ * as the RCU read side critical section does not end. After the
++ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be
++ * sure that no other writer accessed it. If we raced with a writer,
++ * the function returns NULL, with the queue unlocked, otherwise it
++ * returns the dereferenced pointer, with the queue locked.
++ */
++static struct bfq_data *bfq_get_bfqd_locked(void **ptr, unsigned long *flags)
++{
++ struct bfq_data *bfqd;
++
++ rcu_read_lock();
++ bfqd = rcu_dereference(*(struct bfq_data **)ptr);
++
++ if (bfqd != NULL) {
++ spin_lock_irqsave(bfqd->queue->queue_lock, *flags);
++ if (ptr == NULL)
++ printk(KERN_CRIT "get_bfqd_locked pointer NULL\n");
++ else if (*ptr == bfqd)
++ goto out;
++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
++ }
++
++ bfqd = NULL;
++out:
++ rcu_read_unlock();
++ return bfqd;
++}
++
++static void bfq_put_bfqd_unlock(struct bfq_data *bfqd, unsigned long *flags)
++{
++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
++}
++
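As a hedged illustration of the intended calling pattern for the two helpers above, a sketch follows; the wrapper function and its bfqg argument are hypothetical and exist only for this example (bfqg->bfqd is the RCU-published void pointer shown in struct bfq_group):

static void example_lock_group_bfqd(struct bfq_group *bfqg)
{
        unsigned long flags;
        struct bfq_data *bfqd;

        /* On success the queue lock is held; NULL means we raced with a writer. */
        bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
        if (!bfqd)
                return;

        /* ... operate on bfqd while holding bfqd->queue->queue_lock ... */

        bfq_put_bfqd_unlock(bfqd, &flags);
}
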
++static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
++static void bfq_put_queue(struct bfq_queue *bfqq);
++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
++ struct bio *bio, int is_sync,
++ struct bfq_io_cq *bic, gfp_t gfp_mask);
++static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
++ struct bfq_group *bfqg);
++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
++
++#endif /* _BFQ_H */
+--
+1.9.1
+
diff --git a/5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.6.patch b/5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.6.patch
new file mode 100644
index 0000000..3a1c06d
--- /dev/null
+++ b/5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.6.patch
@@ -0,0 +1,1101 @@
+From c41c6d65f72971e565e7980db2c34224ca06fffd Mon Sep 17 00:00:00 2001
+From: Mauro Andreolini <mauro.andreolini@unimore.it>
+Date: Sun, 6 Sep 2015 16:09:05 +0200
+Subject: [PATCH 3/4] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r11 for
+ 4.6.0
+
+A set of processes may happen to perform interleaved reads, i.e., requests
+whose union would give rise to a sequential read pattern. There are two
+typical cases: in the first case, processes read fixed-size chunks of
+data at a fixed distance from each other, while in the second case processes
+may read variable-size chunks at variable distances. The latter case occurs
+for example with QEMU, which splits the I/O generated by the guest into
+multiple chunks, and lets these chunks be served by a pool of cooperating
+processes, iteratively assigning the next chunk of I/O to the first
+available process. CFQ uses actual queue merging for the first type of
+processes, whereas it uses preemption to get a sequential read pattern out
+of the read requests performed by the second type of processes. In the end
+it uses two different mechanisms to achieve the same goal: boosting the
+throughput with interleaved I/O.
+
+This patch introduces Early Queue Merge (EQM), a unified mechanism to get a
+sequential read pattern with both types of processes. The main idea is
+checking newly arrived requests against the next request of the active queue
+both in case of actual request insert and in case of request merge. By doing
+so, both the types of processes can be handled by just merging their queues.
+EQM is then simpler and more compact than the pair of mechanisms used in
+CFQ.
+
+Finally, EQM also preserves the typical low-latency properties of BFQ, by
+properly restoring the weight-raising state of a queue when it gets back to
+a non-merged state.
+
+Signed-off-by: Mauro Andreolini <mauro.andreolini@unimore.it>
+Signed-off-by: Arianna Avanzini <avanzini@google.com>
+Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+---
+ block/bfq-cgroup.c | 4 +
+ block/bfq-iosched.c | 687 ++++++++++++++++++++++++++++++++++++++++++++++++++--
+ block/bfq.h | 66 +++++
+ 3 files changed, 743 insertions(+), 14 deletions(-)
+
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+index 8610cd6..5ee99ec 100644
+--- a/block/bfq-cgroup.c
++++ b/block/bfq-cgroup.c
+@@ -437,6 +437,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
+ */
+ bfqg->bfqd = bfqd;
+ bfqg->active_entities = 0;
++ bfqg->rq_pos_tree = RB_ROOT;
+ }
+
+ static void bfq_pd_free(struct blkg_policy_data *pd)
+@@ -530,6 +531,8 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+ return bfqg;
+ }
+
++static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
++
+ /**
+ * bfq_bfqq_move - migrate @bfqq to @bfqg.
+ * @bfqd: queue descriptor.
+@@ -577,6 +580,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ bfqg_get(bfqg);
+
+ if (busy) {
++ bfq_pos_tree_add_move(bfqd, bfqq);
+ if (resume)
+ bfq_activate_bfqq(bfqd, bfqq);
+ }
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index f9787a6..d1f648d 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -296,6 +296,72 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
+ }
+ }
+
++static struct bfq_queue *
++bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
++ sector_t sector, struct rb_node **ret_parent,
++ struct rb_node ***rb_link)
++{
++ struct rb_node **p, *parent;
++ struct bfq_queue *bfqq = NULL;
++
++ parent = NULL;
++ p = &root->rb_node;
++ while (*p) {
++ struct rb_node **n;
++
++ parent = *p;
++ bfqq = rb_entry(parent, struct bfq_queue, pos_node);
++
++ /*
++ * Sort strictly based on sector. Smallest to the left,
++ * largest to the right.
++ */
++ if (sector > blk_rq_pos(bfqq->next_rq))
++ n = &(*p)->rb_right;
++ else if (sector < blk_rq_pos(bfqq->next_rq))
++ n = &(*p)->rb_left;
++ else
++ break;
++ p = n;
++ bfqq = NULL;
++ }
++
++ *ret_parent = parent;
++ if (rb_link)
++ *rb_link = p;
++
++ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
++ (long long unsigned)sector,
++ bfqq ? bfqq->pid : 0);
++
++ return bfqq;
++}
++
++static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
++{
++ struct rb_node **p, *parent;
++ struct bfq_queue *__bfqq;
++
++ if (bfqq->pos_root) {
++ rb_erase(&bfqq->pos_node, bfqq->pos_root);
++ bfqq->pos_root = NULL;
++ }
++
++ if (bfq_class_idle(bfqq))
++ return;
++ if (!bfqq->next_rq)
++ return;
++
++ bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
++ blk_rq_pos(bfqq->next_rq), &parent, &p);
++ if (!__bfqq) {
++ rb_link_node(&bfqq->pos_node, parent, p);
++ rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
++ } else
++ bfqq->pos_root = NULL;
++}
++
+ /*
+ * Tell whether there are active queues or groups with differentiated weights.
+ */
+@@ -528,6 +594,57 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+ return dur;
+ }
+
++static unsigned bfq_bfqq_cooperations(struct bfq_queue *bfqq)
++{
++ return bfqq->bic ? bfqq->bic->cooperations : 0;
++}
++
++static void
++bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
++{
++ if (bic->saved_idle_window)
++ bfq_mark_bfqq_idle_window(bfqq);
++ else
++ bfq_clear_bfqq_idle_window(bfqq);
++ if (bic->saved_IO_bound)
++ bfq_mark_bfqq_IO_bound(bfqq);
++ else
++ bfq_clear_bfqq_IO_bound(bfqq);
++ /* Assuming that the flag in_large_burst is already correctly set */
++ if (bic->wr_time_left && bfqq->bfqd->low_latency &&
++ !bfq_bfqq_in_large_burst(bfqq) &&
++ bic->cooperations < bfqq->bfqd->bfq_coop_thresh) {
++ /*
++ * Start a weight raising period with the duration given by
++ * the raising_time_left snapshot.
++ */
++ if (bfq_bfqq_busy(bfqq))
++ bfqq->bfqd->wr_busy_queues++;
++ bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff;
++ bfqq->wr_cur_max_time = bic->wr_time_left;
++ bfqq->last_wr_start_finish = jiffies;
++ bfqq->entity.prio_changed = 1;
++ }
++ /*
++ * Clear wr_time_left to prevent bfq_bfqq_save_state() from
++ * getting confused about the queue's need of a weight-raising
++ * period.
++ */
++ bic->wr_time_left = 0;
++}
++
++static int bfqq_process_refs(struct bfq_queue *bfqq)
++{
++ int process_refs, io_refs;
++
++ lockdep_assert_held(bfqq->bfqd->queue->queue_lock);
++
++ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
++ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
++ BUG_ON(process_refs < 0);
++ return process_refs;
++}
++
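To make the arithmetic above concrete with invented numbers: if a bfq_queue currently has ref == 5, two allocated READ requests, one allocated WRITE request and entity.on_st == 1, then bfqq_process_refs() returns 5 - 3 - 1 = 1, i.e. exactly one process still owns a reference to the queue.
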
+ /* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */
+ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+@@ -764,8 +881,14 @@ static void bfq_add_request(struct request *rq)
+ BUG_ON(!next_rq);
+ bfqq->next_rq = next_rq;
+
++ /*
++ * Adjust priority tree position, if next_rq changes.
++ */
++ if (prev != bfqq->next_rq)
++ bfq_pos_tree_add_move(bfqd, bfqq);
++
+ if (!bfq_bfqq_busy(bfqq)) {
+- bool soft_rt, in_burst,
++ bool soft_rt, coop_or_in_burst,
+ idle_for_long_time = time_is_before_jiffies(
+ bfqq->budget_timeout +
+ bfqd->bfq_wr_min_idle_time);
+@@ -793,11 +916,12 @@ static void bfq_add_request(struct request *rq)
+ bfqd->last_ins_in_burst = jiffies;
+ }
+
+- in_burst = bfq_bfqq_in_large_burst(bfqq);
++ coop_or_in_burst = bfq_bfqq_in_large_burst(bfqq) ||
++ bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh;
+ soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+- !in_burst &&
++ !coop_or_in_burst &&
+ time_is_before_jiffies(bfqq->soft_rt_next_start);
+- interactive = !in_burst && idle_for_long_time;
++ interactive = !coop_or_in_burst && idle_for_long_time;
+ entity->budget = max_t(unsigned long, bfqq->max_budget,
+ bfq_serv_to_charge(next_rq, bfqq));
+
+@@ -816,6 +940,9 @@ static void bfq_add_request(struct request *rq)
+ if (!bfqd->low_latency)
+ goto add_bfqq_busy;
+
++ if (bfq_bfqq_just_split(bfqq))
++ goto set_prio_changed;
++
+ /*
+ * If the queue:
+ * - is not being boosted,
+@@ -840,7 +967,7 @@ static void bfq_add_request(struct request *rq)
+ } else if (old_wr_coeff > 1) {
+ if (interactive)
+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+- else if (in_burst ||
++ else if (coop_or_in_burst ||
+ (bfqq->wr_cur_max_time ==
+ bfqd->bfq_wr_rt_max_time &&
+ !soft_rt)) {
+@@ -905,6 +1032,7 @@ static void bfq_add_request(struct request *rq)
+ bfqd->bfq_wr_rt_max_time;
+ }
+ }
++set_prio_changed:
+ if (old_wr_coeff != bfqq->wr_coeff)
+ entity->prio_changed = 1;
+ add_bfqq_busy:
+@@ -1047,6 +1175,15 @@ static void bfq_merged_request(struct request_queue *q, struct request *req,
+ bfqd->last_position);
+ BUG_ON(!next_rq);
+ bfqq->next_rq = next_rq;
++ /*
++ * If next_rq changes, update both the queue's budget to
++ * fit the new request and the queue's position in its
++ * rq_pos_tree.
++ */
++ if (prev != bfqq->next_rq) {
++ bfq_updated_next_req(bfqd, bfqq);
++ bfq_pos_tree_add_move(bfqd, bfqq);
++ }
+ }
+ }
+
+@@ -1129,11 +1266,346 @@ static void bfq_end_wr(struct bfq_data *bfqd)
+ spin_unlock_irq(bfqd->queue->queue_lock);
+ }
+
++static sector_t bfq_io_struct_pos(void *io_struct, bool request)
++{
++ if (request)
++ return blk_rq_pos(io_struct);
++ else
++ return ((struct bio *)io_struct)->bi_iter.bi_sector;
++}
++
++static int bfq_rq_close_to_sector(void *io_struct, bool request,
++ sector_t sector)
++{
++ return abs(bfq_io_struct_pos(io_struct, request) - sector) <=
++ BFQQ_SEEK_THR;
++}
++
++static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ sector_t sector)
++{
++ struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
++ struct rb_node *parent, *node;
++ struct bfq_queue *__bfqq;
++
++ if (RB_EMPTY_ROOT(root))
++ return NULL;
++
++ /*
++ * First, if we find a request starting at the end of the last
++ * request, choose it.
++ */
++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
++ if (__bfqq)
++ return __bfqq;
++
++ /*
++ * If the exact sector wasn't found, the parent of the NULL leaf
++ * will contain the closest sector (rq_pos_tree sorted by
++ * next_request position).
++ */
++ __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
++ return __bfqq;
++
++ if (blk_rq_pos(__bfqq->next_rq) < sector)
++ node = rb_next(&__bfqq->pos_node);
++ else
++ node = rb_prev(&__bfqq->pos_node);
++ if (!node)
++ return NULL;
++
++ __bfqq = rb_entry(node, struct bfq_queue, pos_node);
++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
++ return __bfqq;
++
++ return NULL;
++}
++
++static struct bfq_queue *bfq_find_close_cooperator(struct bfq_data *bfqd,
++ struct bfq_queue *cur_bfqq,
++ sector_t sector)
++{
++ struct bfq_queue *bfqq;
++
++ /*
++ * We shall notice if some of the queues are cooperating,
++ * e.g., working closely on the same area of the device. In
++ * that case, we can group them together and: 1) don't waste
++ * time idling, and 2) serve the union of their requests in
++ * the best possible order for throughput.
++ */
++ bfqq = bfqq_find_close(bfqd, cur_bfqq, sector);
++ if (!bfqq || bfqq == cur_bfqq)
++ return NULL;
++
++ return bfqq;
++}
++
++static struct bfq_queue *
++bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
++{
++ int process_refs, new_process_refs;
++ struct bfq_queue *__bfqq;
++
++ /*
++ * If there are no process references on the new_bfqq, then it is
++ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
++ * may have dropped their last reference (not just their last process
++ * reference).
++ */
++ if (!bfqq_process_refs(new_bfqq))
++ return NULL;
++
++ /* Avoid a circular list and skip interim queue merges. */
++ while ((__bfqq = new_bfqq->new_bfqq)) {
++ if (__bfqq == bfqq)
++ return NULL;
++ new_bfqq = __bfqq;
++ }
++
++ process_refs = bfqq_process_refs(bfqq);
++ new_process_refs = bfqq_process_refs(new_bfqq);
++ /*
++ * If the process for the bfqq has gone away, there is no
++ * sense in merging the queues.
++ */
++ if (process_refs == 0 || new_process_refs == 0)
++ return NULL;
++
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
++ new_bfqq->pid);
++
++ /*
++ * Merging is just a redirection: the requests of the process
++ * owning one of the two queues are redirected to the other queue.
++ * The latter queue, in its turn, is set as shared if this is the
++ * first time that the requests of some process are redirected to
++ * it.
++ *
++ * We redirect bfqq to new_bfqq and not the opposite, because we
++ * are in the context of the process owning bfqq, hence we have
++ * the io_cq of this process. So we can immediately configure this
++ * io_cq to redirect the requests of the process to new_bfqq.
++ *
++ * NOTE, even if new_bfqq coincides with the in-service queue, the
++ * io_cq of new_bfqq is not available, because, if the in-service
++ * queue is shared, bfqd->in_service_bic may not point to the
++ * io_cq of the in-service queue.
++ * Redirecting the requests of the process owning bfqq to the
++ * currently in-service queue is in any case the best option, as
++ * we feed the in-service queue with new requests close to the
++ * last request served and, by doing so, hopefully increase the
++ * throughput.
++ */
++ bfqq->new_bfqq = new_bfqq;
++ atomic_add(process_refs, &new_bfqq->ref);
++ return new_bfqq;
++}
++
++static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
++ struct bfq_queue *new_bfqq)
++{
++ if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) ||
++ (bfqq->ioprio_class != new_bfqq->ioprio_class))
++ return false;
++
++ /*
++ * If either of the queues has already been detected as seeky,
++ * then merging it with the other queue is unlikely to lead to
++ * sequential I/O.
++ */
++ if (BFQQ_SEEKY(bfqq) || BFQQ_SEEKY(new_bfqq))
++ return false;
++
++ /*
++ * Interleaved I/O is known to be done by (some) applications
++ * only for reads, so it does not make sense to merge async
++ * queues.
++ */
++ if (!bfq_bfqq_sync(bfqq) || !bfq_bfqq_sync(new_bfqq))
++ return false;
++
++ return true;
++}
++
++/*
++ * Attempt to schedule a merge of bfqq with the currently in-service queue
++ * or with a close queue among the scheduled queues.
++ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
++ * structure otherwise.
++ *
++ * The OOM queue is not allowed to participate in cooperation: in fact, since
++ * the requests temporarily redirected to the OOM queue could be redirected
++ * again to dedicated queues at any time, the state needed to correctly
++ * handle merging with the OOM queue would be quite complex and expensive
++ * to maintain. Besides, in such a critical condition as an out-of-memory
++ * one, the benefits of queue merging may be of little relevance, or even negligible.
++ */
++static struct bfq_queue *
++bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ void *io_struct, bool request)
++{
++ struct bfq_queue *in_service_bfqq, *new_bfqq;
++
++ if (bfqq->new_bfqq)
++ return bfqq->new_bfqq;
++ if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
++ return NULL;
++ /* If device has only one backlogged bfq_queue, don't search. */
++ if (bfqd->busy_queues == 1)
++ return NULL;
++
++ in_service_bfqq = bfqd->in_service_queue;
++
++ if (!in_service_bfqq || in_service_bfqq == bfqq ||
++ !bfqd->in_service_bic ||
++ unlikely(in_service_bfqq == &bfqd->oom_bfqq))
++ goto check_scheduled;
++
++ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
++ bfqq->entity.parent == in_service_bfqq->entity.parent &&
++ bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) {
++ new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
++ if (new_bfqq)
++ return new_bfqq;
++ }
++ /*
++ * Check whether there is a cooperator among currently scheduled
++ * queues. The only thing we need is that the bio/request is not
++ * NULL, as we need it to establish whether a cooperator exists.
++ */
++check_scheduled:
++ new_bfqq = bfq_find_close_cooperator(bfqd, bfqq,
++ bfq_io_struct_pos(io_struct, request));
++
++ BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent);
++
++ if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) &&
++ bfq_may_be_close_cooperator(bfqq, new_bfqq))
++ return bfq_setup_merge(bfqq, new_bfqq);
++
++ return NULL;
++}
++
++static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
++{
++ /*
++ * If !bfqq->bic, the queue is already shared or its requests
++ * have already been redirected to a shared queue; both idle window
++ * and weight raising state have already been saved. Do nothing.
++ */
++ if (!bfqq->bic)
++ return;
++ if (bfqq->bic->wr_time_left)
++ /*
++ * This is the queue of a just-started process, and would
++ * deserve weight raising: we set wr_time_left to the full
++ * weight-raising duration to trigger weight-raising when
++ * and if the queue is split and the first request of the
++ * queue is enqueued.
++ */
++ bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd);
++ else if (bfqq->wr_coeff > 1) {
++ unsigned long wr_duration =
++ jiffies - bfqq->last_wr_start_finish;
++ /*
++ * It may happen that a queue's weight raising period lasts
++ * longer than its wr_cur_max_time, as weight raising is
++ * handled only when a request is enqueued or dispatched (it
++ * does not use any timer). If the weight raising period is
++ * about to end, don't save it.
++ */
++ if (bfqq->wr_cur_max_time <= wr_duration)
++ bfqq->bic->wr_time_left = 0;
++ else
++ bfqq->bic->wr_time_left =
++ bfqq->wr_cur_max_time - wr_duration;
++ /*
++ * The bfq_queue is becoming shared or the requests of the
++ * process owning the queue are being redirected to a shared
++ * queue. Stop the weight raising period of the queue, as in
++ * both cases it should not be owned by an interactive or
++ * soft real-time application.
++ */
++ bfq_bfqq_end_wr(bfqq);
++ } else
++ bfqq->bic->wr_time_left = 0;
++ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
++ bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
++ bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
++ bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
++ bfqq->bic->cooperations++;
++ bfqq->bic->failed_cooperations = 0;
++}
++
++static void bfq_get_bic_reference(struct bfq_queue *bfqq)
++{
++ /*
++ * If bfqq->bic has a non-NULL value, the bic to which it belongs
++ * is about to begin using a shared bfq_queue.
++ */
++ if (bfqq->bic)
++ atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
++}
++
++static void
++bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
++ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
++{
++ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
++ (long unsigned)new_bfqq->pid);
++ /* Save weight raising and idle window of the merged queues */
++ bfq_bfqq_save_state(bfqq);
++ bfq_bfqq_save_state(new_bfqq);
++ if (bfq_bfqq_IO_bound(bfqq))
++ bfq_mark_bfqq_IO_bound(new_bfqq);
++ bfq_clear_bfqq_IO_bound(bfqq);
++ /*
++ * Grab a reference to the bic, to prevent it from being destroyed
++ * before being possibly touched by a bfq_split_bfqq().
++ */
++ bfq_get_bic_reference(bfqq);
++ bfq_get_bic_reference(new_bfqq);
++ /*
++ * Merge queues (that is, let bic redirect its requests to new_bfqq)
++ */
++ bic_set_bfqq(bic, new_bfqq, 1);
++ bfq_mark_bfqq_coop(new_bfqq);
++ /*
++ * new_bfqq now belongs to at least two bics (it is a shared queue):
++ * set new_bfqq->bic to NULL. bfqq either:
++ * - does not belong to any bic any more, and hence bfqq->bic must
++ * be set to NULL, or
++ * - is a queue whose owning bics have already been redirected to a
++ * different queue, hence the queue is destined to not belong to
++ * any bic soon and bfqq->bic is already NULL (therefore the next
++ * assignment causes no harm).
++ */
++ new_bfqq->bic = NULL;
++ bfqq->bic = NULL;
++ bfq_put_queue(bfqq);
++}
++
++static void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq)
++{
++ struct bfq_io_cq *bic = bfqq->bic;
++ struct bfq_data *bfqd = bfqq->bfqd;
++
++ if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) {
++ bic->failed_cooperations++;
++ if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations)
++ bic->cooperations = 0;
++ }
++}
++
+ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+ struct bio *bio)
+ {
+ struct bfq_data *bfqd = q->elevator->elevator_data;
+ struct bfq_io_cq *bic;
++ struct bfq_queue *bfqq, *new_bfqq;
+
+ /*
+ * Disallow merge of a sync bio into an async request.
+@@ -1150,7 +1622,26 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+ if (!bic)
+ return 0;
+
+- return bic_to_bfqq(bic, bfq_bio_sync(bio)) == RQ_BFQQ(rq);
++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
++ /*
++ * We take advantage of this function to perform an early merge
++ * of the queues of possible cooperating processes.
++ */
++ if (bfqq) {
++ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
++ if (new_bfqq) {
++ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
++ /*
++ * If we get here, the bio will be queued in the
++ * shared queue, i.e., new_bfqq, so use new_bfqq
++ * to decide whether bio and rq can be merged.
++ */
++ bfqq = new_bfqq;
++ } else
++ bfq_bfqq_increase_failed_cooperations(bfqq);
++ }
++
++ return bfqq == RQ_BFQQ(rq);
+ }
+
+ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+@@ -1349,6 +1840,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+
+ __bfq_bfqd_reset_in_service(bfqd);
+
++ /*
++ * If this bfqq is shared between multiple processes, check
++ * to make sure that those processes are still issuing I/Os
++ * within the mean seek distance. If not, it may be time to
++ * break the queues apart again.
++ */
++ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq))
++ bfq_mark_bfqq_split_coop(bfqq);
++
+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+ /*
+ * Overloading budget_timeout field to store the time
+@@ -1357,8 +1857,13 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ */
+ bfqq->budget_timeout = jiffies;
+ bfq_del_bfqq_busy(bfqd, bfqq, 1);
+- } else
++ } else {
+ bfq_activate_bfqq(bfqd, bfqq);
++ /*
++ * Resort priority tree of potential close cooperators.
++ */
++ bfq_pos_tree_add_move(bfqd, bfqq);
++ }
+ }
+
+ /**
+@@ -2242,10 +2747,12 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ /*
+ * If the queue was activated in a burst, or
+ * too much time has elapsed from the beginning
+- * of this weight-raising period, then end weight
+- * raising.
++ * of this weight-raising period, or the queue has
++ * exceeded the acceptable number of cooperations,
++ * then end weight raising.
+ */
+ if (bfq_bfqq_in_large_burst(bfqq) ||
++ bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh ||
+ time_is_before_jiffies(bfqq->last_wr_start_finish +
+ bfqq->wr_cur_max_time)) {
+ bfqq->last_wr_start_finish = jiffies;
+@@ -2474,6 +2981,25 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
+ #endif
+ }
+
++static void bfq_put_cooperator(struct bfq_queue *bfqq)
++{
++ struct bfq_queue *__bfqq, *next;
++
++ /*
++ * If this queue was scheduled to merge with another queue, be
++ * sure to drop the reference taken on that queue (and others in
++ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs.
++ */
++ __bfqq = bfqq->new_bfqq;
++ while (__bfqq) {
++ if (__bfqq == bfqq)
++ break;
++ next = __bfqq->new_bfqq;
++ bfq_put_queue(__bfqq);
++ __bfqq = next;
++ }
++}
++
+ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ if (bfqq == bfqd->in_service_queue) {
+@@ -2484,6 +3010,8 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
+ atomic_read(&bfqq->ref));
+
++ bfq_put_cooperator(bfqq);
++
+ bfq_put_queue(bfqq);
+ }
+
+@@ -2492,6 +3020,25 @@ static void bfq_init_icq(struct io_cq *icq)
+ struct bfq_io_cq *bic = icq_to_bic(icq);
+
+ bic->ttime.last_end_request = jiffies;
++ /*
++ * A newly created bic indicates that the process has just
++ * started doing I/O, and is probably mapping into memory its
++ * executable and libraries: it definitely needs weight raising.
++ * There is however the possibility that the process performs,
++ * for a while, I/O close to some other process. EQM intercepts
++ * this behavior and may merge the queue corresponding to the
++ * process with some other queue, BEFORE the weight of the queue
++ * is raised. Merged queues are not weight-raised (they are assumed
++ * to belong to processes that benefit only from high throughput).
++ * If the merge is basically the consequence of an accident, then
++ * the queue will be split soon and will get back its old weight.
++ * It is then important to write down somewhere that this queue
++ * does need weight raising, even if it did not make it to get its
++ * weight raised before being merged. To this purpose, we overload
++ * the field wr_time_left and assign 1 to it, to mark the queue
++ * as needing weight raising.
++ */
++ bic->wr_time_left = 1;
+ }
+
+ static void bfq_exit_icq(struct io_cq *icq)
+@@ -2505,6 +3052,13 @@ static void bfq_exit_icq(struct io_cq *icq)
+ }
+
+ if (bic->bfqq[BLK_RW_SYNC]) {
++ /*
++ * If the bic is using a shared queue, put the reference
++ * taken on the io_context when the bic started using a
++ * shared bfq_queue.
++ */
++ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
++ put_io_context(icq->ioc);
+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
+ bic->bfqq[BLK_RW_SYNC] = NULL;
+ }
+@@ -2809,6 +3363,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
+ return;
+
++ /* Idle window just restored, statistics are meaningless. */
++ if (bfq_bfqq_just_split(bfqq))
++ return;
++
+ enable_idle = bfq_bfqq_idle_window(bfqq);
+
+ if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
+@@ -2856,6 +3414,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
+ !BFQQ_SEEKY(bfqq))
+ bfq_update_idle_window(bfqd, bfqq, bic);
++ bfq_clear_bfqq_just_split(bfqq);
+
+ bfq_log_bfqq(bfqd, bfqq,
+ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
+@@ -2920,12 +3479,47 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+ {
+ struct bfq_data *bfqd = q->elevator->elevator_data;
+- struct bfq_queue *bfqq = RQ_BFQQ(rq);
++ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
+
+ assert_spin_locked(bfqd->queue->queue_lock);
+
++ /*
++ * An unplug may trigger a requeue of a request from the device
++ * driver: make sure we are in process context while trying to
++ * merge two bfq_queues.
++ */
++ if (!in_interrupt()) {
++ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
++ if (new_bfqq) {
++ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
++ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
++ /*
++ * Release the request's reference to the old bfqq
++ * and make sure one is taken to the shared queue.
++ */
++ new_bfqq->allocated[rq_data_dir(rq)]++;
++ bfqq->allocated[rq_data_dir(rq)]--;
++ atomic_inc(&new_bfqq->ref);
++ bfq_put_queue(bfqq);
++ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
++ bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
++ bfqq, new_bfqq);
++ rq->elv.priv[1] = new_bfqq;
++ bfqq = new_bfqq;
++ } else
++ bfq_bfqq_increase_failed_cooperations(bfqq);
++ }
++
+ bfq_add_request(rq);
+
++ /*
++ * Here a newly-created bfq_queue has already started a weight-raising
++ * period: clear wr_time_left to prevent bfq_bfqq_save_state()
++ * from assigning it a full weight-raising period. See the detailed
++ * comments about this field in bfq_init_icq().
++ */
++ if (bfqq->bic)
++ bfqq->bic->wr_time_left = 0;
+ rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+ list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+@@ -3094,6 +3688,32 @@ static void bfq_put_request(struct request *rq)
+ }
+
+ /*
++ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
++ * was the last process referring to said bfqq.
++ */
++static struct bfq_queue *
++bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
++{
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
++
++ put_io_context(bic->icq.ioc);
++
++ if (bfqq_process_refs(bfqq) == 1) {
++ bfqq->pid = current->pid;
++ bfq_clear_bfqq_coop(bfqq);
++ bfq_clear_bfqq_split_coop(bfqq);
++ return bfqq;
++ }
++
++ bic_set_bfqq(bic, NULL, 1);
++
++ bfq_put_cooperator(bfqq);
++
++ bfq_put_queue(bfqq);
++ return NULL;
++}
++
++/*
+ * Allocate bfq data structures associated with this request.
+ */
+ static int bfq_set_request(struct request_queue *q, struct request *rq,
+@@ -3105,6 +3725,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+ const int is_sync = rq_is_sync(rq);
+ struct bfq_queue *bfqq;
+ unsigned long flags;
++ bool split = false;
+
+ might_sleep_if(gfpflags_allow_blocking(gfp_mask));
+
+@@ -3117,15 +3738,30 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+
+ bfq_bic_update_cgroup(bic, bio);
+
++new_queue:
+ bfqq = bic_to_bfqq(bic, is_sync);
+ if (!bfqq || bfqq == &bfqd->oom_bfqq) {
+ bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, gfp_mask);
+ bic_set_bfqq(bic, bfqq, is_sync);
+- if (is_sync) {
+- if (bfqd->large_burst)
++ if (split && is_sync) {
++ if ((bic->was_in_burst_list && bfqd->large_burst) ||
++ bic->saved_in_large_burst)
+ bfq_mark_bfqq_in_large_burst(bfqq);
+- else
+- bfq_clear_bfqq_in_large_burst(bfqq);
++ else {
++ bfq_clear_bfqq_in_large_burst(bfqq);
++ if (bic->was_in_burst_list)
++ hlist_add_head(&bfqq->burst_list_node,
++ &bfqd->burst_list);
++ }
++ }
++ } else {
++ /* If the queue was seeky for too long, break it apart. */
++ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
++ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
++ bfqq = bfq_split_bfqq(bic, bfqq);
++ split = true;
++ if (!bfqq)
++ goto new_queue;
+ }
+ }
+
+@@ -3137,6 +3773,26 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+ rq->elv.priv[0] = bic;
+ rq->elv.priv[1] = bfqq;
+
++ /*
++ * If a bfq_queue has only one process reference, it is owned
++ * by only one bfq_io_cq: we can set the bic field of the
++ * bfq_queue to the address of that structure. Also, if the
++ * queue has just been split, mark a flag so that the
++ * information is available to the other scheduler hooks.
++ */
++ if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
++ bfqq->bic = bic;
++ if (split) {
++ bfq_mark_bfqq_just_split(bfqq);
++ /*
++ * If the queue has just been split from a shared
++ * queue, restore the idle window and the possible
++ * weight raising period.
++ */
++ bfq_bfqq_resume_state(bfqq, bic);
++ }
++ }
++
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return 0;
+@@ -3290,6 +3946,7 @@ static void bfq_init_root_group(struct bfq_group *root_group,
+ root_group->my_entity = NULL;
+ root_group->bfqd = bfqd;
+ #endif
++ root_group->rq_pos_tree = RB_ROOT;
+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+ root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
+ }
+@@ -3370,6 +4027,8 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async;
+ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync;
+
++ bfqd->bfq_coop_thresh = 2;
++ bfqd->bfq_failed_cooperations = 7000;
+ bfqd->bfq_requests_within_timer = 120;
+
+ bfqd->bfq_large_burst_thresh = 11;
+diff --git a/block/bfq.h b/block/bfq.h
+index 485d0c9..f73c942 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -183,6 +183,8 @@ struct bfq_group;
+ * ioprio_class value.
+ * @new_bfqq: shared bfq_queue if queue is cooperating with
+ * one or more other queues.
++ * @pos_node: request-position tree member (see bfq_group's @rq_pos_tree).
++ * @pos_root: request-position tree root (see bfq_group's @rq_pos_tree).
+ * @sort_list: sorted list of pending requests.
+ * @next_rq: if fifo isn't expired, next request to serve.
+ * @queued: nr of requests queued in @sort_list.
+@@ -304,6 +306,26 @@ struct bfq_ttime {
+ * @ttime: associated @bfq_ttime struct
+ * @ioprio: per (request_queue, blkcg) ioprio.
+ * @blkcg_id: id of the blkcg the related io_cq belongs to.
++ * @wr_time_left: snapshot of the time left before weight raising ends
++ * for the sync queue associated to this process; this
++ * snapshot is taken to remember this value while the weight
++ * raising is suspended because the queue is merged with a
++ * shared queue, and is used to set @wr_cur_max_time
++ * when the queue is split from the shared queue and its
++ * weight is raised again
++ * @saved_idle_window: same purpose as the previous field for the idle
++ * window
++ * @saved_IO_bound: same purpose as the previous two fields for the I/O
++ * bound classification of a queue
++ * @saved_in_large_burst: same purpose as the previous fields for the
++ * value of the field keeping the queue's belonging
++ * to a large burst
++ * @was_in_burst_list: true if the queue belonged to a burst list
++ * before its merge with another cooperating queue
++ * @cooperations: counter of consecutive successful queue merges underwent
++ * by any of the process' @bfq_queues
++ * @failed_cooperations: counter of consecutive failed queue merges of any
++ * of the process' @bfq_queues
+ */
+ struct bfq_io_cq {
+ struct io_cq icq; /* must be the first member */
+@@ -314,6 +336,16 @@ struct bfq_io_cq {
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ uint64_t blkcg_id; /* the current blkcg ID */
+ #endif
++
++ unsigned int wr_time_left;
++ bool saved_idle_window;
++ bool saved_IO_bound;
++
++ bool saved_in_large_burst;
++ bool was_in_burst_list;
++
++ unsigned int cooperations;
++ unsigned int failed_cooperations;
+ };
+
+ enum bfq_device_speed {
+@@ -557,6 +589,9 @@ enum bfqq_state_flags {
+ * may need softrt-next-start
+ * update
+ */
++ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
++ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */
++ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */
+ };
+
+ #define BFQ_BFQQ_FNS(name) \
+@@ -583,6 +618,9 @@ BFQ_BFQQ_FNS(budget_new);
+ BFQ_BFQQ_FNS(IO_bound);
+ BFQ_BFQQ_FNS(in_large_burst);
+ BFQ_BFQQ_FNS(constantly_seeky);
++BFQ_BFQQ_FNS(coop);
++BFQ_BFQQ_FNS(split_coop);
++BFQ_BFQQ_FNS(just_split);
+ BFQ_BFQQ_FNS(softrt_update);
+ #undef BFQ_BFQQ_FNS
+
+@@ -675,6 +713,9 @@ struct bfq_group_data {
+ * are groups with more than one active @bfq_entity
+ * (see the comments to the function
+ * bfq_bfqq_must_not_expire()).
++ * @rq_pos_tree: rbtree sorted by next_request position, used when
++ * determining if two or more queues have interleaving
++ * requests (see bfq_find_close_cooperator()).
+ *
+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
+ * there is a set of bfq_groups, each one collecting the lower-level
+@@ -701,6 +742,8 @@ struct bfq_group {
+
+ int active_entities;
+
++ struct rb_root rq_pos_tree;
++
+ struct bfqg_stats stats;
+ struct bfqg_stats dead_stats; /* stats pushed from dead children */
+ };
+@@ -711,6 +754,8 @@ struct bfq_group {
+
+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
+ struct bfq_queue *async_idle_bfqq;
++
++ struct rb_root rq_pos_tree;
+ };
+ #endif
+
+@@ -787,6 +832,27 @@ static void bfq_put_bfqd_unlock(struct bfq_data *bfqd, unsigned long *flags)
+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
+ }
+
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++
++static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
++{
++ struct bfq_entity *group_entity = bfqq->entity.parent;
++
++ if (!group_entity)
++ group_entity = &bfqq->bfqd->root_group->entity;
++
++ return container_of(group_entity, struct bfq_group, entity);
++}
++
++#else
++
++static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
++{
++ return bfqq->bfqd->root_group;
++}
++
++#endif
++
+ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
+ static void bfq_put_queue(struct bfq_queue *bfqq);
+ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
+--
+1.9.1
+
diff --git a/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch2 b/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch2
new file mode 100644
index 0000000..bbccb23
--- /dev/null
+++ b/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch2
@@ -0,0 +1,6361 @@
+From 62745bf5f16f14ce2bd56377eeb84615b4a19cd2 Mon Sep 17 00:00:00 2001
+From: Paolo Valente <paolo.valente@linaro.org>
+Date: Tue, 17 May 2016 08:28:04 +0200
+Subject: [PATCH 4/4] blkck, bfq: turn BFQ-v7r11 for 4.7.0 into BFQ-v8 for
+ 4.7.0
+
+---
+ block/Kconfig.iosched | 2 +-
+ block/bfq-cgroup.c | 448 +++++----
+ block/bfq-iosched.c | 2582 +++++++++++++++++++++++++++++--------------------
+ block/bfq-sched.c | 432 +++++++--
+ block/bfq.h | 697 +++++++------
+ 5 files changed, 2433 insertions(+), 1728 deletions(-)
+
+diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
+index f78cd1a..6d92579 100644
+--- a/block/Kconfig.iosched
++++ b/block/Kconfig.iosched
+@@ -53,7 +53,7 @@ config IOSCHED_BFQ
+
+ config BFQ_GROUP_IOSCHED
+ bool "BFQ hierarchical scheduling support"
+- depends on CGROUPS && IOSCHED_BFQ=y
++ depends on IOSCHED_BFQ && BLK_CGROUP
+ default n
+ ---help---
+ Enable hierarchical scheduling in BFQ, using the blkio controller.
+diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
+index 5ee99ec..bc01663 100644
+--- a/block/bfq-cgroup.c
++++ b/block/bfq-cgroup.c
+@@ -162,7 +162,6 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
+ static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
+ {
+ struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq);
+- BUG_ON(!pd);
+ return pd_to_bfqg(pd);
+ }
+
+@@ -224,14 +223,6 @@ static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw)
+ blkg_rwstat_add(&bfqg->stats.merged, rw, 1);
+ }
+
+-static void bfqg_stats_update_dispatch(struct bfq_group *bfqg,
+- uint64_t bytes, int rw)
+-{
+- blkg_stat_add(&bfqg->stats.sectors, bytes >> 9);
+- blkg_rwstat_add(&bfqg->stats.serviced, rw, 1);
+- blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes);
+-}
+-
+ static void bfqg_stats_update_completion(struct bfq_group *bfqg,
+ uint64_t start_time, uint64_t io_start_time, int rw)
+ {
+@@ -248,17 +239,11 @@ static void bfqg_stats_update_completion(struct bfq_group *bfqg,
+ /* @stats = 0 */
+ static void bfqg_stats_reset(struct bfqg_stats *stats)
+ {
+- if (!stats)
+- return;
+-
+ /* queued stats shouldn't be cleared */
+- blkg_rwstat_reset(&stats->service_bytes);
+- blkg_rwstat_reset(&stats->serviced);
+ blkg_rwstat_reset(&stats->merged);
+ blkg_rwstat_reset(&stats->service_time);
+ blkg_rwstat_reset(&stats->wait_time);
+ blkg_stat_reset(&stats->time);
+- blkg_stat_reset(&stats->unaccounted_time);
+ blkg_stat_reset(&stats->avg_queue_size_sum);
+ blkg_stat_reset(&stats->avg_queue_size_samples);
+ blkg_stat_reset(&stats->dequeue);
+@@ -268,21 +253,19 @@ static void bfqg_stats_reset(struct bfqg_stats *stats)
+ }
+
+ /* @to += @from */
+-static void bfqg_stats_merge(struct bfqg_stats *to, struct bfqg_stats *from)
++static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
+ {
+ if (!to || !from)
+ return;
+
+ /* queued stats shouldn't be cleared */
+- blkg_rwstat_add_aux(&to->service_bytes, &from->service_bytes);
+- blkg_rwstat_add_aux(&to->serviced, &from->serviced);
+ blkg_rwstat_add_aux(&to->merged, &from->merged);
+ blkg_rwstat_add_aux(&to->service_time, &from->service_time);
+ blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
+ blkg_stat_add_aux(&from->time, &from->time);
+- blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time);
+ blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
+- blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
++ blkg_stat_add_aux(&to->avg_queue_size_samples,
++ &from->avg_queue_size_samples);
+ blkg_stat_add_aux(&to->dequeue, &from->dequeue);
+ blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
+ blkg_stat_add_aux(&to->idle_time, &from->idle_time);
+@@ -308,10 +291,8 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
+ if (unlikely(!parent))
+ return;
+
+- bfqg_stats_merge(&parent->dead_stats, &bfqg->stats);
+- bfqg_stats_merge(&parent->dead_stats, &bfqg->dead_stats);
++ bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
+ bfqg_stats_reset(&bfqg->stats);
+- bfqg_stats_reset(&bfqg->dead_stats);
+ }
+
+ static void bfq_init_entity(struct bfq_entity *entity,
+@@ -332,15 +313,11 @@ static void bfq_init_entity(struct bfq_entity *entity,
+
+ static void bfqg_stats_exit(struct bfqg_stats *stats)
+ {
+- blkg_rwstat_exit(&stats->service_bytes);
+- blkg_rwstat_exit(&stats->serviced);
+ blkg_rwstat_exit(&stats->merged);
+ blkg_rwstat_exit(&stats->service_time);
+ blkg_rwstat_exit(&stats->wait_time);
+ blkg_rwstat_exit(&stats->queued);
+- blkg_stat_exit(&stats->sectors);
+ blkg_stat_exit(&stats->time);
+- blkg_stat_exit(&stats->unaccounted_time);
+ blkg_stat_exit(&stats->avg_queue_size_sum);
+ blkg_stat_exit(&stats->avg_queue_size_samples);
+ blkg_stat_exit(&stats->dequeue);
+@@ -351,15 +328,11 @@ static void bfqg_stats_exit(struct bfqg_stats *stats)
+
+ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
+ {
+- if (blkg_rwstat_init(&stats->service_bytes, gfp) ||
+- blkg_rwstat_init(&stats->serviced, gfp) ||
+- blkg_rwstat_init(&stats->merged, gfp) ||
++ if (blkg_rwstat_init(&stats->merged, gfp) ||
+ blkg_rwstat_init(&stats->service_time, gfp) ||
+ blkg_rwstat_init(&stats->wait_time, gfp) ||
+ blkg_rwstat_init(&stats->queued, gfp) ||
+- blkg_stat_init(&stats->sectors, gfp) ||
+ blkg_stat_init(&stats->time, gfp) ||
+- blkg_stat_init(&stats->unaccounted_time, gfp) ||
+ blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
+ blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
+ blkg_stat_init(&stats->dequeue, gfp) ||
+@@ -374,20 +347,36 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
+ }
+
+ static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
+- {
++{
+ return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
+- }
++}
+
+ static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
+ {
+ return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
+ }
+
++static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
++{
++ struct bfq_group_data *bgd;
++
++ bgd = kzalloc(sizeof(*bgd), GFP_KERNEL);
++ if (!bgd)
++ return NULL;
++ return &bgd->pd;
++}
++
+ static void bfq_cpd_init(struct blkcg_policy_data *cpd)
+ {
+ struct bfq_group_data *d = cpd_to_bfqgd(cpd);
+
+- d->weight = BFQ_DEFAULT_GRP_WEIGHT;
++ d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
++ CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
++}
++
++static void bfq_cpd_free(struct blkcg_policy_data *cpd)
++{
++ kfree(cpd_to_bfqgd(cpd));
+ }
+
+ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
+@@ -398,8 +387,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
+ if (!bfqg)
+ return NULL;
+
+- if (bfqg_stats_init(&bfqg->stats, gfp) ||
+- bfqg_stats_init(&bfqg->dead_stats, gfp)) {
++ if (bfqg_stats_init(&bfqg->stats, gfp)) {
+ kfree(bfqg);
+ return NULL;
+ }
+@@ -407,27 +395,20 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
+ return &bfqg->pd;
+ }
+
+-static void bfq_group_set_parent(struct bfq_group *bfqg,
+- struct bfq_group *parent)
++static void bfq_pd_init(struct blkg_policy_data *pd)
+ {
++ struct blkcg_gq *blkg;
++ struct bfq_group *bfqg;
++ struct bfq_data *bfqd;
+ struct bfq_entity *entity;
++ struct bfq_group_data *d;
+
+- BUG_ON(!parent);
+- BUG_ON(!bfqg);
+- BUG_ON(bfqg == parent);
+-
++ blkg = pd_to_blkg(pd);
++ BUG_ON(!blkg);
++ bfqg = blkg_to_bfqg(blkg);
++ bfqd = blkg->q->elevator->elevator_data;
+ entity = &bfqg->entity;
+- entity->parent = parent->my_entity;
+- entity->sched_data = &parent->sched_data;
+-}
+-
+-static void bfq_pd_init(struct blkg_policy_data *pd)
+-{
+- struct blkcg_gq *blkg = pd_to_blkg(pd);
+- struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+- struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
+- struct bfq_entity *entity = &bfqg->entity;
+- struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
++ d = blkcg_to_bfqgd(blkg->blkcg);
+
+ entity->orig_weight = entity->weight = entity->new_weight = d->weight;
+ entity->my_sched_data = &bfqg->sched_data;
+@@ -445,45 +426,28 @@ static void bfq_pd_free(struct blkg_policy_data *pd)
+ struct bfq_group *bfqg = pd_to_bfqg(pd);
+
+ bfqg_stats_exit(&bfqg->stats);
+- bfqg_stats_exit(&bfqg->dead_stats);
+-
+ return kfree(bfqg);
+ }
+
+-/* offset delta from bfqg->stats to bfqg->dead_stats */
+-static const int dead_stats_off_delta = offsetof(struct bfq_group, dead_stats) -
+- offsetof(struct bfq_group, stats);
+-
+-/* to be used by recursive prfill, sums live and dead stats recursively */
+-static u64 bfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
++static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
+ {
+- u64 sum = 0;
++ struct bfq_group *bfqg = pd_to_bfqg(pd);
+
+- sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
+- sum += blkg_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
+- off + dead_stats_off_delta);
+- return sum;
++ bfqg_stats_reset(&bfqg->stats);
+ }
+
+-/* to be used by recursive prfill, sums live and dead rwstats recursively */
+-static struct blkg_rwstat bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
+- int off)
++static void bfq_group_set_parent(struct bfq_group *bfqg,
++ struct bfq_group *parent)
+ {
+- struct blkg_rwstat a, b;
+-
+- a = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off);
+- b = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq,
+- off + dead_stats_off_delta);
+- blkg_rwstat_add_aux(&a, &b);
+- return a;
+-}
++ struct bfq_entity *entity;
+
+-static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
+-{
+- struct bfq_group *bfqg = pd_to_bfqg(pd);
++ BUG_ON(!parent);
++ BUG_ON(!bfqg);
++ BUG_ON(bfqg == parent);
+
+- bfqg_stats_reset(&bfqg->stats);
+- bfqg_stats_reset(&bfqg->dead_stats);
++ entity = &bfqg->entity;
++ entity->parent = parent->my_entity;
++ entity->sched_data = &parent->sched_data;
+ }
+
+ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+@@ -531,13 +495,18 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+ return bfqg;
+ }
+
+-static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
++static void bfq_pos_tree_add_move(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq);
++
++static void bfq_bfqq_expire(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ bool compensate,
++ enum bfqq_expiration reason);
+
+ /**
+ * bfq_bfqq_move - migrate @bfqq to @bfqg.
+ * @bfqd: queue descriptor.
+ * @bfqq: the queue to move.
+- * @entity: @bfqq's entity.
+ * @bfqg: the group to move to.
+ *
+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
+@@ -548,26 +517,40 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ * rcu_read_lock()).
+ */
+ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+- struct bfq_entity *entity, struct bfq_group *bfqg)
++ struct bfq_group *bfqg)
+ {
+- int busy, resume;
++ struct bfq_entity *entity = &bfqq->entity;
+
+- busy = bfq_bfqq_busy(bfqq);
+- resume = !RB_EMPTY_ROOT(&bfqq->sort_list);
+-
+- BUG_ON(resume && !entity->on_st);
+- BUG_ON(busy && !resume && entity->on_st &&
++ BUG_ON(!bfq_bfqq_busy(bfqq) && !RB_EMPTY_ROOT(&bfqq->sort_list));
++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list) && !entity->on_st);
++ BUG_ON(bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list)
++ && entity->on_st &&
+ bfqq != bfqd->in_service_queue);
++ BUG_ON(!bfq_bfqq_busy(bfqq) && bfqq == bfqd->in_service_queue);
++
++ /* If bfqq is empty, then bfq_bfqq_expire also invokes
++ * bfq_del_bfqq_busy, thereby removing bfqq and its entity
++ * from data structures related to current group. Otherwise we
++ * need to remove bfqq explicitly with bfq_deactivate_bfqq, as
++ * we do below.
++ */
++ if (bfqq == bfqd->in_service_queue)
++ bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
++ false, BFQ_BFQQ_PREEMPTED);
++
++ BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq)
++ && &bfq_entity_service_tree(entity)->idle !=
++ entity->tree);
+
+- if (busy) {
+- BUG_ON(atomic_read(&bfqq->ref) < 2);
++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
+
+- if (!resume)
+- bfq_del_bfqq_busy(bfqd, bfqq, 0);
+- else
+- bfq_deactivate_bfqq(bfqd, bfqq, 0);
+- } else if (entity->on_st)
++ if (bfq_bfqq_busy(bfqq))
++ bfq_deactivate_bfqq(bfqd, bfqq, 0);
++ else if (entity->on_st) {
++ BUG_ON(&bfq_entity_service_tree(entity)->idle !=
++ entity->tree);
+ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
++ }
+ bfqg_put(bfqq_group(bfqq));
+
+ /*
+@@ -579,14 +562,17 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ entity->sched_data = &bfqg->sched_data;
+ bfqg_get(bfqg);
+
+- if (busy) {
++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq));
++ if (bfq_bfqq_busy(bfqq)) {
+ bfq_pos_tree_add_move(bfqd, bfqq);
+- if (resume)
+- bfq_activate_bfqq(bfqd, bfqq);
++ bfq_activate_bfqq(bfqd, bfqq);
+ }
+
+ if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
+ bfq_schedule_dispatch(bfqd);
++ BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq)
++ && &bfq_entity_service_tree(entity)->idle !=
++ entity->tree);
+ }
+
+ /**
+@@ -621,7 +607,8 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+ bic_set_bfqq(bic, NULL, 0);
+ bfq_log_bfqq(bfqd, async_bfqq,
+ "bic_change_group: %p %d",
+- async_bfqq, atomic_read(&async_bfqq->ref));
++ async_bfqq,
++ async_bfqq->ref);
+ bfq_put_queue(async_bfqq);
+ }
+ }
+@@ -629,7 +616,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+ if (sync_bfqq) {
+ entity = &sync_bfqq->entity;
+ if (entity->sched_data != &bfqg->sched_data)
+- bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
++ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ }
+
+ return bfqg;
+@@ -638,25 +625,23 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+ static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+ {
+ struct bfq_data *bfqd = bic_to_bfqd(bic);
+- struct blkcg *blkcg;
+ struct bfq_group *bfqg = NULL;
+- uint64_t id;
++ uint64_t serial_nr;
+
+ rcu_read_lock();
+- blkcg = bio_blkcg(bio);
+- id = blkcg->css.serial_nr;
+- rcu_read_unlock();
++ serial_nr = bio_blkcg(bio)->css.serial_nr;
+
+ /*
+ * Check whether blkcg has changed. The condition may trigger
+ * spuriously on a newly created cic but there's no harm.
+ */
+- if (unlikely(!bfqd) || likely(bic->blkcg_id == id))
+- return;
++ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
++ goto out;
+
+- bfqg = __bfq_bic_change_cgroup(bfqd, bic, blkcg);
+- BUG_ON(!bfqg);
+- bic->blkcg_id = id;
++ bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
++ bic->blkcg_serial_nr = serial_nr;
++out:
++ rcu_read_unlock();
+ }
+
+ /**
+@@ -682,8 +667,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+ BUG_ON(!bfqq);
+- bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
+- return;
++ bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
+ }
+
+ /**
+@@ -711,16 +695,15 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd,
+ if (bfqg->sched_data.in_service_entity)
+ bfq_reparent_leaf_entity(bfqd,
+ bfqg->sched_data.in_service_entity);
+-
+- return;
+ }
+
+ /**
+- * bfq_destroy_group - destroy @bfqg.
+- * @bfqg: the group being destroyed.
++ * bfq_pd_offline - deactivate the entity associated with @pd,
++ * and reparent its children entities.
++ * @pd: descriptor of the policy going offline.
+ *
+- * Destroy @bfqg, making sure that it is not referenced from its parent.
+- * blkio already grabs the queue_lock for us, so no need to use RCU-based magic
++ * blkio already grabs the queue_lock for us, so no need to use
++ * RCU-based magic
+ */
+ static void bfq_pd_offline(struct blkg_policy_data *pd)
+ {
+@@ -779,6 +762,12 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
+ bfq_put_async_queues(bfqd, bfqg);
+ BUG_ON(entity->tree);
+
++ /*
++ * @blkg is going offline and will be ignored by
++ * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so
++ * that they don't get lost. If IOs complete after this point, the
++ * stats for them will be lost. Oh well...
++ */
+ bfqg_stats_xfer_dead(bfqg);
+ }
+
+@@ -788,46 +777,35 @@ static void bfq_end_wr_async(struct bfq_data *bfqd)
+
+ list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
+ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
++ BUG_ON(!bfqg);
+
+ bfq_end_wr_async_queues(bfqd, bfqg);
+ }
+ bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+ }
+
+-static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css,
+- struct cftype *cftype)
+-{
+- struct blkcg *blkcg = css_to_blkcg(css);
+- struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+- int ret = -EINVAL;
+-
+- spin_lock_irq(&blkcg->lock);
+- ret = bfqgd->weight;
+- spin_unlock_irq(&blkcg->lock);
+-
+- return ret;
+-}
+-
+-static int bfqio_cgroup_weight_read_dfl(struct seq_file *sf, void *v)
++static int bfq_io_show_weight(struct seq_file *sf, void *v)
+ {
+ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
++ unsigned int val = 0;
+
+- spin_lock_irq(&blkcg->lock);
+- seq_printf(sf, "%u\n", bfqgd->weight);
+- spin_unlock_irq(&blkcg->lock);
++ if (bfqgd)
++ val = bfqgd->weight;
++
++ seq_printf(sf, "%u\n", val);
+
+ return 0;
+ }
+
+-static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
+- struct cftype *cftype,
+- u64 val)
++static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
++ struct cftype *cftype,
++ u64 val)
+ {
+ struct blkcg *blkcg = css_to_blkcg(css);
+ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+ struct blkcg_gq *blkg;
+- int ret = -EINVAL;
++ int ret = -ERANGE;
+
+ if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
+ return ret;
+@@ -837,6 +815,7 @@ static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
+ bfqgd->weight = (unsigned short)val;
+ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
++
+ if (!bfqg)
+ continue;
+ /*
+@@ -871,13 +850,18 @@ static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
+ return ret;
+ }
+
+-static ssize_t bfqio_cgroup_weight_write_dfl(struct kernfs_open_file *of,
+- char *buf, size_t nbytes,
+- loff_t off)
++static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
++ char *buf, size_t nbytes,
++ loff_t off)
+ {
++ u64 weight;
+ /* First unsigned long found in the file is used */
+- return bfqio_cgroup_weight_write(of_css(of), NULL,
+- simple_strtoull(strim(buf), NULL, 0));
++ int ret = kstrtoull(strim(buf), 0, &weight);
++
++ if (ret)
++ return ret;
++
++ return bfq_io_set_weight_legacy(of_css(of), NULL, weight);
+ }
+
+ static int bfqg_print_stat(struct seq_file *sf, void *v)
+@@ -897,16 +881,17 @@ static int bfqg_print_rwstat(struct seq_file *sf, void *v)
+ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+ {
+- u64 sum = bfqg_stat_pd_recursive_sum(pd, off);
+-
++ u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd),
++ &blkcg_policy_bfq, off);
+ return __blkg_prfill_u64(sf, pd, sum);
+ }
+
+ static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+ {
+- struct blkg_rwstat sum = bfqg_rwstat_pd_recursive_sum(pd, off);
+-
++ struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd),
++ &blkcg_policy_bfq,
++ off);
+ return __blkg_prfill_rwstat(sf, pd, &sum);
+ }
+
+@@ -926,6 +911,41 @@ static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
+ return 0;
+ }
+
++static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
++ int off)
++{
++ u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
++
++ return __blkg_prfill_u64(sf, pd, sum >> 9);
++}
++
++static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
++ bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
++ return 0;
++}
++
++static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
++ struct blkg_policy_data *pd, int off)
++{
++ struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL,
++ offsetof(struct blkcg_gq, stat_bytes));
++ u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
++ atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
++
++ return __blkg_prfill_u64(sf, pd, sum >> 9);
++}
++
++static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
++{
++ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
++ bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
++ false);
++ return 0;
++}
++
++
+ static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+ {
+@@ -950,7 +970,8 @@ static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
+ return 0;
+ }
+
+-static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
++static struct bfq_group *
++bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+ {
+ int ret;
+
+@@ -958,41 +979,18 @@ static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int n
+ if (ret)
+ return NULL;
+
+- return blkg_to_bfqg(bfqd->queue->root_blkg);
+-}
+-
+-static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
+-{
+- struct bfq_group_data *bgd;
+-
+- bgd = kzalloc(sizeof(*bgd), GFP_KERNEL);
+- if (!bgd)
+- return NULL;
+- return &bgd->pd;
++ return blkg_to_bfqg(bfqd->queue->root_blkg);
+ }
+
+-static void bfq_cpd_free(struct blkcg_policy_data *cpd)
+-{
+- kfree(cpd_to_bfqgd(cpd));
+-}
+-
+-static struct cftype bfqio_files_dfl[] = {
++static struct cftype bfq_blkcg_legacy_files[] = {
+ {
+- .name = "weight",
++ .name = "bfq.weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+- .seq_show = bfqio_cgroup_weight_read_dfl,
+- .write = bfqio_cgroup_weight_write_dfl,
++ .seq_show = bfq_io_show_weight,
++ .write_u64 = bfq_io_set_weight_legacy,
+ },
+- {} /* terminate */
+-};
+
+-static struct cftype bfqio_files[] = {
+- {
+- .name = "bfq.weight",
+- .read_u64 = bfqio_cgroup_weight_read,
+- .write_u64 = bfqio_cgroup_weight_write,
+- },
+- /* statistics, cover only the tasks in the bfqg */
++ /* statistics, covers only the tasks in the bfqg */
+ {
+ .name = "bfq.time",
+ .private = offsetof(struct bfq_group, stats.time),
+@@ -1000,18 +998,17 @@ static struct cftype bfqio_files[] = {
+ },
+ {
+ .name = "bfq.sectors",
+- .private = offsetof(struct bfq_group, stats.sectors),
+- .seq_show = bfqg_print_stat,
++ .seq_show = bfqg_print_stat_sectors,
+ },
+ {
+ .name = "bfq.io_service_bytes",
+- .private = offsetof(struct bfq_group, stats.service_bytes),
+- .seq_show = bfqg_print_rwstat,
++ .private = (unsigned long)&blkcg_policy_bfq,
++ .seq_show = blkg_print_stat_bytes,
+ },
+ {
+ .name = "bfq.io_serviced",
+- .private = offsetof(struct bfq_group, stats.serviced),
+- .seq_show = bfqg_print_rwstat,
++ .private = (unsigned long)&blkcg_policy_bfq,
++ .seq_show = blkg_print_stat_ios,
+ },
+ {
+ .name = "bfq.io_service_time",
+@@ -1042,18 +1039,17 @@ static struct cftype bfqio_files[] = {
+ },
+ {
+ .name = "bfq.sectors_recursive",
+- .private = offsetof(struct bfq_group, stats.sectors),
+- .seq_show = bfqg_print_stat_recursive,
++ .seq_show = bfqg_print_stat_sectors_recursive,
+ },
+ {
+ .name = "bfq.io_service_bytes_recursive",
+- .private = offsetof(struct bfq_group, stats.service_bytes),
+- .seq_show = bfqg_print_rwstat_recursive,
++ .private = (unsigned long)&blkcg_policy_bfq,
++ .seq_show = blkg_print_stat_bytes_recursive,
+ },
+ {
+ .name = "bfq.io_serviced_recursive",
+- .private = offsetof(struct bfq_group, stats.serviced),
+- .seq_show = bfqg_print_rwstat_recursive,
++ .private = (unsigned long)&blkcg_policy_bfq,
++ .seq_show = blkg_print_stat_ios_recursive,
+ },
+ {
+ .name = "bfq.io_service_time_recursive",
+@@ -1099,32 +1095,35 @@ static struct cftype bfqio_files[] = {
+ .private = offsetof(struct bfq_group, stats.dequeue),
+ .seq_show = bfqg_print_stat,
+ },
+- {
+- .name = "bfq.unaccounted_time",
+- .private = offsetof(struct bfq_group, stats.unaccounted_time),
+- .seq_show = bfqg_print_stat,
+- },
+ { } /* terminate */
+ };
+
+-static struct blkcg_policy blkcg_policy_bfq = {
+- .dfl_cftypes = bfqio_files_dfl,
+- .legacy_cftypes = bfqio_files,
+-
+- .pd_alloc_fn = bfq_pd_alloc,
+- .pd_init_fn = bfq_pd_init,
+- .pd_offline_fn = bfq_pd_offline,
+- .pd_free_fn = bfq_pd_free,
+- .pd_reset_stats_fn = bfq_pd_reset_stats,
+-
+- .cpd_alloc_fn = bfq_cpd_alloc,
+- .cpd_init_fn = bfq_cpd_init,
+- .cpd_bind_fn = bfq_cpd_init,
+- .cpd_free_fn = bfq_cpd_free,
+-
++static struct cftype bfq_blkg_files[] = {
++ {
++ .name = "bfq.weight",
++ .flags = CFTYPE_NOT_ON_ROOT,
++ .seq_show = bfq_io_show_weight,
++ .write = bfq_io_set_weight,
++ },
++ {} /* terminate */
+ };
+
+-#else
++#else /* CONFIG_BFQ_GROUP_IOSCHED */
++
++static inline void bfqg_stats_update_io_add(struct bfq_group *bfqg,
++ struct bfq_queue *bfqq, int rw) { }
++static inline void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw) { }
++static inline void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw) { }
++static inline void bfqg_stats_update_completion(struct bfq_group *bfqg,
++ uint64_t start_time, uint64_t io_start_time, int rw) { }
++static inline void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
++struct bfq_group *curr_bfqg) { }
++static inline void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { }
++static inline void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
++static inline void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
++static inline void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
++static inline void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
++static inline void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
+
+ static void bfq_init_entity(struct bfq_entity *entity,
+ struct bfq_group *bfqg)
+@@ -1146,29 +1145,22 @@ bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+ return bfqd->root_group;
+ }
+
+-static void bfq_bfqq_move(struct bfq_data *bfqd,
+- struct bfq_queue *bfqq,
+- struct bfq_entity *entity,
+- struct bfq_group *bfqg)
+-{
+-}
+-
+ static void bfq_end_wr_async(struct bfq_data *bfqd)
+ {
+ bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+ }
+
+-static void bfq_disconnect_groups(struct bfq_data *bfqd)
+-{
+- bfq_put_async_queues(bfqd, bfqd->root_group);
+-}
+-
+ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+ struct blkcg *blkcg)
+ {
+ return bfqd->root_group;
+ }
+
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
++{
++ return bfqq->bfqd->root_group;
++}
++
+ static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+ {
+ struct bfq_group *bfqg;
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index d1f648d..d1f3666 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -7,25 +7,26 @@
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ * Paolo Valente <paolo.valente@unimore.it>
+ *
+- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ * Copyright (C) 2016 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
+ * file.
+ *
+- * BFQ is a proportional-share storage-I/O scheduling algorithm based on
+- * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets,
+- * measured in number of sectors, to processes instead of time slices. The
+- * device is not granted to the in-service process for a given time slice,
+- * but until it has exhausted its assigned budget. This change from the time
+- * to the service domain allows BFQ to distribute the device throughput
+- * among processes as desired, without any distortion due to ZBR, workload
+- * fluctuations or other factors. BFQ uses an ad hoc internal scheduler,
+- * called B-WF2Q+, to schedule processes according to their budgets. More
+- * precisely, BFQ schedules queues associated to processes. Thanks to the
+- * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to
+- * I/O-bound processes issuing sequential requests (to boost the
+- * throughput), and yet guarantee a low latency to interactive and soft
+- * real-time applications.
++ * BFQ is a proportional-share storage-I/O scheduling algorithm based
++ * on the slice-by-slice service scheme of CFQ. But BFQ assigns
++ * budgets, measured in number of sectors, to processes instead of
++ * time slices. The device is not granted to the in-service process
++ * for a given time slice, but until it has exhausted its assigned
++ * budget. This change from the time to the service domain enables BFQ
++ * to distribute the device throughput among processes as desired,
++ * without any distortion due to throughput fluctuations, or to device
++ * internal queueing. BFQ uses an ad hoc internal scheduler, called
++ * B-WF2Q+, to schedule processes according to their budgets. More
++ * precisely, BFQ schedules queues associated with processes. Thanks to
++ * the accurate policy of B-WF2Q+, BFQ can afford to assign high
++ * budgets to I/O-bound processes issuing sequential requests (to
++ * boost the throughput), and yet guarantee a low latency to
++ * interactive and soft real-time applications.
+ *
+ * BFQ is described in [1], where also a reference to the initial, more
+ * theoretical paper on BFQ can be found. The interested reader can find
+@@ -87,7 +88,6 @@ static const int bfq_stats_min_budgets = 194;
+
+ /* Default maximum budget values, in sectors and number of requests. */
+ static const int bfq_default_max_budget = 16 * 1024;
+-static const int bfq_max_budget_async_rq = 4;
+
+ /*
+ * Async to sync throughput distribution is controlled as follows:
+@@ -97,8 +97,7 @@ static const int bfq_max_budget_async_rq = 4;
+ static const int bfq_async_charge_factor = 10;
+
+ /* Default timeout values, in jiffies, approximating CFQ defaults. */
+-static const int bfq_timeout_sync = HZ / 8;
+-static int bfq_timeout_async = HZ / 25;
++static const int bfq_timeout = HZ / 8;
+
+ struct kmem_cache *bfq_pool;
+
+@@ -109,8 +108,9 @@ struct kmem_cache *bfq_pool;
+ #define BFQ_HW_QUEUE_THRESHOLD 4
+ #define BFQ_HW_QUEUE_SAMPLES 32
+
+-#define BFQQ_SEEK_THR (sector_t)(8 * 1024)
+-#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR)
++#define BFQQ_SEEK_THR (sector_t)(8 * 100)
++#define BFQQ_CLOSE_THR (sector_t)(8 * 1024)
++#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8)
+
+ /* Min samples used for peak rate estimation (for autotuning). */
+ #define BFQ_PEAK_RATE_SAMPLES 32
+@@ -141,16 +141,24 @@ struct kmem_cache *bfq_pool;
+ * The device's speed class is dynamically (re)detected in
+ * bfq_update_peak_rate() every time the estimated peak rate is updated.
+ *
+- * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0]
+- * are the reference values for a slow/fast rotational device, whereas
+- * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for
+- * a slow/fast non-rotational device. Finally, device_speed_thresh are the
+- * thresholds used to switch between speed classes.
++ * In the following definitions, R_slow[0]/R_fast[0] and
++ * T_slow[0]/T_fast[0] are the reference values for a slow/fast
++ * rotational device, whereas R_slow[1]/R_fast[1] and
++ * T_slow[1]/T_fast[1] are the reference values for a slow/fast
++ * non-rotational device. Finally, device_speed_thresh are the
++ * thresholds used to switch between speed classes. The reference
++ * rates are not the actual peak rates of the devices used as a
++ * reference, but slightly lower values. The reason for using these
++ * slightly lower values is that the peak-rate estimator tends to
++ * yield slightly lower values than the actual peak rate (it can yield
++ * the actual peak rate only if there is only one process doing I/O,
++ * and the process does sequential I/O).
++ *
+ * Both the reference peak rates and the thresholds are measured in
+ * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
+ */
+-static int R_slow[2] = {1536, 10752};
+-static int R_fast[2] = {17415, 34791};
++static int R_slow[2] = {1000, 10700};
++static int R_fast[2] = {14000, 33000};
+ /*
+ * To improve readability, a conversion function is used to initialize the
+ * following arrays, which entails that they can be initialized only in a
+@@ -410,11 +418,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd)
+ */
+ static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
+ {
+- return
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- !bfqd->active_numerous_groups &&
+-#endif
+- !bfq_differentiated_weights(bfqd);
++ return !bfq_differentiated_weights(bfqd);
+ }
+
+ /*
+@@ -534,9 +538,19 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
+ static unsigned long bfq_serv_to_charge(struct request *rq,
+ struct bfq_queue *bfqq)
+ {
+- return blk_rq_sectors(rq) *
+- (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) *
+- bfq_async_charge_factor));
++ if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1)
++ return blk_rq_sectors(rq);
++
++ /*
++ * If there are no weight-raised queues, then amplify service
++ * by just the async charge factor; otherwise amplify service
++ * by twice the async charge factor, to further reduce latency
++ * for weight-raised queues.
++ */
++ if (bfqq->bfqd->wr_busy_queues == 0)
++ return blk_rq_sectors(rq) * bfq_async_charge_factor;
++
++ return blk_rq_sectors(rq) * 2 * bfq_async_charge_factor;
+ }
+
+ /**
+@@ -591,12 +605,23 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+ dur = bfqd->RT_prod;
+ do_div(dur, bfqd->peak_rate);
+
+- return dur;
+-}
++ /*
++ * Limit duration between 3 and 13 seconds. Tests show that
++ * higher values than 13 seconds often yield the opposite of
++ * the desired result, i.e., worsen responsiveness by letting
++ * non-interactive and non-soft-real-time applications
++ * preserve weight raising for a too long time interval.
++ *
++ * On the other end, lower values than 3 seconds make it
++ * difficult for most interactive tasks to complete their jobs
++ * before weight-raising finishes.
++ */
++ if (dur > msecs_to_jiffies(13000))
++ dur = msecs_to_jiffies(13000);
++ else if (dur < msecs_to_jiffies(3000))
++ dur = msecs_to_jiffies(3000);
+
+-static unsigned bfq_bfqq_cooperations(struct bfq_queue *bfqq)
+-{
+- return bfqq->bic ? bfqq->bic->cooperations : 0;
++ return dur;
+ }
+
+ static void
+@@ -606,31 +631,11 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+ bfq_mark_bfqq_idle_window(bfqq);
+ else
+ bfq_clear_bfqq_idle_window(bfqq);
++
+ if (bic->saved_IO_bound)
+ bfq_mark_bfqq_IO_bound(bfqq);
+ else
+ bfq_clear_bfqq_IO_bound(bfqq);
+- /* Assuming that the flag in_large_burst is already correctly set */
+- if (bic->wr_time_left && bfqq->bfqd->low_latency &&
+- !bfq_bfqq_in_large_burst(bfqq) &&
+- bic->cooperations < bfqq->bfqd->bfq_coop_thresh) {
+- /*
+- * Start a weight raising period with the duration given by
+- * the raising_time_left snapshot.
+- */
+- if (bfq_bfqq_busy(bfqq))
+- bfqq->bfqd->wr_busy_queues++;
+- bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff;
+- bfqq->wr_cur_max_time = bic->wr_time_left;
+- bfqq->last_wr_start_finish = jiffies;
+- bfqq->entity.prio_changed = 1;
+- }
+- /*
+- * Clear wr_time_left to prevent bfq_bfqq_save_state() from
+- * getting confused about the queue's need of a weight-raising
+- * period.
+- */
+- bic->wr_time_left = 0;
+ }
+
+ static int bfqq_process_refs(struct bfq_queue *bfqq)
+@@ -640,7 +645,7 @@ static int bfqq_process_refs(struct bfq_queue *bfqq)
+ lockdep_assert_held(bfqq->bfqd->queue->queue_lock);
+
+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
+- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
++ process_refs = bfqq->ref - io_refs - bfqq->entity.on_st;
+ BUG_ON(process_refs < 0);
+ return process_refs;
+ }
+@@ -655,6 +660,7 @@ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ hlist_del_init(&item->burst_list_node);
+ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+ bfqd->burst_size = 1;
++ bfqd->burst_parent_entity = bfqq->entity.parent;
+ }
+
+ /* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */
+@@ -663,6 +669,10 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ /* Increment burst size to take into account also bfqq */
+ bfqd->burst_size++;
+
++ bfq_log_bfqq(bfqd, bfqq, "add_to_burst %d", bfqd->burst_size);
++
++ BUG_ON(bfqd->burst_size > bfqd->bfq_large_burst_thresh);
++
+ if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) {
+ struct bfq_queue *pos, *bfqq_item;
+ struct hlist_node *n;
+@@ -672,15 +682,19 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ * other to consider this burst as large.
+ */
+ bfqd->large_burst = true;
++ bfq_log_bfqq(bfqd, bfqq, "add_to_burst: large burst started");
+
+ /*
+ * We can now mark all queues in the burst list as
+ * belonging to a large burst.
+ */
+ hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
+- burst_list_node)
++ burst_list_node) {
+ bfq_mark_bfqq_in_large_burst(bfqq_item);
++ bfq_log_bfqq(bfqd, bfqq_item, "marked in large burst");
++ }
+ bfq_mark_bfqq_in_large_burst(bfqq);
++ bfq_log_bfqq(bfqd, bfqq, "marked in large burst");
+
+ /*
+ * From now on, and until the current burst finishes, any
+@@ -692,67 +706,79 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ hlist_for_each_entry_safe(pos, n, &bfqd->burst_list,
+ burst_list_node)
+ hlist_del_init(&pos->burst_list_node);
+- } else /* burst not yet large: add bfqq to the burst list */
++ } else /*
++ * Burst not yet large: add bfqq to the burst list. Do
++ * not increment the ref counter for bfqq, because bfqq
++ * is removed from the burst list before freeing bfqq
++ * in put_queue.
++ */
+ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+ }
+
+ /*
+- * If many queues happen to become active shortly after each other, then,
+- * to help the processes associated to these queues get their job done as
+- * soon as possible, it is usually better to not grant either weight-raising
+- * or device idling to these queues. In this comment we describe, firstly,
+- * the reasons why this fact holds, and, secondly, the next function, which
+- * implements the main steps needed to properly mark these queues so that
+- * they can then be treated in a different way.
++ * If many queues belonging to the same group happen to be created
++ * shortly after each other, then the processes associated with these
++ * queues have typically a common goal. In particular, bursts of queue
++ * creations are usually caused by services or applications that spawn
++ * many parallel threads/processes. Examples are systemd during boot,
++ * or git grep. To help these processes get their job done as soon as
++ * possible, it is usually better to not grant either weight-raising
++ * or device idling to their queues.
+ *
+- * As for the terminology, we say that a queue becomes active, i.e.,
+- * switches from idle to backlogged, either when it is created (as a
+- * consequence of the arrival of an I/O request), or, if already existing,
+- * when a new request for the queue arrives while the queue is idle.
+- * Bursts of activations, i.e., activations of different queues occurring
+- * shortly after each other, are typically caused by services or applications
+- * that spawn or reactivate many parallel threads/processes. Examples are
+- * systemd during boot or git grep.
++ * In this comment we describe, firstly, the reasons why this fact
++ * holds, and, secondly, the next function, which implements the main
++ * steps needed to properly mark these queues so that they can then be
++ * treated in a different way.
+ *
+- * These services or applications benefit mostly from a high throughput:
+- * the quicker the requests of the activated queues are cumulatively served,
+- * the sooner the target job of these queues gets completed. As a consequence,
+- * weight-raising any of these queues, which also implies idling the device
+- * for it, is almost always counterproductive: in most cases it just lowers
+- * throughput.
++ * The above services or applications benefit mostly from a high
++ * throughput: the quicker the requests of the activated queues are
++ * cumulatively served, the sooner the target job of these queues gets
++ * completed. As a consequence, weight-raising any of these queues,
++ * which also implies idling the device for it, is almost always
++ * counterproductive. In most cases it just lowers throughput.
+ *
+- * On the other hand, a burst of activations may be also caused by the start
+- * of an application that does not consist in a lot of parallel I/O-bound
+- * threads. In fact, with a complex application, the burst may be just a
+- * consequence of the fact that several processes need to be executed to
+- * start-up the application. To start an application as quickly as possible,
+- * the best thing to do is to privilege the I/O related to the application
+- * with respect to all other I/O. Therefore, the best strategy to start as
+- * quickly as possible an application that causes a burst of activations is
+- * to weight-raise all the queues activated during the burst. This is the
++ * On the other hand, a burst of queue creations may be caused also by
++ * the start of an application that does not consist of a lot of
++ * parallel I/O-bound threads. In fact, with a complex application,
++ * several short processes may need to be executed to start-up the
++ * application. In this respect, to start an application as quickly as
++ * possible, the best thing to do is in any case to privilege the I/O
++ * related to the application with respect to all other
++ * I/O. Therefore, the best strategy to start as quickly as possible
++ * an application that causes a burst of queue creations is to
++ * weight-raise all the queues created during the burst. This is the
+ * exact opposite of the best strategy for the other type of bursts.
+ *
+- * In the end, to take the best action for each of the two cases, the two
+- * types of bursts need to be distinguished. Fortunately, this seems
+- * relatively easy to do, by looking at the sizes of the bursts. In
+- * particular, we found a threshold such that bursts with a larger size
+- * than that threshold are apparently caused only by services or commands
+- * such as systemd or git grep. For brevity, hereafter we call just 'large'
+- * these bursts. BFQ *does not* weight-raise queues whose activations occur
+- * in a large burst. In addition, for each of these queues BFQ performs or
+- * does not perform idling depending on which choice boosts the throughput
+- * most. The exact choice depends on the device and request pattern at
++ * In the end, to take the best action for each of the two cases, the
++ * two types of bursts need to be distinguished. Fortunately, this
++ * seems relatively easy, by looking at the sizes of the bursts. In
++ * particular, we found a threshold such that only bursts with a
++ * larger size than that threshold are apparently caused by
++ * services or commands such as systemd or git grep. For brevity,
++ * hereafter we call just 'large' these bursts. BFQ *does not*
++ * weight-raise queues whose creation occurs in a large burst. In
++ * addition, for each of these queues BFQ performs or does not perform
++ * idling depending on which choice boosts the throughput more. The
++ * exact choice depends on the device and request pattern at
+ * hand.
+ *
+- * Turning back to the next function, it implements all the steps needed
+- * to detect the occurrence of a large burst and to properly mark all the
+- * queues belonging to it (so that they can then be treated in a different
+- * way). This goal is achieved by maintaining a special "burst list" that
+- * holds, temporarily, the queues that belong to the burst in progress. The
+- * list is then used to mark these queues as belonging to a large burst if
+- * the burst does become large. The main steps are the following.
++ * Unfortunately, false positives may occur while an interactive task
++ * is starting (e.g., an application is being started). The
++ * consequence is that the queues associated with the task do not
++ * enjoy weight raising as expected. Fortunately these false positives
++ * are very rare. They typically occur if some service happens to
++ * start doing I/O exactly when the interactive task starts.
++ *
++ * Turning back to the next function, it implements all the steps
++ * needed to detect the occurrence of a large burst and to properly
++ * mark all the queues belonging to it (so that they can then be
++ * treated in a different way). This goal is achieved by maintaining a
++ * "burst list" that holds, temporarily, the queues that belong to the
++ * burst in progress. The list is then used to mark these queues as
++ * belonging to a large burst if the burst does become large. The main
++ * steps are the following.
+ *
+- * . when the very first queue is activated, the queue is inserted into the
++ * . when the very first queue is created, the queue is inserted into the
+ * list (as it could be the first queue in a possible burst)
+ *
+ * . if the current burst has not yet become large, and a queue Q that does
+@@ -773,13 +799,13 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ *
+ * . the device enters a large-burst mode
+ *
+- * . if a queue Q that does not belong to the burst is activated while
++ * . if a queue Q that does not belong to the burst is created while
+ * the device is in large-burst mode and shortly after the last time
+ * at which a queue either entered the burst list or was marked as
+ * belonging to the current large burst, then Q is immediately marked
+ * as belonging to a large burst.
+ *
+- * . if a queue Q that does not belong to the burst is activated a while
++ * . if a queue Q that does not belong to the burst is created a while
+ * later, i.e., not shortly after, than the last time at which a queue
+ * either entered the burst list or was marked as belonging to the
+ * current large burst, then the current burst is deemed as finished and:
+@@ -792,52 +818,44 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ * in a possible new burst (then the burst list contains just Q
+ * after this step).
+ */
+-static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+- bool idle_for_long_time)
++static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ /*
+- * If bfqq happened to be activated in a burst, but has been idle
+- * for at least as long as an interactive queue, then we assume
+- * that, in the overall I/O initiated in the burst, the I/O
+- * associated to bfqq is finished. So bfqq does not need to be
+- * treated as a queue belonging to a burst anymore. Accordingly,
+- * we reset bfqq's in_large_burst flag if set, and remove bfqq
+- * from the burst list if it's there. We do not decrement instead
+- * burst_size, because the fact that bfqq does not need to belong
+- * to the burst list any more does not invalidate the fact that
+- * bfqq may have been activated during the current burst.
+- */
+- if (idle_for_long_time) {
+- hlist_del_init(&bfqq->burst_list_node);
+- bfq_clear_bfqq_in_large_burst(bfqq);
+- }
+-
+- /*
+ * If bfqq is already in the burst list or is part of a large
+- * burst, then there is nothing else to do.
++ * burst, or finally has just been split, then there is
++ * nothing else to do.
+ */
+ if (!hlist_unhashed(&bfqq->burst_list_node) ||
+- bfq_bfqq_in_large_burst(bfqq))
++ bfq_bfqq_in_large_burst(bfqq) ||
++ time_is_after_eq_jiffies(bfqq->split_time +
++ msecs_to_jiffies(10)))
+ return;
+
+ /*
+- * If bfqq's activation happens late enough, then the current
+- * burst is finished, and related data structures must be reset.
++ * If bfqq's creation happens late enough, or bfqq belongs to
++ * a different group than the burst group, then the current
++ * burst is finished, and related data structures must be
++ * reset.
+ *
+- * In this respect, consider the special case where bfqq is the very
+- * first queue being activated. In this case, last_ins_in_burst is
+- * not yet significant when we get here. But it is easy to verify
+- * that, whether or not the following condition is true, bfqq will
+- * end up being inserted into the burst list. In particular the
+- * list will happen to contain only bfqq. And this is exactly what
+- * has to happen, as bfqq may be the first queue in a possible
++ * In this respect, consider the special case where bfqq is
++ * the very first queue created after BFQ is selected for this
++ * device. In this case, last_ins_in_burst and
++ * burst_parent_entity are not yet significant when we get
++ * here. But it is easy to verify that, whether or not the
++ * following condition is true, bfqq will end up being
++ * inserted into the burst list. In particular the list will
++ * happen to contain only bfqq. And this is exactly what has
++ * to happen, as bfqq may be the first queue of the first
+ * burst.
+ */
+ if (time_is_before_jiffies(bfqd->last_ins_in_burst +
+- bfqd->bfq_burst_interval)) {
++ bfqd->bfq_burst_interval) ||
++ bfqq->entity.parent != bfqd->burst_parent_entity) {
+ bfqd->large_burst = false;
+ bfq_reset_burst_list(bfqd, bfqq);
+- return;
++ bfq_log_bfqq(bfqd, bfqq,
++ "handle_burst: late activation or different group");
++ goto end;
+ }
+
+ /*
+@@ -846,8 +864,9 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ * bfqq as belonging to this large burst immediately.
+ */
+ if (bfqd->large_burst) {
++ bfq_log_bfqq(bfqd, bfqq, "handle_burst: marked in burst");
+ bfq_mark_bfqq_in_large_burst(bfqq);
+- return;
++ goto end;
+ }
+
+ /*
+@@ -856,25 +875,492 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ * queue. Then we add bfqq to the burst.
+ */
+ bfq_add_to_burst(bfqd, bfqq);
++end:
++ /*
++ * At this point, bfqq either has been added to the current
++ * burst or has caused the current burst to terminate and a
++ * possible new burst to start. In particular, in the second
++ * case, bfqq has become the first queue in the possible new
++ * burst. In both cases last_ins_in_burst needs to be moved
++ * forward.
++ */
++ bfqd->last_ins_in_burst = jiffies;
++
++}
++
++static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++ return entity->budget - entity->service;
++}
++
++/*
++ * If enough samples have been computed, return the current max budget
++ * stored in bfqd, which is dynamically updated according to the
++ * estimated disk peak rate; otherwise return the default max budget
++ */
++static int bfq_max_budget(struct bfq_data *bfqd)
++{
++ if (bfqd->budgets_assigned < bfq_stats_min_budgets)
++ return bfq_default_max_budget;
++ else
++ return bfqd->bfq_max_budget;
++}
++
++/*
++ * Return min budget, which is a fraction of the current or default
++ * max budget (trying with 1/32)
++ */
++static int bfq_min_budget(struct bfq_data *bfqd)
++{
++ if (bfqd->budgets_assigned < bfq_stats_min_budgets)
++ return bfq_default_max_budget / 32;
++ else
++ return bfqd->bfq_max_budget / 32;
++}
++
++static void bfq_bfqq_expire(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ bool compensate,
++ enum bfqq_expiration reason);
++
++/*
++ * The next function, invoked after the input queue bfqq switches from
++ * idle to busy, updates the budget of bfqq. The function also tells
++ * whether the in-service queue should be expired, by returning
++ * true. The purpose of expiring the in-service queue is to give bfqq
++ * the chance to possibly preempt the in-service queue, and the reason
++ * for preempting the in-service queue is to achieve one of the two
++ * goals below.
++ *
++ * 1. Guarantee to bfqq its reserved bandwidth even if bfqq has
++ * expired because it has remained idle. In particular, bfqq may have
++ * expired for one of the following two reasons:
++ *
++ * - BFQ_BFQQ_NO_MORE_REQUEST bfqq did not enjoy any device idling and
++ * did not make it to issue a new request before its last request
++ * was served;
++ *
++ * - BFQ_BFQQ_TOO_IDLE bfqq did enjoy device idling, but did not issue
++ * a new request before the expiration of the idling-time.
++ *
++ * Even if bfqq has expired for one of the above reasons, the process
++ * associated with the queue may be however issuing requests greedily,
++ * and thus be sensitive to the bandwidth it receives (bfqq may have
++ * remained idle for other reasons: CPU high load, bfqq not enjoying
++ * idling, I/O throttling somewhere in the path from the process to
++ * the I/O scheduler, ...). But if, after every expiration for one of
++ * the above two reasons, bfqq has to wait for the service of at least
++ * one full budget of another queue before being served again, then
++ * bfqq is likely to get a much lower bandwidth or resource time than
++ * its reserved ones. To address this issue, two countermeasures need
++ * to be taken.
++ *
++ * First, the budget and the timestamps of bfqq need to be updated in
++ * a special way on bfqq reactivation: they need to be updated as if
++ * bfqq did not remain idle and did not expire. In fact, if they are
++ * computed as if bfqq expired and remained idle until reactivation,
++ * then the process associated with bfqq is treated as if, instead of
++ * being greedy, it stopped issuing requests when bfqq remained idle,
++ * and restarts issuing requests only on this reactivation. In other
++ * words, the scheduler does not help the process recover the "service
++ * hole" between bfqq expiration and reactivation. As a consequence,
++ * the process receives a lower bandwidth than its reserved one. In
++ * contrast, to recover this hole, the budget must be updated as if
++ * bfqq was not expired at all before this reactivation, i.e., it must
++ * be set to the value of the remaining budget when bfqq was
++ * expired. Along the same line, timestamps need to be assigned the
++ * value they had the last time bfqq was selected for service, i.e.,
++ * before last expiration. Thus timestamps need to be back-shifted
++ * with respect to their normal computation (see [1] for more details
++ * on this tricky aspect).
++ *
++ * Secondly, to allow the process to recover the hole, the in-service
++ * queue must be expired too, to give bfqq the chance to preempt it
++ * immediately. In fact, if bfqq has to wait for a full budget of the
++ * in-service queue to be completed, then it may become impossible to
++ * let the process recover the hole, even if the back-shifted
++ * timestamps of bfqq are lower than those of the in-service queue. If
++ * this happens for most or all of the holes, then the process may not
++ * receive its reserved bandwidth. In this respect, it is worth noting
++ * that, being the service of outstanding requests unpreemptible, a
++ * little fraction of the holes may however be unrecoverable, thereby
++ * causing a little loss of bandwidth.
++ *
++ * The last important point is detecting whether bfqq does need this
++ * bandwidth recovery. In this respect, the next function deems the
++ * process associated with bfqq greedy, and thus allows it to recover
++ * the hole, if: 1) the process is waiting for the arrival of a new
++ * request (which implies that bfqq expired for one of the above two
++ * reasons), and 2) such a request has arrived soon. The first
++ * condition is controlled through the flag non_blocking_wait_rq,
++ * while the second through the flag arrived_in_time. If both
++ * conditions hold, then the function computes the budget in the
++ * above-described special way, and signals that the in-service queue
++ * should be expired. Timestamp back-shifting is done later in
++ * __bfq_activate_entity.
++ *
++ * 2. Reduce latency. Even if timestamps are not backshifted to let
++ * the process associated with bfqq recover a service hole, bfqq may
++ * however happen to have, after being (re)activated, a lower finish
++ * timestamp than the in-service queue. That is, the next budget of
++ * bfqq may have to be completed before the one of the in-service
++ * queue. If this is the case, then preempting the in-service queue
++ * allows this goal to be achieved, apart from the unpreemptible,
++ * outstanding requests mentioned above.
++ *
++ * Unfortunately, regardless of which of the above two goals one wants
++ * to achieve, service trees need first to be updated to know whether
++ * the in-service queue must be preempted. To have service trees
++ * correctly updated, the in-service queue must be expired and
++ * rescheduled, and bfqq must be scheduled too. This is one of the
++ * most costly operations (in future versions, the scheduling
++ * mechanism may be re-designed in such a way to make it possible to
++ * know whether preemption is needed without needing to update service
++ * trees). In addition, queue preemptions almost always cause random
++ * I/O, and thus loss of throughput. Because of these facts, the next
++ * function adopts the following simple scheme to avoid both costly
++ * operations and too frequent preemptions: it requests the expiration
++ * of the in-service queue (unconditionally) only for queues that need
++ * to recover a hole, or that either are weight-raised or deserve to
++ * be weight-raised.
++ */
++static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ bool arrived_in_time,
++ bool wr_or_deserves_wr)
++{
++ struct bfq_entity *entity = &bfqq->entity;
++
++ if (bfq_bfqq_non_blocking_wait_rq(bfqq) && arrived_in_time) {
++ /*
++ * We do not clear the flag non_blocking_wait_rq here, as
++ * the latter is used in bfq_activate_bfqq to signal
++ * that timestamps need to be back-shifted (and is
++ * cleared right after).
++ */
++
++ /*
++ * In next assignment we rely on that either
++ * entity->service or entity->budget are not updated
++ * on expiration if bfqq is empty (see
++ * __bfq_bfqq_recalc_budget). Thus both quantities
++ * remain unchanged after such an expiration, and the
++ * following statement therefore assigns to
++ * entity->budget the remaining budget on such an
++ * expiration. For clarity, entity->service is not
++ * updated on expiration in any case, and, in normal
++ * operation, is reset only when bfqq is selected for
++ * service (see bfq_get_next_queue).
++ */
++ entity->budget = min_t(unsigned long,
++ bfq_bfqq_budget_left(bfqq),
++ bfqq->max_budget);
++
++ BUG_ON(entity->budget < 0);
++ return true;
++ }
++
++ entity->budget = max_t(unsigned long, bfqq->max_budget,
++ bfq_serv_to_charge(bfqq->next_rq,bfqq));
++ BUG_ON(entity->budget < 0);
++
++ bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
++ return wr_or_deserves_wr;
++}
++
++static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ unsigned int old_wr_coeff,
++ bool wr_or_deserves_wr,
++ bool interactive,
++ bool in_burst,
++ bool soft_rt)
++{
++ if (old_wr_coeff == 1 && wr_or_deserves_wr) {
++ /* start a weight-raising period */
++ bfqq->wr_coeff = bfqd->bfq_wr_coeff;
++ if (interactive) /* update wr duration */
++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++ else
++ bfqq->wr_cur_max_time =
++ bfqd->bfq_wr_rt_max_time;
++ /*
++ * If needed, further reduce budget to make sure it is
++ * close to bfqq's backlog, so as to reduce the
++ * scheduling-error component due to a too large
++ * budget. Do not care about throughput consequences,
++ * but only about latency. Finally, do not assign a
++ * too small budget either, to avoid increasing
++ * latency by causing too frequent expirations.
++ */
++ bfqq->entity.budget = min_t(unsigned long,
++ bfqq->entity.budget,
++ 2 * bfq_min_budget(bfqd));
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "wrais starting at %lu, rais_max_time %u",
++ jiffies,
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ } else if (old_wr_coeff > 1) {
++ if (interactive) /* update wr duration */
++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
++ else if (in_burst) {
++ bfqq->wr_coeff = 1;
++ bfq_log_bfqq(bfqd, bfqq,
++ "wrais ending at %lu, rais_max_time %u",
++ jiffies,
++ jiffies_to_msecs(bfqq->
++ wr_cur_max_time));
++ } else if (time_before(
++ bfqq->last_wr_start_finish +
++ bfqq->wr_cur_max_time,
++ jiffies +
++ bfqd->bfq_wr_rt_max_time) &&
++ soft_rt) {
++ /*
++ * The remaining weight-raising time is lower
++ * than bfqd->bfq_wr_rt_max_time, which means
++ * that the application is enjoying weight
++ * raising either because deemed soft-rt in
++ * the near past, or because deemed interactive
++ * long ago.
++ * In both cases, resetting now the current
++ * remaining weight-raising time for the
++ * application to the weight-raising duration
++ * for soft rt applications would not cause any
++ * latency increase for the application (as the
++ * new duration would be higher than the
++ * remaining time).
++ *
++ * In addition, the application is now meeting
++ * the requirements for being deemed soft rt.
++ * In the end we can correctly and safely
++ * (re)charge the weight-raising duration for
++ * the application with the weight-raising
++ * duration for soft rt applications.
++ *
++ * In particular, doing this recharge now, i.e.,
++ * before the weight-raising period for the
++ * application finishes, reduces the probability
++ * of the following negative scenario:
++ * 1) the weight of a soft rt application is
++ * raised at startup (as for any newly
++ * created application),
++ * 2) since the application is not interactive,
++ * at a certain time weight-raising is
++ * stopped for the application,
++ * 3) at that time the application happens to
++ * still have pending requests, and hence
++ * is destined to not have a chance to be
++ * deemed soft rt before these requests are
++ * completed (see the comments to the
++ * function bfq_bfqq_softrt_next_start()
++ * for details on soft rt detection),
++ * 4) these pending requests experience a high
++ * latency because the application is not
++ * weight-raised while they are pending.
++ */
++ bfqq->last_wr_start_finish = jiffies;
++ bfqq->wr_cur_max_time =
++ bfqd->bfq_wr_rt_max_time;
++ bfq_log_bfqq(bfqd, bfqq,
++ "switching to soft_rt wr, or "
++ " just moving forward duration");
++ }
++ }
++}
++
++static bool bfq_bfqq_idle_for_long_time(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ return bfqq->dispatched == 0 &&
++ time_is_before_jiffies(
++ bfqq->budget_timeout +
++ bfqd->bfq_wr_min_idle_time);
++}
++
++static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq,
++ int old_wr_coeff,
++ struct request *rq,
++ bool *interactive)
++{
++ bool soft_rt, in_burst, wr_or_deserves_wr,
++ bfqq_wants_to_preempt,
++ idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq),
++ /*
++ * See the comments on
++ * bfq_bfqq_update_budg_for_activation for
++ * details on the usage of the next variable.
++ */
++ arrived_in_time = time_is_after_jiffies(
++ RQ_BIC(rq)->ttime.last_end_request +
++ bfqd->bfq_slice_idle * 3);
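++	/*
++	 * For instance, with the default 8 ms idle slice,
++	 * arrived_in_time is set if and only if this request
++	 * arrived within 24 ms from the completion of the last
++	 * request of bfqq.
++	 */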
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "bfq_add_request non-busy: "
++ "jiffies %lu, in_time %d, idle_long %d busyw %d "
++ "wr_coeff %u",
++ jiffies, arrived_in_time,
++ idle_for_long_time,
++ bfq_bfqq_non_blocking_wait_rq(bfqq),
++ old_wr_coeff);
++
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
++
++ BUG_ON(bfqq == bfqd->in_service_queue);
++ bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq,
++ rq->cmd_flags);
++
++ /*
++ * bfqq deserves to be weight-raised if:
++ * - it is sync,
++ * - it does not belong to a large burst,
++ * - it has been idle for enough time or is soft real-time,
++ * - it is linked to a bfq_io_cq (i.e., it is not shared in any sense).
++ */
++ in_burst = bfq_bfqq_in_large_burst(bfqq);
++ soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
++ !in_burst &&
++ time_is_before_jiffies(bfqq->soft_rt_next_start);
++ *interactive =
++ !in_burst &&
++ idle_for_long_time;
++ wr_or_deserves_wr = bfqd->low_latency &&
++ (bfqq->wr_coeff > 1 ||
++ (bfq_bfqq_sync(bfqq) &&
++ bfqq->bic && (*interactive || soft_rt)));
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "bfq_add_request: "
++ "in_burst %d, "
++ "soft_rt %d (next %lu), inter %d, bic %p",
++ bfq_bfqq_in_large_burst(bfqq), soft_rt,
++ bfqq->soft_rt_next_start,
++ *interactive,
++ bfqq->bic);
++
++ /*
++ * Using the last flag, update budget and check whether bfqq
++ * may want to preempt the in-service queue.
++ */
++ bfqq_wants_to_preempt =
++ bfq_bfqq_update_budg_for_activation(bfqd, bfqq,
++ arrived_in_time,
++ wr_or_deserves_wr);
++
++ /*
++ * If bfqq happened to be activated in a burst, but has been
++ * idle for much more than an interactive queue, then we
++ * assume that, in the overall I/O initiated in the burst, the
++ * I/O associated with bfqq is finished. So bfqq does not need
++ * to be treated as a queue belonging to a burst
++ * anymore. Accordingly, we reset bfqq's in_large_burst flag
++ * if set, and remove bfqq from the burst list if it's
++ * there. We do not decrement burst_size, because the fact
++ * that bfqq does not need to belong to the burst list any
++ * more does not invalidate the fact that bfqq was created in
++ * a burst.
++ */
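++	/*
++	 * Concretely, "much more" below means that, in addition to
++	 * bfqq having been idle for long (idle_for_long_time),
++	 * more than 10 seconds have elapsed since the overloaded
++	 * budget_timeout field was last updated, i.e., roughly
++	 * since bfqq last remained with no backlog and no
++	 * outstanding requests.
++	 */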
++ if (likely(!bfq_bfqq_just_created(bfqq)) &&
++ idle_for_long_time &&
++ time_is_before_jiffies(
++ bfqq->budget_timeout +
++ msecs_to_jiffies(10000))) {
++ hlist_del_init(&bfqq->burst_list_node);
++ bfq_clear_bfqq_in_large_burst(bfqq);
++ }
++
++ bfq_clear_bfqq_just_created(bfqq);
++
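++	/*
++	 * Promote bfqq to IO-bound once enough consecutive
++	 * requests have arrived "in time", i.e., each within
++	 * three idle slices from the completion of the preceding
++	 * request of bfqq; a late arrival resets the count.
++	 */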
++ if (!bfq_bfqq_IO_bound(bfqq)) {
++ if (arrived_in_time) {
++ bfqq->requests_within_timer++;
++ if (bfqq->requests_within_timer >=
++ bfqd->bfq_requests_within_timer)
++ bfq_mark_bfqq_IO_bound(bfqq);
++ } else
++ bfqq->requests_within_timer = 0;
++ bfq_log_bfqq(bfqd, bfqq, "requests in time %d",
++ bfqq->requests_within_timer);
++ }
++
++ if (bfqd->low_latency) {
++ if (unlikely(time_is_after_jiffies(bfqq->split_time)))
++ /* wraparound */
++ bfqq->split_time =
++ jiffies - bfqd->bfq_wr_min_idle_time - 1;
++
++ if (time_is_before_jiffies(bfqq->split_time +
++ bfqd->bfq_wr_min_idle_time)) {
++ bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq,
++ old_wr_coeff,
++ wr_or_deserves_wr,
++ *interactive,
++ in_burst,
++ soft_rt);
++
++ if (old_wr_coeff != bfqq->wr_coeff)
++ bfqq->entity.prio_changed = 1;
++ }
++ }
++
++ bfqq->last_idle_bklogged = jiffies;
++ bfqq->service_from_backlogged = 0;
++ bfq_clear_bfqq_softrt_update(bfqq);
++
++ bfq_add_bfqq_busy(bfqd, bfqq);
++
++ /*
++ * Expire in-service queue only if preemption may be needed
++ * for guarantees. In this respect, the function
++ * next_queue_may_preempt just checks a simple, necessary
++ * condition, and not a sufficient condition based on
++ * timestamps. In fact, for the latter condition to be
++ * evaluated, timestamps would need first to be updated, and
++ * this operation is quite costly (see the comments on the
++ * function bfq_bfqq_update_budg_for_activation).
++ */
++ if (bfqd->in_service_queue && bfqq_wants_to_preempt &&
++ bfqd->in_service_queue->wr_coeff == 1 &&
++ next_queue_may_preempt(bfqd)) {
++ struct bfq_queue *in_serv =
++ bfqd->in_service_queue;
++ BUG_ON(in_serv == bfqq);
++
++ bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
++ false, BFQ_BFQQ_PREEMPTED);
++ BUG_ON(in_serv->entity.budget < 0);
++ }
+ }
+
+ static void bfq_add_request(struct request *rq)
+ {
+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
+- struct bfq_entity *entity = &bfqq->entity;
+ struct bfq_data *bfqd = bfqq->bfqd;
+ struct request *next_rq, *prev;
+- unsigned long old_wr_coeff = bfqq->wr_coeff;
++ unsigned int old_wr_coeff = bfqq->wr_coeff;
+ bool interactive = false;
+
+- bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
++ bfq_log_bfqq(bfqd, bfqq, "add_request: size %u %s",
++ blk_rq_sectors(rq), rq_is_sync(rq) ? "S" : "A");
++
++ if (bfqq->wr_coeff > 1) /* queue is being weight-raised */
++ bfq_log_bfqq(bfqd, bfqq,
++ "raising period dur %u/%u msec, old coeff %u, w %d(%d)",
++ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
++ jiffies_to_msecs(bfqq->wr_cur_max_time),
++ bfqq->wr_coeff,
++ bfqq->entity.weight, bfqq->entity.orig_weight);
++
+ bfqq->queued[rq_is_sync(rq)]++;
+ bfqd->queued++;
+
+ elv_rb_add(&bfqq->sort_list, rq);
+
+ /*
+- * Check if this request is a better next-serve candidate.
++ * Check if this request is a better next-to-serve candidate.
+ */
+ prev = bfqq->next_rq;
+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
+@@ -887,160 +1373,10 @@ static void bfq_add_request(struct request *rq)
+ if (prev != bfqq->next_rq)
+ bfq_pos_tree_add_move(bfqd, bfqq);
+
+- if (!bfq_bfqq_busy(bfqq)) {
+- bool soft_rt, coop_or_in_burst,
+- idle_for_long_time = time_is_before_jiffies(
+- bfqq->budget_timeout +
+- bfqd->bfq_wr_min_idle_time);
+-
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq,
+- rq->cmd_flags);
+-#endif
+- if (bfq_bfqq_sync(bfqq)) {
+- bool already_in_burst =
+- !hlist_unhashed(&bfqq->burst_list_node) ||
+- bfq_bfqq_in_large_burst(bfqq);
+- bfq_handle_burst(bfqd, bfqq, idle_for_long_time);
+- /*
+- * If bfqq was not already in the current burst,
+- * then, at this point, bfqq either has been
+- * added to the current burst or has caused the
+- * current burst to terminate. In particular, in
+- * the second case, bfqq has become the first
+- * queue in a possible new burst.
+- * In both cases last_ins_in_burst needs to be
+- * moved forward.
+- */
+- if (!already_in_burst)
+- bfqd->last_ins_in_burst = jiffies;
+- }
+-
+- coop_or_in_burst = bfq_bfqq_in_large_burst(bfqq) ||
+- bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh;
+- soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+- !coop_or_in_burst &&
+- time_is_before_jiffies(bfqq->soft_rt_next_start);
+- interactive = !coop_or_in_burst && idle_for_long_time;
+- entity->budget = max_t(unsigned long, bfqq->max_budget,
+- bfq_serv_to_charge(next_rq, bfqq));
+-
+- if (!bfq_bfqq_IO_bound(bfqq)) {
+- if (time_before(jiffies,
+- RQ_BIC(rq)->ttime.last_end_request +
+- bfqd->bfq_slice_idle)) {
+- bfqq->requests_within_timer++;
+- if (bfqq->requests_within_timer >=
+- bfqd->bfq_requests_within_timer)
+- bfq_mark_bfqq_IO_bound(bfqq);
+- } else
+- bfqq->requests_within_timer = 0;
+- }
+-
+- if (!bfqd->low_latency)
+- goto add_bfqq_busy;
+-
+- if (bfq_bfqq_just_split(bfqq))
+- goto set_prio_changed;
+-
+- /*
+- * If the queue:
+- * - is not being boosted,
+- * - has been idle for enough time,
+- * - is not a sync queue or is linked to a bfq_io_cq (it is
+- * shared "for its nature" or it is not shared and its
+- * requests have not been redirected to a shared queue)
+- * start a weight-raising period.
+- */
+- if (old_wr_coeff == 1 && (interactive || soft_rt) &&
+- (!bfq_bfqq_sync(bfqq) || bfqq->bic)) {
+- bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+- if (interactive)
+- bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+- else
+- bfqq->wr_cur_max_time =
+- bfqd->bfq_wr_rt_max_time;
+- bfq_log_bfqq(bfqd, bfqq,
+- "wrais starting at %lu, rais_max_time %u",
+- jiffies,
+- jiffies_to_msecs(bfqq->wr_cur_max_time));
+- } else if (old_wr_coeff > 1) {
+- if (interactive)
+- bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+- else if (coop_or_in_burst ||
+- (bfqq->wr_cur_max_time ==
+- bfqd->bfq_wr_rt_max_time &&
+- !soft_rt)) {
+- bfqq->wr_coeff = 1;
+- bfq_log_bfqq(bfqd, bfqq,
+- "wrais ending at %lu, rais_max_time %u",
+- jiffies,
+- jiffies_to_msecs(bfqq->
+- wr_cur_max_time));
+- } else if (time_before(
+- bfqq->last_wr_start_finish +
+- bfqq->wr_cur_max_time,
+- jiffies +
+- bfqd->bfq_wr_rt_max_time) &&
+- soft_rt) {
+- /*
+- *
+- * The remaining weight-raising time is lower
+- * than bfqd->bfq_wr_rt_max_time, which means
+- * that the application is enjoying weight
+- * raising either because deemed soft-rt in
+- * the near past, or because deemed interactive
+- * a long ago.
+- * In both cases, resetting now the current
+- * remaining weight-raising time for the
+- * application to the weight-raising duration
+- * for soft rt applications would not cause any
+- * latency increase for the application (as the
+- * new duration would be higher than the
+- * remaining time).
+- *
+- * In addition, the application is now meeting
+- * the requirements for being deemed soft rt.
+- * In the end we can correctly and safely
+- * (re)charge the weight-raising duration for
+- * the application with the weight-raising
+- * duration for soft rt applications.
+- *
+- * In particular, doing this recharge now, i.e.,
+- * before the weight-raising period for the
+- * application finishes, reduces the probability
+- * of the following negative scenario:
+- * 1) the weight of a soft rt application is
+- * raised at startup (as for any newly
+- * created application),
+- * 2) since the application is not interactive,
+- * at a certain time weight-raising is
+- * stopped for the application,
+- * 3) at that time the application happens to
+- * still have pending requests, and hence
+- * is destined to not have a chance to be
+- * deemed soft rt before these requests are
+- * completed (see the comments to the
+- * function bfq_bfqq_softrt_next_start()
+- * for details on soft rt detection),
+- * 4) these pending requests experience a high
+- * latency because the application is not
+- * weight-raised while they are pending.
+- */
+- bfqq->last_wr_start_finish = jiffies;
+- bfqq->wr_cur_max_time =
+- bfqd->bfq_wr_rt_max_time;
+- }
+- }
+-set_prio_changed:
+- if (old_wr_coeff != bfqq->wr_coeff)
+- entity->prio_changed = 1;
+-add_bfqq_busy:
+- bfqq->last_idle_bklogged = jiffies;
+- bfqq->service_from_backlogged = 0;
+- bfq_clear_bfqq_softrt_update(bfqq);
+- bfq_add_bfqq_busy(bfqd, bfqq);
+- } else {
++ if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */
++ bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff,
++ rq, &interactive);
++ else {
+ if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
+ time_is_before_jiffies(
+ bfqq->last_wr_start_finish +
+@@ -1049,16 +1385,43 @@ add_bfqq_busy:
+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+
+ bfqd->wr_busy_queues++;
+- entity->prio_changed = 1;
++ bfqq->entity.prio_changed = 1;
+ bfq_log_bfqq(bfqd, bfqq,
+- "non-idle wrais starting at %lu, rais_max_time %u",
+- jiffies,
+- jiffies_to_msecs(bfqq->wr_cur_max_time));
++ "non-idle wrais starting, "
++ "wr_max_time %u wr_busy %d",
++ jiffies_to_msecs(bfqq->wr_cur_max_time),
++ bfqd->wr_busy_queues);
+ }
+ if (prev != bfqq->next_rq)
+ bfq_updated_next_req(bfqd, bfqq);
+ }
+
++ /*
++ * Assign jiffies to last_wr_start_finish in the following
++ * cases:
++ *
++ * . if bfqq is not going to be weight-raised, because, for
++ * non weight-raised queues, last_wr_start_finish stores the
++ * arrival time of the last request; as of now, this piece
++ * of information is used only for deciding whether to
++ * weight-raise async queues
++ *
++ * . if bfqq is not weight-raised, because, if bfqq is now
++ * switching to weight-raised, then last_wr_start_finish
++ * stores the time when weight-raising starts
++ *
++ * . if bfqq is interactive, because, regardless of whether
++ * bfqq is currently weight-raised, the weight-raising
++ * period must start or restart (this case is considered
++ * separately because it is not detected by the above
++ * conditions, if bfqq is already weight-raised)
++ *
++ * last_wr_start_finish has to be updated also if bfqq is soft
++ * real-time, because the weight-raising period is constantly
++ * restarted on idle-to-busy transitions for these queues, but
++ * this is already done in bfq_bfqq_handle_idle_busy_switch if
++ * needed.
++ */
+ if (bfqd->low_latency &&
+ (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
+ bfqq->last_wr_start_finish = jiffies;
+@@ -1106,6 +1469,9 @@ static void bfq_remove_request(struct request *rq)
+ struct bfq_data *bfqd = bfqq->bfqd;
+ const int sync = rq_is_sync(rq);
+
++ BUG_ON(bfqq->entity.service > bfqq->entity.budget &&
++ bfqq == bfqd->in_service_queue);
++
+ if (bfqq->next_rq == rq) {
+ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq);
+ bfq_updated_next_req(bfqd, bfqq);
+@@ -1119,8 +1485,25 @@ static void bfq_remove_request(struct request *rq)
+ elv_rb_del(&bfqq->sort_list, rq);
+
+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+- if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue)
++ BUG_ON(bfqq->entity.budget < 0);
++
++ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) {
+ bfq_del_bfqq_busy(bfqd, bfqq, 1);
++
++ /* bfqq emptied. In normal operation, when
++ * bfqq is empty, bfqq->entity.service and
++ * bfqq->entity.budget must contain,
++ * respectively, the service received and the
++ * budget used last time bfqq emptied. These
++ * facts do not hold in this case, as at least
++ * this last removal occurred while bfqq is
++ * not in service. To avoid inconsistencies,
++ * reset both bfqq->entity.service and
++ * bfqq->entity.budget.
++ */
++ bfqq->entity.budget = bfqq->entity.service = 0;
++ }
++
+ /*
+ * Remove queue from request-position tree as it is empty.
+ */
+@@ -1134,9 +1517,7 @@ static void bfq_remove_request(struct request *rq)
+ BUG_ON(bfqq->meta_pending == 0);
+ bfqq->meta_pending--;
+ }
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
+-#endif
+ }
+
+ static int bfq_merge(struct request_queue *q, struct request **req,
+@@ -1221,21 +1602,25 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq,
+ bfqq->next_rq = rq;
+
+ bfq_remove_request(next);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
+-#endif
+ }
+
+ /* Must be called with bfqq != NULL */
+ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
+ {
+ BUG_ON(!bfqq);
++
+ if (bfq_bfqq_busy(bfqq))
+ bfqq->bfqd->wr_busy_queues--;
+ bfqq->wr_coeff = 1;
+ bfqq->wr_cur_max_time = 0;
+- /* Trigger a weight change on the next activation of the queue */
++ /*
++ * Trigger a weight change on the next invocation of
++ * __bfq_entity_update_weight_prio.
++ */
+ bfqq->entity.prio_changed = 1;
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "end_wr: wr_busy %d",
++ bfqq->bfqd->wr_busy_queues);
+ }
+
+ static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+@@ -1278,7 +1663,7 @@ static int bfq_rq_close_to_sector(void *io_struct, bool request,
+ sector_t sector)
+ {
+ return abs(bfq_io_struct_pos(io_struct, request) - sector) <=
+- BFQQ_SEEK_THR;
++ BFQQ_CLOSE_THR;
+ }
+
+ static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
+@@ -1400,7 +1785,7 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+ * throughput.
+ */
+ bfqq->new_bfqq = new_bfqq;
+- atomic_add(process_refs, &new_bfqq->ref);
++ new_bfqq->ref += process_refs;
+ return new_bfqq;
+ }
+
+@@ -1431,9 +1816,23 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+ }
+
+ /*
+- * Attempt to schedule a merge of bfqq with the currently in-service queue
+- * or with a close queue among the scheduled queues.
+- * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
++ * If this function returns true, then bfqq cannot be merged. The idea
++ * is that true cooperation happens very early after processes start
++ * to do I/O. Usually, late cooperations are just accidental false
++ * positives. In case bfqq is weight-raised, such false positives
++ * would evidently degrade latency guarantees for bfqq.
++ */
++static bool wr_from_too_long(struct bfq_queue *bfqq)
++{
++ return bfqq->wr_coeff > 1 &&
++ time_is_before_jiffies(bfqq->last_wr_start_finish +
++ msecs_to_jiffies(100));
++}
++
++/*
++ * Attempt to schedule a merge of bfqq with the currently in-service
++ * queue or with a close queue among the scheduled queues. Return
++ * NULL if no merge was scheduled, a pointer to the shared bfq_queue
+ * structure otherwise.
+ *
+ * The OOM queue is not allowed to participate to cooperation: in fact, since
+@@ -1442,6 +1841,18 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+ * handle merging with the OOM queue would be quite complex and expensive
+ * to maintain. Besides, in such a critical condition as an out of memory,
+ * the benefits of queue merging may be little relevant, or even negligible.
++ *
++ * Weight-raised queues can be merged only if their weight-raising
++ * period has just started. In fact cooperating processes are usually
++ * started together. Thus, with this filter we avoid false positives
++ * that would jeopardize low-latency guarantees.
++ *
++ * WARNING: queue merging may impair fairness among non-weight raised
++ * queues, for at least two reasons: 1) the original weight of a
++ * merged queue may change during the merged state, 2) even if the
++ * weight remains the same, a merged queue may be bloated with many
++ * more requests than those produced by its originally-associated
++ * process.
+ */
+ static struct bfq_queue *
+ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -1451,16 +1862,32 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+
+ if (bfqq->new_bfqq)
+ return bfqq->new_bfqq;
+- if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
++
++ if (io_struct && wr_from_too_long(bfqq) &&
++ likely(bfqq != &bfqd->oom_bfqq))
++ bfq_log_bfqq(bfqd, bfqq,
++ "would have looked for coop, but bfq%d wr",
++ bfqq->pid);
++
++ if (!io_struct ||
++ wr_from_too_long(bfqq) ||
++ unlikely(bfqq == &bfqd->oom_bfqq))
+ return NULL;
+- /* If device has only one backlogged bfq_queue, don't search. */
++
++ /* If there is only one backlogged queue, don't search. */
+ if (bfqd->busy_queues == 1)
+ return NULL;
+
+ in_service_bfqq = bfqd->in_service_queue;
+
++ if (in_service_bfqq && in_service_bfqq != bfqq &&
++ bfqd->in_service_bic && wr_from_too_long(in_service_bfqq)
++ && likely(in_service_bfqq != &bfqd->oom_bfqq))
++ bfq_log_bfqq(bfqd, bfqq,
++ "would have tried merge with in-service-queue, but wr");
++
+ if (!in_service_bfqq || in_service_bfqq == bfqq ||
+- !bfqd->in_service_bic ||
++ !bfqd->in_service_bic || wr_from_too_long(in_service_bfqq) ||
+ unlikely(in_service_bfqq == &bfqd->oom_bfqq))
+ goto check_scheduled;
+
+@@ -1482,7 +1909,15 @@ check_scheduled:
+
+ BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent);
+
+- if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) &&
++ if (new_bfqq && wr_from_too_long(new_bfqq) &&
++ likely(new_bfqq != &bfqd->oom_bfqq) &&
++ bfq_may_be_close_cooperator(bfqq, new_bfqq))
++ bfq_log_bfqq(bfqd, bfqq,
++ "would have merged with bfq%d, but wr",
++ new_bfqq->pid);
++
++ if (new_bfqq && !wr_from_too_long(new_bfqq) &&
++ likely(new_bfqq != &bfqd->oom_bfqq) &&
+ bfq_may_be_close_cooperator(bfqq, new_bfqq))
+ return bfq_setup_merge(bfqq, new_bfqq);
+
+@@ -1498,46 +1933,11 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
+ */
+ if (!bfqq->bic)
+ return;
+- if (bfqq->bic->wr_time_left)
+- /*
+- * This is the queue of a just-started process, and would
+- * deserve weight raising: we set wr_time_left to the full
+- * weight-raising duration to trigger weight-raising when
+- * and if the queue is split and the first request of the
+- * queue is enqueued.
+- */
+- bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd);
+- else if (bfqq->wr_coeff > 1) {
+- unsigned long wr_duration =
+- jiffies - bfqq->last_wr_start_finish;
+- /*
+- * It may happen that a queue's weight raising period lasts
+- * longer than its wr_cur_max_time, as weight raising is
+- * handled only when a request is enqueued or dispatched (it
+- * does not use any timer). If the weight raising period is
+- * about to end, don't save it.
+- */
+- if (bfqq->wr_cur_max_time <= wr_duration)
+- bfqq->bic->wr_time_left = 0;
+- else
+- bfqq->bic->wr_time_left =
+- bfqq->wr_cur_max_time - wr_duration;
+- /*
+- * The bfq_queue is becoming shared or the requests of the
+- * process owning the queue are being redirected to a shared
+- * queue. Stop the weight raising period of the queue, as in
+- * both cases it should not be owned by an interactive or
+- * soft real-time application.
+- */
+- bfq_bfqq_end_wr(bfqq);
+- } else
+- bfqq->bic->wr_time_left = 0;
++
+ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
+ bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+ bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
+ bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
+- bfqq->bic->cooperations++;
+- bfqq->bic->failed_cooperations = 0;
+ }
+
+ static void bfq_get_bic_reference(struct bfq_queue *bfqq)
+@@ -1562,6 +1962,40 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+ if (bfq_bfqq_IO_bound(bfqq))
+ bfq_mark_bfqq_IO_bound(new_bfqq);
+ bfq_clear_bfqq_IO_bound(bfqq);
++
++ /*
++ * If bfqq is weight-raised, then let new_bfqq inherit
++ * weight-raising. To reduce false positives, neglect the case
++ * where bfqq has just been created, but has not yet been
++ * weight-raised (which may happen because EQM may merge
++ * bfqq even before bfq_add_request is executed for the first
++ * time for bfqq). Handling this case would however be very
++ * easy, thanks to the flag just_created.
++ */
++ if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) {
++ new_bfqq->wr_coeff = bfqq->wr_coeff;
++ new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time;
++ new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish;
++ if (bfq_bfqq_busy(new_bfqq))
++ bfqd->wr_busy_queues++;
++ new_bfqq->entity.prio_changed = 1;
++ bfq_log_bfqq(bfqd, new_bfqq,
++ "wr starting after merge with %d, "
++ "rais_max_time %u",
++ bfqq->pid,
++ jiffies_to_msecs(bfqq->wr_cur_max_time));
++ }
++
++ if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */
++ bfqq->wr_coeff = 1;
++ bfqq->entity.prio_changed = 1;
++ if (bfq_bfqq_busy(bfqq))
++ bfqd->wr_busy_queues--;
++ }
++
++ bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d",
++ bfqd->wr_busy_queues);
++
+ /*
+ * Grab a reference to the bic, to prevent it from being destroyed
+ * before being possibly touched by a bfq_split_bfqq().
+@@ -1588,18 +2022,6 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+ bfq_put_queue(bfqq);
+ }
+
+-static void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq)
+-{
+- struct bfq_io_cq *bic = bfqq->bic;
+- struct bfq_data *bfqd = bfqq->bfqd;
+-
+- if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) {
+- bic->failed_cooperations++;
+- if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations)
+- bic->cooperations = 0;
+- }
+-}
+-
+ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+ struct bio *bio)
+ {
+@@ -1637,30 +2059,86 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+ * to decide whether bio and rq can be merged.
+ */
+ bfqq = new_bfqq;
+- } else
+- bfq_bfqq_increase_failed_cooperations(bfqq);
++ }
+ }
+
+ return bfqq == RQ_BFQQ(rq);
+ }
+
++/*
++ * Set the maximum time for the in-service queue to consume its
++ * budget. This prevents seeky processes from lowering the throughput.
++ * In practice, a time-slice service scheme is used with seeky
++ * processes.
++ */
++static void bfq_set_budget_timeout(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ unsigned int timeout_coeff;
++ if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
++ timeout_coeff = 1;
++ else
++ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
++
++ bfqd->last_budget_start = ktime_get();
++
++ bfqq->budget_timeout = jiffies +
++ bfqd->bfq_timeout * timeout_coeff;
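++	/*
++	 * For example, a queue whose weight is currently raised
++	 * by a factor R (entity->weight == R * orig_weight) is
++	 * granted a budget timeout R times as long as that of a
++	 * non-raised queue, unless it is in a soft real-time
++	 * weight-raising period, in which case the plain timeout
++	 * is used.
++	 */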
++
++ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
++ jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff));
++}
++
+ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq)
+ {
+ if (bfqq) {
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
+-#endif
+ bfq_mark_bfqq_must_alloc(bfqq);
+- bfq_mark_bfqq_budget_new(bfqq);
+ bfq_clear_bfqq_fifo_expire(bfqq);
+
+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
+
++ BUG_ON(bfqq == bfqd->in_service_queue);
++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
++
++ if (bfqq->wr_coeff > 1 &&
++ bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
++ time_is_before_jiffies(bfqq->budget_timeout)) {
++ /*
++ * For soft real-time queues, move the start
++ * of the weight-raising period forward by the
++ * time the queue has not received any
++ * service. Otherwise, a relatively long
++ * service delay is likely to cause the
++ * weight-raising period of the queue to end,
++ * because of the short duration of the
++ * weight-raising period of a soft real-time
++ * queue. It is worth noting that this move
++ * is not so dangerous for the other queues,
++ * because soft real-time queues are not
++ * greedy.
++ *
++ * To not add a further variable, we use the
++ * overloaded field budget_timeout to
++ * determine for how long the queue has not
++ * received service, i.e., how much time has
++ * elapsed since the queue expired. However,
++ * this is a little imprecise, because
++ * budget_timeout is set to jiffies if bfqq
++ * not only expires, but also remains with no
++ * request.
++ */
++ bfqq->last_wr_start_finish += jiffies -
++ bfqq->budget_timeout;
++ }
++
++ bfq_set_budget_timeout(bfqd, bfqq);
+ bfq_log_bfqq(bfqd, bfqq,
+ "set_in_service_queue, cur-budget = %d",
+ bfqq->entity.budget);
+- }
++ } else
++ bfq_log(bfqd, "set_in_service_queue: NULL");
+
+ bfqd->in_service_queue = bfqq;
+ }
+@@ -1676,31 +2154,6 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
+ return bfqq;
+ }
+
+-/*
+- * If enough samples have been computed, return the current max budget
+- * stored in bfqd, which is dynamically updated according to the
+- * estimated disk peak rate; otherwise return the default max budget
+- */
+-static int bfq_max_budget(struct bfq_data *bfqd)
+-{
+- if (bfqd->budgets_assigned < bfq_stats_min_budgets)
+- return bfq_default_max_budget;
+- else
+- return bfqd->bfq_max_budget;
+-}
+-
+-/*
+- * Return min budget, which is a fraction of the current or default
+- * max budget (trying with 1/32)
+- */
+-static int bfq_min_budget(struct bfq_data *bfqd)
+-{
+- if (bfqd->budgets_assigned < bfq_stats_min_budgets)
+- return bfq_default_max_budget / 32;
+- else
+- return bfqd->bfq_max_budget / 32;
+-}
+-
+ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
+ {
+ struct bfq_queue *bfqq = bfqd->in_service_queue;
+@@ -1723,64 +2176,36 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
+ *
+ * To prevent processes with (partly) seeky workloads from
+ * being too ill-treated, grant them a small fraction of the
+- * assigned budget before reducing the waiting time to
+- * BFQ_MIN_TT. This happened to help reduce latency.
+- */
+- sl = bfqd->bfq_slice_idle;
+- /*
+- * Unless the queue is being weight-raised or the scenario is
+- * asymmetric, grant only minimum idle time if the queue either
+- * has been seeky for long enough or has already proved to be
+- * constantly seeky.
+- */
+- if (bfq_sample_valid(bfqq->seek_samples) &&
+- ((BFQQ_SEEKY(bfqq) && bfqq->entity.service >
+- bfq_max_budget(bfqq->bfqd) / 8) ||
+- bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1 &&
+- bfq_symmetric_scenario(bfqd))
+- sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
+- else if (bfqq->wr_coeff > 1)
+- sl = sl * 3;
+- bfqd->last_idling_start = ktime_get();
+- mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
+-#endif
+- bfq_log(bfqd, "arm idle: %u/%u ms",
+- jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
+-}
+-
+-/*
+- * Set the maximum time for the in-service queue to consume its
+- * budget. This prevents seeky processes from lowering the disk
+- * throughput (always guaranteed with a time slice scheme as in CFQ).
+- */
+-static void bfq_set_budget_timeout(struct bfq_data *bfqd)
+-{
+- struct bfq_queue *bfqq = bfqd->in_service_queue;
+- unsigned int timeout_coeff;
+- if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
+- timeout_coeff = 1;
+- else
+- timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
+-
+- bfqd->last_budget_start = ktime_get();
+-
+- bfq_clear_bfqq_budget_new(bfqq);
+- bfqq->budget_timeout = jiffies +
+- bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff;
++ * assigned budget before reducing the waiting time to
++ * BFQ_MIN_TT. This happened to help reduce latency.
++ */
++ sl = bfqd->bfq_slice_idle;
++ /*
++ * Unless the queue is being weight-raised or the scenario is
++ * asymmetric, grant only minimum idle time if the queue
++ * is seeky. Long idling is preserved for a weight-raised
++ * queue, or, more generally, in an asymmetric scenario,
++ * because long idling is needed to guarantee a queue its
++ * reserved share of the throughput (in particular, it is
++ * needed if the queue has a higher weight than some other
++ * queue).
++ */
++ if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 &&
++ bfq_symmetric_scenario(bfqd))
++ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
+
+- bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
+- jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] *
+- timeout_coeff));
++ bfqd->last_idling_start = ktime_get();
++ mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
++ bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
++ bfq_log(bfqd, "arm idle: %u/%u ms",
++ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
+ }
+
+ /*
+- * Move request from internal lists to the request queue dispatch list.
++ * Move request from internal lists to the dispatch list of the request queue
+ */
+ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
+ {
+- struct bfq_data *bfqd = q->elevator->elevator_data;
+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
+
+ /*
+@@ -1794,15 +2219,9 @@ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
+ * incrementing bfqq->dispatched.
+ */
+ bfqq->dispatched++;
++
+ bfq_remove_request(rq);
+ elv_dispatch_sort(q, rq);
+-
+- if (bfq_bfqq_sync(bfqq))
+- bfqd->sync_flight++;
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- bfqg_stats_update_dispatch(bfqq_group(bfqq), blk_rq_bytes(rq),
+- rq->cmd_flags);
+-#endif
+ }
+
+ /*
+@@ -1822,18 +2241,12 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
+
+ rq = rq_entry_fifo(bfqq->fifo.next);
+
+- if (time_before(jiffies, rq->fifo_time))
++ if (time_is_after_jiffies(rq->fifo_time))
+ return NULL;
+
+ return rq;
+ }
+
+-static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
+-{
+- struct bfq_entity *entity = &bfqq->entity;
+- return entity->budget - entity->service;
+-}
+-
+ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ BUG_ON(bfqq != bfqd->in_service_queue);
+@@ -1850,12 +2263,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_mark_bfqq_split_coop(bfqq);
+
+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+- /*
+- * Overloading budget_timeout field to store the time
+- * at which the queue remains with no backlog; used by
+- * the weight-raising mechanism.
+- */
+- bfqq->budget_timeout = jiffies;
++ if (bfqq->dispatched == 0)
++ /*
++ * Overloading budget_timeout field to store
++ * the time at which the queue remains with no
++ * backlog and no outstanding request; used by
++ * the weight-raising mechanism.
++ */
++ bfqq->budget_timeout = jiffies;
++
+ bfq_del_bfqq_busy(bfqd, bfqq, 1);
+ } else {
+ bfq_activate_bfqq(bfqd, bfqq);
+@@ -1882,10 +2298,19 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+ struct request *next_rq;
+ int budget, min_budget;
+
+- budget = bfqq->max_budget;
++ BUG_ON(bfqq != bfqd->in_service_queue);
++
+ min_budget = bfq_min_budget(bfqd);
+
+- BUG_ON(bfqq != bfqd->in_service_queue);
++ if (bfqq->wr_coeff == 1)
++ budget = bfqq->max_budget;
++ else /*
++ * Use a constant, low budget for weight-raised queues,
++ * to help achieve low latency. Keep it slightly higher
++ * than the minimum possible budget, to cause slightly
++ * fewer expirations.
++ */
++ budget = 2 * min_budget;
+
+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d",
+ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
+@@ -1894,7 +2319,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
+ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
+
+- if (bfq_bfqq_sync(bfqq)) {
++ if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) {
+ switch (reason) {
+ /*
+ * Caveat: in all the following cases we trade latency
+@@ -1936,14 +2361,10 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+ break;
+ case BFQ_BFQQ_BUDGET_TIMEOUT:
+ /*
+- * We double the budget here because: 1) it
+- * gives the chance to boost the throughput if
+- * this is not a seeky process (which may have
+- * bumped into this timeout because of, e.g.,
+- * ZBR), 2) together with charge_full_budget
+- * it helps give seeky processes higher
+- * timestamps, and hence be served less
+- * frequently.
++ * We double the budget here because it gives
++ * the chance to boost the throughput if this
++ * is not a seeky process (and has bumped into
++ * this timeout because of, e.g., ZBR).
+ */
+ budget = min(budget * 2, bfqd->bfq_max_budget);
+ break;
+@@ -1960,17 +2381,49 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+ budget = min(budget * 4, bfqd->bfq_max_budget);
+ break;
+ case BFQ_BFQQ_NO_MORE_REQUESTS:
+- /*
+- * Leave the budget unchanged.
+- */
++ /*
++ * For queues that expire for this reason, it
++ * is particularly important to keep the
++ * budget close to the actual service they
++ * need. Doing so reduces the timestamp
++ * misalignment problem described in the
++ * comments in the body of
++ * __bfq_activate_entity. In fact, suppose
++ * that a queue systematically expires for
++ * BFQ_BFQQ_NO_MORE_REQUESTS and presents a
++ * new request in time to enjoy timestamp
++ * back-shifting. The larger the budget of the
++ * queue is with respect to the service the
++ * queue actually requests in each service
++ * slot, the more times the queue can be
++ * reactivated with the same virtual finish
++ * time. It follows that, even if this finish
++ * time is pushed to the system virtual time
++ * to reduce the consequent timestamp
++ * misalignment, the queue unjustly enjoys for
++ * many re-activations a lower finish time
++ * than all newly activated queues.
++ *
++ * The service needed by bfqq is measured
++ * quite precisely by bfqq->entity.service.
++ * Since bfqq does not enjoy device idling,
++ * bfqq->entity.service is equal to the number
++ * of sectors that the process associated with
++ * bfqq requested to read/write before waiting
++ * for request completions, or blocking for
++ * other reasons.
++ */
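++			/*
++			 * For instance, a process that always issues
++			 * a short batch of requests and then blocks
++			 * gets, from the assignment below, a budget
++			 * close to the size of that batch (but not
++			 * below min_budget), instead of keeping a
++			 * possibly much larger leftover budget.
++			 */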
++ budget = max_t(int, bfqq->entity.service, min_budget);
++ break;
+ default:
+ return;
+ }
+- } else
++ } else if (!bfq_bfqq_sync(bfqq))
+ /*
+- * Async queues get always the maximum possible budget
+- * (their ability to dispatch is limited by
+- * @bfqd->bfq_max_budget_async_rq).
++ * Async queues always get the maximum possible
++ * budget, as for them we do not care about latency
++ * (in addition, their ability to dispatch is limited
++ * by the charging factor).
+ */
+ budget = bfqd->bfq_max_budget;
+
+@@ -1981,65 +2434,105 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
+ bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget);
+
+ /*
+- * Make sure that we have enough budget for the next request.
+- * Since the finish time of the bfqq must be kept in sync with
+- * the budget, be sure to call __bfq_bfqq_expire() after the
++ * If there is still backlog, then assign a new budget, making
++ * sure that it is large enough for the next request. Since
++ * the finish time of bfqq must be kept in sync with the
++ * budget, be sure to call __bfq_bfqq_expire() *after* this
+ * update.
++ *
++ * If there is no backlog, then no need to update the budget;
++ * it will be updated on the arrival of a new request.
+ */
+ next_rq = bfqq->next_rq;
+- if (next_rq)
++ if (next_rq) {
++ BUG_ON(reason == BFQ_BFQQ_TOO_IDLE ||
++ reason == BFQ_BFQQ_NO_MORE_REQUESTS);
+ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
+ bfq_serv_to_charge(next_rq, bfqq));
+- else
+- bfqq->entity.budget = bfqq->max_budget;
++ BUG_ON(!bfq_bfqq_busy(bfqq));
++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list));
++ }
+
+ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d",
+ next_rq ? blk_rq_sectors(next_rq) : 0,
+ bfqq->entity.budget);
+ }
+
+-static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
++static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd)
+ {
+- unsigned long max_budget;
+-
+ /*
+ * The max_budget calculated when autotuning is equal to the
+- * amount of sectors transfered in timeout_sync at the
++ * amount of sectors transferred in timeout at the
+ * estimated peak rate.
+ */
+- max_budget = (unsigned long)(peak_rate * 1000 *
+- timeout >> BFQ_RATE_SHIFT);
+-
+- return max_budget;
++ return bfqd->peak_rate * 1000 * jiffies_to_msecs(bfqd->bfq_timeout) >>
++ BFQ_RATE_SHIFT;
+ }
+
+ /*
+- * In addition to updating the peak rate, checks whether the process
+- * is "slow", and returns 1 if so. This slow flag is used, in addition
+- * to the budget timeout, to reduce the amount of service provided to
+- * seeky processes, and hence reduce their chances to lower the
+- * throughput. See the code for more details.
++ * Update the read peak rate (quantity used for auto-tuning) as a
++ * function of the rate at which bfqq has been served, and check
++ * whether the process associated with bfqq is "slow". Return true if
++ * the process is slow. The slow flag is used, in addition to the
++ * budget timeout, to reduce the amount of service provided to seeky
++ * processes, and hence reduce their chances to lower the
++ * throughput. More details in the body of the function.
++ *
++ * An important observation is in order: with devices with internal
++ * queues, it is hard, if possible at all, to know when and for how long
++ * an I/O request is processed by the device (apart from the trivial
++ * I/O pattern where a new request is dispatched only after the
++ * previous one has been completed). This makes it hard to evaluate
++ * the real rate at which the I/O requests of each bfq_queue are
++ * served. In fact, for an I/O scheduler like BFQ, serving a
++ * bfq_queue means just dispatching its requests during its service
++ * slot, i.e., until the budget of the queue is exhausted, or the
++ * queue remains idle, or, finally, a timeout fires. But, during the
++ * service slot of a bfq_queue, the device may be still processing
++ * requests of bfq_queues served in previous service slots. On the
++ * opposite end, the requests of the in-service bfq_queue may be
++ * completed after the service slot of the queue finishes. Anyway,
++ * unless more sophisticated solutions are used (where possible), the
++ * sum of the sizes of the requests dispatched during the service slot
++ * of a bfq_queue is probably the only approximation available for
++ * the service received by the bfq_queue during its service slot. And,
++ * as written above, this sum is the quantity used in this function to
++ * evaluate the peak rate.
+ */
+ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+- bool compensate, enum bfqq_expiration reason)
++ bool compensate, enum bfqq_expiration reason,
++ unsigned long *delta_ms)
+ {
+- u64 bw, usecs, expected, timeout;
+- ktime_t delta;
++ u64 bw, bwdiv10, delta_usecs, delta_ms_tmp;
++ ktime_t delta_ktime;
+ int update = 0;
++ bool slow = BFQQ_SEEKY(bfqq); /* if delta too short, use seekiness */
+
+- if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
++ if (!bfq_bfqq_sync(bfqq))
+ return false;
+
+ if (compensate)
+- delta = bfqd->last_idling_start;
++ delta_ktime = bfqd->last_idling_start;
+ else
+- delta = ktime_get();
+- delta = ktime_sub(delta, bfqd->last_budget_start);
+- usecs = ktime_to_us(delta);
++ delta_ktime = ktime_get();
++ delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start);
++ delta_usecs = ktime_to_us(delta_ktime);
+
+ /* Don't trust short/unrealistic values. */
+- if (usecs < 100 || usecs >= LONG_MAX)
+- return false;
++ if (delta_usecs < 1000 || delta_usecs >= LONG_MAX) {
++ if (blk_queue_nonrot(bfqd->queue))
++ /* give same worst-case guarantees as idling for seeky */
++ *delta_ms = BFQ_MIN_TT;
++ else /* Charge at least one seek */
++ *delta_ms = jiffies_to_msecs(bfq_slice_idle);
++ return slow;
++ }
++
++ delta_ms_tmp = delta_usecs;
++ do_div(delta_ms_tmp, 1000);
++ *delta_ms = delta_ms_tmp;
+
+ /*
+ * Calculate the bandwidth for the last slice. We use a 64 bit
+@@ -2048,32 +2541,51 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ * and to avoid overflows.
+ */
+ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT;
+- do_div(bw, (unsigned long)usecs);
+-
+- timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
++ do_div(bw, (unsigned long)delta_usecs);
+
++ bfq_log(bfqd, "measured bw = %llu sects/sec",
++ (1000000*bw)>>BFQ_RATE_SHIFT);
+ /*
+ * Use only long (> 20ms) intervals to filter out spikes for
+ * the peak rate estimation.
+ */
+- if (usecs > 20000) {
++ if (delta_usecs > 20000) {
++ bool fully_sequential = bfqq->seek_history == 0;
++ /*
++ * Soft real-time queues are not good candidates for
++ * evaluating bw, as they are likely to be slow even
++ * if sequential.
++ */
++ bool non_soft_rt = bfqq->wr_coeff == 1 ||
++ bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time;
++ bool consumed_large_budget =
++ reason == BFQ_BFQQ_BUDGET_EXHAUSTED &&
++ bfqq->entity.budget >= bfqd->bfq_max_budget * 2 / 3;
++ bool served_for_long_time =
++ reason == BFQ_BFQQ_BUDGET_TIMEOUT ||
++ consumed_large_budget;
++
++ BUG_ON(bfqq->seek_history == 0 &&
++ hweight32(bfqq->seek_history) != 0);
++
+ if (bw > bfqd->peak_rate ||
+- (!BFQQ_SEEKY(bfqq) &&
+- reason == BFQ_BFQQ_BUDGET_TIMEOUT)) {
+- bfq_log(bfqd, "measured bw =%llu", bw);
++ (bfq_bfqq_sync(bfqq) && fully_sequential && non_soft_rt &&
++ served_for_long_time)) {
+ /*
+ * To smooth oscillations use a low-pass filter with
+- * alpha=7/8, i.e.,
+- * new_rate = (7/8) * old_rate + (1/8) * bw
++ * alpha=9/10, i.e.,
++ * new_rate = (9/10) * old_rate + (1/10) * bw
+ */
+- do_div(bw, 8);
+- if (bw == 0)
+- return 0;
+- bfqd->peak_rate *= 7;
+- do_div(bfqd->peak_rate, 8);
+- bfqd->peak_rate += bw;
++ bwdiv10 = bw;
++ do_div(bwdiv10, 10);
++ if (bwdiv10 == 0)
++ return false; /* bw too low to be used */
++ bfqd->peak_rate *= 9;
++ do_div(bfqd->peak_rate, 10);
++ bfqd->peak_rate += bwdiv10;
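++			/*
++			 * For example, if the previous estimate
++			 * corresponds to 100000 sectors/sec and the
++			 * last slice was served at 200000 sectors/sec,
++			 * the new estimate becomes
++			 * 0.9 * 100000 + 0.1 * 200000 = 110000
++			 * sectors/sec.
++			 */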
+ update = 1;
+- bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate);
++ bfq_log(bfqd, "new peak_rate = %llu sects/sec",
++ (1000000*bfqd->peak_rate)>>BFQ_RATE_SHIFT);
+ }
+
+ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1;
+@@ -2086,9 +2598,8 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ int dev_type = blk_queue_nonrot(bfqd->queue);
+ if (bfqd->bfq_user_max_budget == 0) {
+ bfqd->bfq_max_budget =
+- bfq_calc_max_budget(bfqd->peak_rate,
+- timeout);
+- bfq_log(bfqd, "new max_budget=%d",
++ bfq_calc_max_budget(bfqd);
++ bfq_log(bfqd, "new max_budget = %d",
+ bfqd->bfq_max_budget);
+ }
+ if (bfqd->device_speed == BFQ_BFQD_FAST &&
+@@ -2102,38 +2613,35 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ bfqd->RT_prod = R_fast[dev_type] *
+ T_fast[dev_type];
+ }
++ bfq_log(bfqd, "dev_speed_class = %d (%d sects/sec), "
++ "thresh %d setcs/sec",
++ bfqd->device_speed,
++ bfqd->device_speed == BFQ_BFQD_FAST ?
++ (1000000*R_fast[dev_type])>>BFQ_RATE_SHIFT :
++ (1000000*R_slow[dev_type])>>BFQ_RATE_SHIFT,
++ (1000000*device_speed_thresh[dev_type])>>
++ BFQ_RATE_SHIFT);
+ }
++ /*
++ * Caveat: processes doing IO in the slower disk zones
++ * tend to be slow(er) even if not seeky. In this
++ * respect, the estimated peak rate is likely to be an
++ * average over the disk surface. Accordingly, to not
++ * be too harsh with unlucky processes, a process is
++ * deemed slow only if its bw has been lower than half
++ * of the estimated peak rate.
++ */
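++		/*
++		 * For instance, with an estimated peak rate equivalent
++		 * to 150 MB/s, a queue served at 60 MB/s during its
++		 * service slot is deemed slow.
++		 */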
++ slow = bw < bfqd->peak_rate / 2;
+ }
+
+- /*
+- * If the process has been served for a too short time
+- * interval to let its possible sequential accesses prevail on
+- * the initial seek time needed to move the disk head on the
+- * first sector it requested, then give the process a chance
+- * and for the moment return false.
+- */
+- if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8)
+- return false;
+-
+- /*
+- * A process is considered ``slow'' (i.e., seeky, so that we
+- * cannot treat it fairly in the service domain, as it would
+- * slow down too much the other processes) if, when a slice
+- * ends for whatever reason, it has received service at a
+- * rate that would not be high enough to complete the budget
+- * before the budget timeout expiration.
+- */
+- expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT;
++ bfq_log_bfqq(bfqd, bfqq,
++ "update_peak_rate: bw %llu sect/s, peak rate %llu, "
++ "slow %d",
++ (1000000*bw)>>BFQ_RATE_SHIFT,
++ (1000000*bfqd->peak_rate)>>BFQ_RATE_SHIFT,
++ bw < bfqd->peak_rate / 2);
+
+- /*
+- * Caveat: processes doing IO in the slower disk zones will
+- * tend to be slow(er) even if not seeky. And the estimated
+- * peak rate will actually be an average over the disk
+- * surface. Hence, to not be too harsh with unlucky processes,
+- * we keep a budget/3 margin of safety before declaring a
+- * process slow.
+- */
+- return expected > (4 * bfqq->entity.budget) / 3;
++ return slow;
+ }
+
+ /*
+@@ -2191,6 +2699,15 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq)
+ {
++ bfq_log_bfqq(bfqd, bfqq,
++ "softrt_next_start: service_blkg %lu "
++ "soft_rate %u sects/sec"
++ "interval %u",
++ bfqq->service_from_backlogged,
++ bfqd->bfq_wr_max_softrt_rate,
++ jiffies_to_msecs(HZ * bfqq->service_from_backlogged /
++ bfqd->bfq_wr_max_softrt_rate));
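++	/*
++	 * For instance, a queue that received 7000 sectors of
++	 * service since it last became backlogged, with a soft
++	 * real-time rate threshold of 7000 sectors/sec, cannot be
++	 * deemed soft real-time again earlier than one second
++	 * after last_idle_bklogged.
++	 */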
++
+ return max(bfqq->last_idle_bklogged +
+ HZ * bfqq->service_from_backlogged /
+ bfqd->bfq_wr_max_softrt_rate,
+@@ -2198,13 +2715,21 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+ }
+
+ /*
+- * Return the largest-possible time instant such that, for as long as possible,
+- * the current time will be lower than this time instant according to the macro
+- * time_is_before_jiffies().
++ * Return the farthest future time instant according to jiffies
++ * macros.
++ */
++static unsigned long bfq_greatest_from_now(void)
++{
++ return jiffies + MAX_JIFFY_OFFSET;
++}
++
++/*
++ * Return the farthest past time instant according to jiffies
++ * macros.
+ */
+-static unsigned long bfq_infinity_from_now(unsigned long now)
++static unsigned long bfq_smallest_from_now(void)
+ {
+- return now + ULONG_MAX / 2;
++ return jiffies - MAX_JIFFY_OFFSET;
+ }
+
+ /**
+@@ -2214,28 +2739,24 @@ static unsigned long bfq_infinity_from_now(unsigned long now)
+ * @compensate: if true, compensate for the time spent idling.
+ * @reason: the reason causing the expiration.
+ *
++ * If the process associated with bfqq does slow I/O (e.g., because it
++ * issues random requests), we charge bfqq with the time it has been
++ * in service instead of the service it has received (see
++ * bfq_bfqq_charge_time for details on how this goal is achieved). As
++ * a consequence, bfqq will typically get higher timestamps upon
++ * reactivation, and hence it will be rescheduled as if it had
++ * received more service than what it has actually received. In the
++ * end, bfqq receives less service in proportion to how slowly its
++ * associated process consumes its budgets (and hence how seriously it
++ * tends to lower the throughput). In addition, this time-charging
++ * strategy guarantees time fairness among slow processes. In
++ * contrast, if the process associated with bfqq is not slow, we
++ * charge bfqq exactly with the service it has received.
+ *
+- * If the process associated to the queue is slow (i.e., seeky), or in
+- * case of budget timeout, or, finally, if it is async, we
+- * artificially charge it an entire budget (independently of the
+- * actual service it received). As a consequence, the queue will get
+- * higher timestamps than the correct ones upon reactivation, and
+- * hence it will be rescheduled as if it had received more service
+- * than what it actually received. In the end, this class of processes
+- * will receive less service in proportion to how slowly they consume
+- * their budgets (and hence how seriously they tend to lower the
+- * throughput).
+- *
+- * In contrast, when a queue expires because it has been idling for
+- * too much or because it exhausted its budget, we do not touch the
+- * amount of service it has received. Hence when the queue will be
+- * reactivated and its timestamps updated, the latter will be in sync
+- * with the actual service received by the queue until expiration.
+- *
+- * Charging a full budget to the first type of queues and the exact
+- * service to the others has the effect of using the WF2Q+ policy to
+- * schedule the former on a timeslice basis, without violating the
+- * service domain guarantees of the latter.
++ * Charging time to the first type of queues and the exact service to
++ * the others has the effect of using the WF2Q+ policy to schedule the
++ * former on a timeslice basis, without violating service domain
++ * guarantees among the latter.
+ */
+ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq,
+@@ -2243,40 +2764,51 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ enum bfqq_expiration reason)
+ {
+ bool slow;
++ unsigned long delta = 0;
++ struct bfq_entity *entity = &bfqq->entity;
++
+ BUG_ON(bfqq != bfqd->in_service_queue);
+
+ /*
+- * Update disk peak rate for autotuning and check whether the
++ * Update device peak rate for autotuning and check whether the
+ * process is slow (see bfq_update_peak_rate).
+ */
+- slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason);
++ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason, &delta);
+
+ /*
+- * As above explained, 'punish' slow (i.e., seeky), timed-out
+- * and async queues, to favor sequential sync workloads.
+- *
+- * Processes doing I/O in the slower disk zones will tend to be
+- * slow(er) even if not seeky. Hence, since the estimated peak
+- * rate is actually an average over the disk surface, these
+- * processes may timeout just for bad luck. To avoid punishing
+- * them we do not charge a full budget to a process that
+- * succeeded in consuming at least 2/3 of its budget.
++ * Increase service_from_backlogged before the next statement,
++ * because the possible next invocation of
++ * bfq_bfqq_charge_time would likely inflate
++ * entity->service. In contrast, service_from_backlogged must
++ * contain real service, to enable the soft real-time
++ * heuristic to correctly compute the bandwidth consumed by
++ * bfqq.
+ */
+- if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
+- bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3))
+- bfq_bfqq_charge_full_budget(bfqq);
++ bfqq->service_from_backlogged += entity->service;
+
+- bfqq->service_from_backlogged += bfqq->entity.service;
+-
+- if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
+- !bfq_bfqq_constantly_seeky(bfqq)) {
+- bfq_mark_bfqq_constantly_seeky(bfqq);
+- if (!blk_queue_nonrot(bfqd->queue))
+- bfqd->const_seeky_busy_in_flight_queues++;
+- }
++ /*
++ * As explained above, charge slow (typically seeky) and
++ * timed-out queues with the time spent in service rather
++ * than with the service received, to favor sequential
++ * workloads.
++ *
++ * Processes doing I/O in the slower disk zones will tend to
++ * be slow(er) even if not seeky. Therefore, since the
++ * estimated peak rate is actually an average over the disk
++ * surface, these processes may timeout just for bad luck. To
++ * avoid punishing them, do not charge time to processes that
++ * succeeded in consuming at least 2/3 of their budget. This
++ * allows BFQ to preserve enough elasticity to still perform
++ * bandwidth, and not time, distribution with slightly unlucky
++ * or quasi-sequential processes.
++ */
++ if (bfqq->wr_coeff == 1 &&
++ (slow ||
++ (reason == BFQ_BFQQ_BUDGET_TIMEOUT &&
++ bfq_bfqq_budget_left(bfqq) >= entity->budget / 3)))
++ bfq_bfqq_charge_time(bfqd, bfqq, delta);
+
+ if (reason == BFQ_BFQQ_TOO_IDLE &&
+- bfqq->entity.service <= 2 * bfqq->entity.budget / 10 )
++ entity->service <= 2 * entity->budget / 10 )
+ bfq_clear_bfqq_IO_bound(bfqq);
+
+ if (bfqd->low_latency && bfqq->wr_coeff == 1)
+@@ -2285,19 +2817,23 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 &&
+ RB_EMPTY_ROOT(&bfqq->sort_list)) {
+ /*
+- * If we get here, and there are no outstanding requests,
+- * then the request pattern is isochronous (see the comments
+- * to the function bfq_bfqq_softrt_next_start()). Hence we
+- * can compute soft_rt_next_start. If, instead, the queue
+- * still has outstanding requests, then we have to wait
+- * for the completion of all the outstanding requests to
++ * If we get here, and there are no outstanding
++ * requests, then the request pattern is isochronous
++ * (see the comments on the function
++ * bfq_bfqq_softrt_next_start()). Thus we can compute
++ * soft_rt_next_start. If, instead, the queue still
++ * has outstanding requests, then we have to wait for
++ * the completion of all the outstanding requests to
+ * discover whether the request pattern is actually
+ * isochronous.
+ */
+- if (bfqq->dispatched == 0)
++ BUG_ON(bfqd->busy_queues < 1);
++ if (bfqq->dispatched == 0) {
+ bfqq->soft_rt_next_start =
+ bfq_bfqq_softrt_next_start(bfqd, bfqq);
+- else {
++ bfq_log_bfqq(bfqd, bfqq, "new soft_rt_next %lu",
++ bfqq->soft_rt_next_start);
++ } else {
+ /*
+ * The application is still waiting for the
+ * completion of one or more requests:
+@@ -2314,7 +2850,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ * happened to be in the past.
+ */
+ bfqq->soft_rt_next_start =
+- bfq_infinity_from_now(jiffies);
++ bfq_greatest_from_now();
+ /*
+ * Schedule an update of soft_rt_next_start to when
+ * the task may be discovered to be isochronous.
+@@ -2324,8 +2860,9 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ }
+
+ bfq_log_bfqq(bfqd, bfqq,
+- "expire (%d, slow %d, num_disp %d, idle_win %d)", reason,
+- slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
++ "expire (%d, slow %d, num_disp %d, idle_win %d, weight %d)",
++ reason, slow, bfqq->dispatched,
++ bfq_bfqq_idle_window(bfqq), entity->weight);
+
+ /*
+ * Increase, decrease or leave budget unchanged according to
+@@ -2333,6 +2870,14 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ */
+ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
+ __bfq_bfqq_expire(bfqd, bfqq);
++
++ BUG_ON(!bfq_bfqq_busy(bfqq) && reason == BFQ_BFQQ_BUDGET_EXHAUSTED &&
++ !bfq_class_idle(bfqq));
++
++ if (!bfq_bfqq_busy(bfqq) &&
++ reason != BFQ_BFQQ_BUDGET_TIMEOUT &&
++ reason != BFQ_BFQQ_BUDGET_EXHAUSTED)
++ bfq_mark_bfqq_non_blocking_wait_rq(bfqq);
+ }
+
+ /*
+@@ -2342,20 +2887,17 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
+ */
+ static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
+ {
+- if (bfq_bfqq_budget_new(bfqq) ||
+- time_before(jiffies, bfqq->budget_timeout))
+- return false;
+- return true;
++ return time_is_before_eq_jiffies(bfqq->budget_timeout);
+ }
+
+ /*
+- * If we expire a queue that is waiting for the arrival of a new
+- * request, we may prevent the fictitious timestamp back-shifting that
+- * allows the guarantees of the queue to be preserved (see [1] for
+- * this tricky aspect). Hence we return true only if this condition
+- * does not hold, or if the queue is slow enough to deserve only to be
+- * kicked off for preserving a high throughput.
+-*/
++ * If we expire a queue that is actively waiting (i.e., with the
++ * device idled) for the arrival of a new request, then we may incur
++ * the timestamp misalignment problem described in the body of the
++ * function __bfq_activate_entity. Hence we return true only if this
++ * condition does not hold, or if the queue is slow enough to deserve
++ * only to be kicked off for preserving a high throughput.
++ */
+ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
+ {
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+@@ -2397,10 +2939,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ {
+ struct bfq_data *bfqd = bfqq->bfqd;
+ bool idling_boosts_thr, idling_boosts_thr_without_issues,
+- all_queues_seeky, on_hdd_and_not_all_queues_seeky,
+ idling_needed_for_service_guarantees,
+ asymmetric_scenario;
+
++ if (bfqd->strict_guarantees)
++ return true;
++
+ /*
+ * The next variable takes into account the cases where idling
+ * boosts the throughput.
+@@ -2422,7 +2966,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ */
+ idling_boosts_thr = !bfqd->hw_tag ||
+ (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
+- bfq_bfqq_idle_window(bfqq)) ;
++ bfq_bfqq_idle_window(bfqq));
+
+ /*
+ * The value of the next variable,
+@@ -2463,74 +3007,27 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ bfqd->wr_busy_queues == 0;
+
+ /*
+- * There are then two cases where idling must be performed not
++ * There is then a case where idling must be performed not
+ * for throughput concerns, but to preserve service
+- * guarantees. In the description of these cases, we say, for
+- * short, that a queue is sequential/random if the process
+- * associated to the queue issues sequential/random requests
+- * (in the second case the queue may be tagged as seeky or
+- * even constantly_seeky).
+- *
+- * To introduce the first case, we note that, since
+- * bfq_bfqq_idle_window(bfqq) is false if the device is
+- * NCQ-capable and bfqq is random (see
+- * bfq_update_idle_window()), then, from the above two
+- * assignments it follows that
+- * idling_boosts_thr_without_issues is false if the device is
+- * NCQ-capable and bfqq is random. Therefore, for this case,
+- * device idling would never be allowed if we used just
+- * idling_boosts_thr_without_issues to decide whether to allow
+- * it. And, beneficially, this would imply that throughput
+- * would always be boosted also with random I/O on NCQ-capable
+- * HDDs.
+- *
+- * But we must be careful on this point, to avoid an unfair
+- * treatment for bfqq. In fact, because of the same above
+- * assignments, idling_boosts_thr_without_issues is, on the
+- * other hand, true if 1) the device is an HDD and bfqq is
+- * sequential, and 2) there are no busy weight-raised
+- * queues. As a consequence, if we used just
+- * idling_boosts_thr_without_issues to decide whether to idle
+- * the device, then with an HDD we might easily bump into a
+- * scenario where queues that are sequential and I/O-bound
+- * would enjoy idling, whereas random queues would not. The
+- * latter might then get a low share of the device throughput,
+- * simply because the former would get many requests served
+- * after being set as in service, while the latter would not.
++ * guarantees.
+ *
+- * To address this issue, we start by setting to true a
+- * sentinel variable, on_hdd_and_not_all_queues_seeky, if the
+- * device is rotational and not all queues with pending or
+- * in-flight requests are constantly seeky (i.e., there are
+- * active sequential queues, and bfqq might then be mistreated
+- * if it does not enjoy idling because it is random).
+- */
+- all_queues_seeky = bfq_bfqq_constantly_seeky(bfqq) &&
+- bfqd->busy_in_flight_queues ==
+- bfqd->const_seeky_busy_in_flight_queues;
+-
+- on_hdd_and_not_all_queues_seeky =
+- !blk_queue_nonrot(bfqd->queue) && !all_queues_seeky;
+-
+- /*
+- * To introduce the second case where idling needs to be
+- * performed to preserve service guarantees, we can note that
+- * allowing the drive to enqueue more than one request at a
+- * time, and hence delegating de facto final scheduling
+- * decisions to the drive's internal scheduler, causes loss of
+- * control on the actual request service order. In particular,
+- * the critical situation is when requests from different
+- * processes happens to be present, at the same time, in the
+- * internal queue(s) of the drive. In such a situation, the
+- * drive, by deciding the service order of the
+- * internally-queued requests, does determine also the actual
+- * throughput distribution among these processes. But the
+- * drive typically has no notion or concern about per-process
+- * throughput distribution, and makes its decisions only on a
+- * per-request basis. Therefore, the service distribution
+- * enforced by the drive's internal scheduler is likely to
+- * coincide with the desired device-throughput distribution
+- * only in a completely symmetric scenario where:
++ * To introduce this case, we can note that allowing the drive
++ * to enqueue more than one request at a time, and hence
++ * delegating de facto final scheduling decisions to the
++ * drive's internal scheduler, entails loss of control on the
++ * actual request service order. In particular, the critical
++ * situation is when requests from different processes happen
++ * to be present, at the same time, in the internal queue(s)
++ * of the drive. In such a situation, the drive, by deciding
++ * the service order of the internally-queued requests, does
++ * determine also the actual throughput distribution among
++ * these processes. But the drive typically has no notion or
++ * concern about per-process throughput distribution, and
++ * makes its decisions only on a per-request basis. Therefore,
++ * the service distribution enforced by the drive's internal
++ * scheduler is likely to coincide with the desired
++ * device-throughput distribution only in a completely
++ * symmetric scenario where:
+ * (i) each of these processes must get the same throughput as
+ * the others;
+ * (ii) all these processes have the same I/O pattern
+@@ -2552,26 +3049,53 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ * words, only if sub-condition (i) holds, then idling is
+ * allowed, and the device tends to be prevented from queueing
+ * many requests, possibly of several processes. The reason
+- * for not controlling also sub-condition (ii) is that, first,
+- * in the case of an HDD, the asymmetry in terms of types of
+- * I/O patterns is already taken in to account in the above
+- * sentinel variable
+- * on_hdd_and_not_all_queues_seeky. Secondly, in the case of a
+- * flash-based device, we prefer however to privilege
+- * throughput (and idling lowers throughput for this type of
+- * devices), for the following reasons:
+- * 1) differently from HDDs, the service time of random
+- * requests is not orders of magnitudes lower than the service
+- * time of sequential requests; thus, even if processes doing
+- * sequential I/O get a preferential treatment with respect to
+- * others doing random I/O, the consequences are not as
+- * dramatic as with HDDs;
+- * 2) if a process doing random I/O does need strong
+- * throughput guarantees, it is hopefully already being
+- * weight-raised, or the user is likely to have assigned it a
+- * higher weight than the other processes (and thus
+- * sub-condition (i) is likely to be false, which triggers
+- * idling).
++ * for not controlling also sub-condition (ii) is that we
++ * exploit preemption to preserve guarantees in case of
++ * symmetric scenarios, even if (ii) does not hold, as
++ * explained in the next two paragraphs.
++ *
++ * Even if a queue, say Q, is expired when it remains idle, Q
++ * can still preempt the new in-service queue if the next
++ * request of Q arrives soon (see the comments on
++ * bfq_bfqq_update_budg_for_activation). If all queues and
++ * groups have the same weight, this form of preemption,
++ * combined with the hole-recovery heuristic described in the
++ * comments on function bfq_bfqq_update_budg_for_activation,
++ * are enough to preserve a correct bandwidth distribution in
++	 * is enough to preserve a correct bandwidth distribution in
++ * idling allows the internal queues of the device to contain
++ * many requests, and thus to reorder requests, we can rather
++ * safely assume that the internal scheduler still preserves a
++ * minimum of mid-term fairness. The motivation for using
++ * preemption instead of idling is that, by not idling,
++ * service guarantees are preserved without minimally
++	 * service guarantees are preserved without sacrificing
++	 * throughput even minimally. In other words, both a high
++ *
++ * More precisely, this preemption-based, idleless approach
++ * provides fairness in terms of IOPS, and not sectors per
++ * second. This can be seen with a simple example. Suppose
++ * that there are two queues with the same weight, but that
++ * the first queue receives requests of 8 sectors, while the
++ * second queue receives requests of 1024 sectors. In
++ * addition, suppose that each of the two queues contains at
++ * most one request at a time, which implies that each queue
++ * always remains idle after it is served. Finally, after
++	 * remaining idle, each queue very quickly receives a new
++ * request. It follows that the two queues are served
++ * alternatively, preempting each other if needed. This
++ * implies that, although both queues have the same weight,
++ * the queue with large requests receives a service that is
++ * 1024/8 times as high as the service received by the other
++ * queue.
++ *
++ * On the other hand, device idling is performed, and thus
++ * pure sector-domain guarantees are provided, for the
++ * following queues, which are likely to need stronger
++ * throughput guarantees: weight-raised queues, and queues
++ * with a higher weight than other queues. When such queues
++ * are active, sub-condition (i) is false, which triggers
++ * device idling.
+ *
+ * According to the above considerations, the next variable is
+ * true (only) if sub-condition (i) holds. To compute the
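
The 8-vs-1024-sector example in the comment above can be reproduced with a trivial user-space sketch (not part of the patch); the request sizes are the hypothetical ones used in the comment.

#include <stdio.h>

int main(void)
{
        /* sectors per request for the two hypothetical queues */
        unsigned int req_a = 8, req_b = 1024;

        /*
         * With strictly alternating (IOPS-fair) service, each queue
         * dispatches one request per round, so the bandwidth ratio is
         * simply the ratio of the request sizes.
         */
        printf("queue B gets %ux the bandwidth of queue A\n",
               req_b / req_a);
        return 0;
}
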
+@@ -2579,7 +3103,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ * the function bfq_symmetric_scenario(), but also check
+ * whether bfqq is being weight-raised, because
+ * bfq_symmetric_scenario() does not take into account also
+- * weight-raised queues (see comments to
++ * weight-raised queues (see comments on
+ * bfq_weights_tree_add()).
+ *
+ * As a side note, it is worth considering that the above
+@@ -2601,17 +3125,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ * bfqq. Such a case is when bfqq became active in a burst of
+ * queue activations. Queues that became active during a large
+ * burst benefit only from throughput, as discussed in the
+- * comments to bfq_handle_burst. Thus, if bfqq became active
++ * comments on bfq_handle_burst. Thus, if bfqq became active
+ * in a burst and not idling the device maximizes throughput,
+	 * then the device must not be idled, because not idling the
+ * device provides bfqq and all other queues in the burst with
+- * maximum benefit. Combining this and the two cases above, we
+- * can now establish when idling is actually needed to
+- * preserve service guarantees.
++ * maximum benefit. Combining this and the above case, we can
++ * now establish when idling is actually needed to preserve
++ * service guarantees.
+ */
+ idling_needed_for_service_guarantees =
+- (on_hdd_and_not_all_queues_seeky || asymmetric_scenario) &&
+- !bfq_bfqq_in_large_burst(bfqq);
++ asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
+
+ /*
+ * We have now all the components we need to compute the return
+@@ -2621,6 +3144,14 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ * 2) idling either boosts the throughput (without issues), or
+ * is necessary to preserve service guarantees.
+ */
++ bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d "
++ "wr_busy %d boosts %d IO-bound %d guar %d",
++ bfq_bfqq_sync(bfqq), idling_boosts_thr,
++ bfqd->wr_busy_queues,
++ idling_boosts_thr_without_issues,
++ bfq_bfqq_IO_bound(bfqq),
++ idling_needed_for_service_guarantees);
++
+ return bfq_bfqq_sync(bfqq) &&
+ (idling_boosts_thr_without_issues ||
+ idling_needed_for_service_guarantees);
+@@ -2632,7 +3163,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
+ * 1) the queue must remain in service and cannot be expired, and
+ * 2) the device must be idled to wait for the possible arrival of a new
+ * request for the queue.
+- * See the comments to the function bfq_bfqq_may_idle for the reasons
++ * See the comments on the function bfq_bfqq_may_idle for the reasons
+ * why performing device idling is the best choice to boost the throughput
+ * and preserve service guarantees when bfq_bfqq_may_idle itself
+ * returns true.
+@@ -2698,9 +3229,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+ */
+ bfq_clear_bfqq_wait_request(bfqq);
+ del_timer(&bfqd->idle_slice_timer);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_idle_time(bfqq_group(bfqq));
+-#endif
+ }
+ goto keep_queue;
+ }
+@@ -2745,14 +3274,11 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
+
+ /*
+- * If the queue was activated in a burst, or
+- * too much time has elapsed from the beginning
+- * of this weight-raising period, or the queue has
+- * exceeded the acceptable number of cooperations,
+- * then end weight raising.
++ * If the queue was activated in a burst, or too much
++ * time has elapsed from the beginning of this
++ * weight-raising period, then end weight raising.
+ */
+ if (bfq_bfqq_in_large_burst(bfqq) ||
+- bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh ||
+ time_is_before_jiffies(bfqq->last_wr_start_finish +
+ bfqq->wr_cur_max_time)) {
+ bfqq->last_wr_start_finish = jiffies;
+@@ -2814,10 +3340,25 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
+ goto expire;
+ }
+
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+ /* Finally, insert request into driver dispatch list. */
+ bfq_bfqq_served(bfqq, service_to_charge);
++
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
++
+ bfq_dispatch_insert(bfqd->queue, rq);
+
++ /*
++	 * If weight raising has to terminate for bfqq, then the next
++	 * function causes an immediate update of bfqq's weight,
++	 * without waiting for the next activation. As a consequence,
++	 * on expiration, bfqq will be timestamped as if it had never
++	 * been weight-raised during this service slot, even if it has
++	 * received part or even most of the service as a
++	 * weight-raised queue. This inflates bfqq's timestamps, which
++	 * is beneficial, as bfqq is then more willing to leave the
++	 * device immediately to other weight-raised queues, if any.
++ */
+ bfq_update_wr_data(bfqd, bfqq);
+
+ bfq_log_bfqq(bfqd, bfqq,
+@@ -2833,9 +3374,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
+ bfqd->in_service_bic = RQ_BIC(rq);
+ }
+
+- if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) &&
+- dispatched >= bfqd->bfq_max_budget_async_rq) ||
+- bfq_class_idle(bfqq)))
++ if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq))
+ goto expire;
+
+ return dispatched;
+@@ -2881,8 +3420,8 @@ static int bfq_forced_dispatch(struct bfq_data *bfqd)
+ st = bfq_entity_service_tree(&bfqq->entity);
+
+ dispatched += __bfq_forced_dispatch_bfqq(bfqq);
+- bfqq->max_budget = bfq_max_budget(bfqd);
+
++ bfqq->max_budget = bfq_max_budget(bfqd);
+ bfq_forget_idle(st);
+ }
+
+@@ -2895,9 +3434,9 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+ {
+ struct bfq_data *bfqd = q->elevator->elevator_data;
+ struct bfq_queue *bfqq;
+- int max_dispatch;
+
+ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
++
+ if (bfqd->busy_queues == 0)
+ return 0;
+
+@@ -2908,21 +3447,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+ if (!bfqq)
+ return 0;
+
+- if (bfq_class_idle(bfqq))
+- max_dispatch = 1;
+-
+- if (!bfq_bfqq_sync(bfqq))
+- max_dispatch = bfqd->bfq_max_budget_async_rq;
+-
+- if (!bfq_bfqq_sync(bfqq) && bfqq->dispatched >= max_dispatch) {
+- if (bfqd->busy_queues > 1)
+- return 0;
+- if (bfqq->dispatched >= 4 * max_dispatch)
+- return 0;
+- }
+-
+- if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq))
+- return 0;
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+
+ bfq_clear_bfqq_wait_request(bfqq);
+ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
+@@ -2933,6 +3458,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
+ bfq_log_bfqq(bfqd, bfqq, "dispatched %s request",
+ bfq_bfqq_sync(bfqq) ? "sync" : "async");
+
++ BUG_ON(bfqq->entity.budget < bfqq->entity.service);
+ return 1;
+ }
+
+@@ -2949,11 +3475,11 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
+ struct bfq_group *bfqg = bfqq_group(bfqq);
+ #endif
+
+- BUG_ON(atomic_read(&bfqq->ref) <= 0);
++ BUG_ON(bfqq->ref <= 0);
+
+- bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq,
+- atomic_read(&bfqq->ref));
+- if (!atomic_dec_and_test(&bfqq->ref))
++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, bfqq->ref);
++ bfqq->ref--;
++ if (bfqq->ref)
+ return;
+
+ BUG_ON(rb_first(&bfqq->sort_list));
+@@ -3007,8 +3533,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_schedule_dispatch(bfqd);
+ }
+
+- bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq,
+- atomic_read(&bfqq->ref));
++ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref);
+
+ bfq_put_cooperator(bfqq);
+
+@@ -3019,26 +3544,7 @@ static void bfq_init_icq(struct io_cq *icq)
+ {
+ struct bfq_io_cq *bic = icq_to_bic(icq);
+
+- bic->ttime.last_end_request = jiffies;
+- /*
+- * A newly created bic indicates that the process has just
+- * started doing I/O, and is probably mapping into memory its
+- * executable and libraries: it definitely needs weight raising.
+- * There is however the possibility that the process performs,
+- * for a while, I/O close to some other process. EQM intercepts
+- * this behavior and may merge the queue corresponding to the
+- * process with some other queue, BEFORE the weight of the queue
+- * is raised. Merged queues are not weight-raised (they are assumed
+- * to belong to processes that benefit only from high throughput).
+- * If the merge is basically the consequence of an accident, then
+- * the queue will be split soon and will get back its old weight.
+- * It is then important to write down somewhere that this queue
+- * does need weight raising, even if it did not make it to get its
+- * weight raised before being merged. To this purpose, we overload
+- * the field raising_time_left and assign 1 to it, to mark the queue
+- * as needing weight raising.
+- */
+- bic->wr_time_left = 1;
++ bic->ttime.last_end_request = bfq_smallest_from_now();
+ }
+
+ static void bfq_exit_icq(struct io_cq *icq)
+@@ -3046,21 +3552,21 @@ static void bfq_exit_icq(struct io_cq *icq)
+ struct bfq_io_cq *bic = icq_to_bic(icq);
+ struct bfq_data *bfqd = bic_to_bfqd(bic);
+
+- if (bic->bfqq[BLK_RW_ASYNC]) {
+- bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]);
+- bic->bfqq[BLK_RW_ASYNC] = NULL;
++ if (bic_to_bfqq(bic, false)) {
++ bfq_exit_bfqq(bfqd, bic_to_bfqq(bic, false));
++ bic_set_bfqq(bic, NULL, false);
+ }
+
+- if (bic->bfqq[BLK_RW_SYNC]) {
++ if (bic_to_bfqq(bic, true)) {
+ /*
+ * If the bic is using a shared queue, put the reference
+ * taken on the io_context when the bic started using a
+ * shared bfq_queue.
+ */
+- if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
++ if (bfq_bfqq_coop(bic_to_bfqq(bic, true)))
+ put_io_context(icq->ioc);
+- bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
+- bic->bfqq[BLK_RW_SYNC] = NULL;
++ bfq_exit_bfqq(bfqd, bic_to_bfqq(bic, true));
++ bic_set_bfqq(bic, NULL, true);
+ }
+ }
+
+@@ -3068,7 +3574,8 @@ static void bfq_exit_icq(struct io_cq *icq)
+ * Update the entity prio values; note that the new values will not
+ * be used until the next (re)activation.
+ */
+-static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
++static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq,
++ struct bfq_io_cq *bic)
+ {
+ struct task_struct *tsk = current;
+ int ioprio_class;
+@@ -3100,7 +3607,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
+ break;
+ }
+
+- if (bfqq->new_ioprio < 0 || bfqq->new_ioprio >= IOPRIO_BE_NR) {
++ if (bfqq->new_ioprio >= IOPRIO_BE_NR) {
+ printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n",
+ bfqq->new_ioprio);
+ BUG();
+@@ -3108,45 +3615,39 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
+
+ bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
+ bfqq->entity.prio_changed = 1;
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "set_next_ioprio_data: bic_class %d prio %d class %d",
++ ioprio_class, bfqq->new_ioprio, bfqq->new_ioprio_class);
+ }
+
+ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
+ {
+- struct bfq_data *bfqd;
+- struct bfq_queue *bfqq, *new_bfqq;
+- unsigned long uninitialized_var(flags);
++ struct bfq_data *bfqd = bic_to_bfqd(bic);
++ struct bfq_queue *bfqq;
+ int ioprio = bic->icq.ioc->ioprio;
+
+- bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
+- &flags);
+ /*
+ * This condition may trigger on a newly created bic, be sure to
+ * drop the lock before returning.
+ */
+ if (unlikely(!bfqd) || likely(bic->ioprio == ioprio))
+- goto out;
++ return;
+
+ bic->ioprio = ioprio;
+
+- bfqq = bic->bfqq[BLK_RW_ASYNC];
++ bfqq = bic_to_bfqq(bic, false);
+ if (bfqq) {
+- new_bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic,
+- GFP_ATOMIC);
+- if (new_bfqq) {
+- bic->bfqq[BLK_RW_ASYNC] = new_bfqq;
+- bfq_log_bfqq(bfqd, bfqq,
+- "check_ioprio_change: bfqq %p %d",
+- bfqq, atomic_read(&bfqq->ref));
+- bfq_put_queue(bfqq);
+- }
++ bfq_put_queue(bfqq);
++ bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
++ bic_set_bfqq(bic, bfqq, false);
++ bfq_log_bfqq(bfqd, bfqq,
++ "check_ioprio_change: bfqq %p %d",
++ bfqq, bfqq->ref);
+ }
+
+- bfqq = bic->bfqq[BLK_RW_SYNC];
++ bfqq = bic_to_bfqq(bic, true);
+ if (bfqq)
+ bfq_set_next_ioprio_data(bfqq, bic);
+-
+-out:
+- bfq_put_bfqd_unlock(bfqd, &flags);
+ }
+
+ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+@@ -3155,8 +3656,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ RB_CLEAR_NODE(&bfqq->entity.rb_node);
+ INIT_LIST_HEAD(&bfqq->fifo);
+ INIT_HLIST_NODE(&bfqq->burst_list_node);
++ BUG_ON(!hlist_unhashed(&bfqq->burst_list_node));
+
+- atomic_set(&bfqq->ref, 0);
++ bfqq->ref = 0;
+ bfqq->bfqd = bfqd;
+
+ if (bic)
+@@ -3166,6 +3668,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ if (!bfq_class_idle(bfqq))
+ bfq_mark_bfqq_idle_window(bfqq);
+ bfq_mark_bfqq_sync(bfqq);
++ bfq_mark_bfqq_just_created(bfqq);
+ } else
+ bfq_clear_bfqq_sync(bfqq);
+ bfq_mark_bfqq_IO_bound(bfqq);
+@@ -3175,72 +3678,17 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ bfqq->pid = pid;
+
+ bfqq->wr_coeff = 1;
+- bfqq->last_wr_start_finish = 0;
++ bfqq->last_wr_start_finish = bfq_smallest_from_now();
++ bfqq->budget_timeout = bfq_smallest_from_now();
++ bfqq->split_time = bfq_smallest_from_now();
+ /*
+ * Set to the value for which bfqq will not be deemed as
+ * soft rt when it becomes backlogged.
+ */
+- bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies);
+-}
+-
+-static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd,
+- struct bio *bio, int is_sync,
+- struct bfq_io_cq *bic,
+- gfp_t gfp_mask)
+-{
+- struct bfq_group *bfqg;
+- struct bfq_queue *bfqq, *new_bfqq = NULL;
+- struct blkcg *blkcg;
+-
+-retry:
+- rcu_read_lock();
+-
+- blkcg = bio_blkcg(bio);
+- bfqg = bfq_find_alloc_group(bfqd, blkcg);
+- /* bic always exists here */
+- bfqq = bic_to_bfqq(bic, is_sync);
+-
+- /*
+- * Always try a new alloc if we fall back to the OOM bfqq
+- * originally, since it should just be a temporary situation.
+- */
+- if (!bfqq || bfqq == &bfqd->oom_bfqq) {
+- bfqq = NULL;
+- if (new_bfqq) {
+- bfqq = new_bfqq;
+- new_bfqq = NULL;
+- } else if (gfpflags_allow_blocking(gfp_mask)) {
+- rcu_read_unlock();
+- spin_unlock_irq(bfqd->queue->queue_lock);
+- new_bfqq = kmem_cache_alloc_node(bfq_pool,
+- gfp_mask | __GFP_ZERO,
+- bfqd->queue->node);
+- spin_lock_irq(bfqd->queue->queue_lock);
+- if (new_bfqq)
+- goto retry;
+- } else {
+- bfqq = kmem_cache_alloc_node(bfq_pool,
+- gfp_mask | __GFP_ZERO,
+- bfqd->queue->node);
+- }
+-
+- if (bfqq) {
+- bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
+- is_sync);
+- bfq_init_entity(&bfqq->entity, bfqg);
+- bfq_log_bfqq(bfqd, bfqq, "allocated");
+- } else {
+- bfqq = &bfqd->oom_bfqq;
+- bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
+- }
+- }
+-
+- if (new_bfqq)
+- kmem_cache_free(bfq_pool, new_bfqq);
++ bfqq->soft_rt_next_start = bfq_greatest_from_now();
+
+- rcu_read_unlock();
+-
+- return bfqq;
++ /* first request is almost certainly seeky */
++ bfqq->seek_history = 1;
+ }
+
+ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
+@@ -3263,44 +3711,56 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
+ }
+
+ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+- struct bio *bio, int is_sync,
+- struct bfq_io_cq *bic, gfp_t gfp_mask)
++ struct bio *bio, bool is_sync,
++ struct bfq_io_cq *bic)
+ {
+ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
+ struct bfq_queue **async_bfqq = NULL;
+- struct bfq_queue *bfqq = NULL;
++ struct bfq_queue *bfqq;
++ struct bfq_group *bfqg;
+
+- if (!is_sync) {
+- struct blkcg *blkcg;
+- struct bfq_group *bfqg;
++ rcu_read_lock();
+
+- rcu_read_lock();
+- blkcg = bio_blkcg(bio);
+- rcu_read_unlock();
+- bfqg = bfq_find_alloc_group(bfqd, blkcg);
++	bfqg = bfq_find_alloc_group(bfqd, bio_blkcg(bio));
++
++ if (!is_sync) {
+ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
+ ioprio);
+ bfqq = *async_bfqq;
++ if (bfqq)
++ goto out;
+ }
+
+- if (!bfqq)
+- bfqq = bfq_find_alloc_queue(bfqd, bio, is_sync, bic, gfp_mask);
++ bfqq = kmem_cache_alloc_node(bfq_pool, GFP_NOWAIT | __GFP_ZERO,
++ bfqd->queue->node);
++
++ if (bfqq) {
++ bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
++ is_sync);
++ bfq_init_entity(&bfqq->entity, bfqg);
++ bfq_log_bfqq(bfqd, bfqq, "allocated");
++ } else {
++ bfqq = &bfqd->oom_bfqq;
++ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq");
++ goto out;
++ }
+
+ /*
+ * Pin the queue now that it's allocated, scheduler exit will
+ * prune it.
+ */
+- if (!is_sync && !(*async_bfqq)) {
+- atomic_inc(&bfqq->ref);
++ if (async_bfqq) {
++ bfqq->ref++;
+ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
+- bfqq, atomic_read(&bfqq->ref));
++ bfqq, bfqq->ref);
+ *async_bfqq = bfqq;
+ }
+
+- atomic_inc(&bfqq->ref);
+- bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq,
+- atomic_read(&bfqq->ref));
++out:
++ bfqq->ref++;
++ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref);
++ rcu_read_unlock();
+ return bfqq;
+ }
+
+@@ -3316,37 +3776,21 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd,
+ bic->ttime.ttime_samples;
+ }
+
+-static void bfq_update_io_seektime(struct bfq_data *bfqd,
+- struct bfq_queue *bfqq,
+- struct request *rq)
+-{
+- sector_t sdist;
+- u64 total;
+-
+- if (bfqq->last_request_pos < blk_rq_pos(rq))
+- sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
+- else
+- sdist = bfqq->last_request_pos - blk_rq_pos(rq);
+-
+- /*
+- * Don't allow the seek distance to get too large from the
+- * odd fragment, pagein, etc.
+- */
+- if (bfqq->seek_samples == 0) /* first request, not really a seek */
+- sdist = 0;
+- else if (bfqq->seek_samples <= 60) /* second & third seek */
+- sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024);
+- else
+- sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64);
+
+- bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8;
+- bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8;
+- total = bfqq->seek_total + (bfqq->seek_samples/2);
+- do_div(total, bfqq->seek_samples);
+- bfqq->seek_mean = (sector_t)total;
++static void
++bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ struct request *rq)
++{
++ sector_t sdist = 0;
++ if (bfqq->last_request_pos) {
++ if (bfqq->last_request_pos < blk_rq_pos(rq))
++ sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
++ else
++ sdist = bfqq->last_request_pos - blk_rq_pos(rq);
++ }
+
+- bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist,
+- (u64)bfqq->seek_mean);
++ bfqq->seek_history <<= 1;
++ bfqq->seek_history |= (sdist > BFQQ_SEEK_THR);
+ }
+
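
The seek detector is now a per-request bit history instead of a running mean. A small user-space sketch (not part of the patch) shows how such a history can be classified as seeky; the popcount test and the numeric threshold below are assumptions for illustration, since the real BFQQ_SEEKY() and BFQQ_SEEK_THR definitions live elsewhere in the patch.

#include <stdbool.h>
#include <stdio.h>

/* hypothetical classifier: seeky if at least half of the last eight
 * requests were large jumps; the real BFQQ_SEEKY() test is defined
 * elsewhere in the patch */
static bool history_is_seeky(unsigned int history)
{
        return __builtin_popcount(history & 0xff) >= 4;
}

int main(void)
{
        const unsigned int seek_thr = 8192;   /* assumed, in sectors */
        unsigned int dist[] = { 0, 9000, 12000, 4, 8, 15000, 20000, 16 };
        unsigned int history = 0;

        for (unsigned int i = 0; i < sizeof(dist) / sizeof(dist[0]); i++) {
                history <<= 1;
                history |= (dist[i] > seek_thr);
        }
        printf("seeky: %d\n", history_is_seeky(history));
        return 0;
}
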
+ /*
+@@ -3364,7 +3808,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
+ return;
+
+ /* Idle window just restored, statistics are meaningless. */
+- if (bfq_bfqq_just_split(bfqq))
++ if (time_is_after_eq_jiffies(bfqq->split_time +
++ bfqd->bfq_wr_min_idle_time))
+ return;
+
+ enable_idle = bfq_bfqq_idle_window(bfqq);
+@@ -3404,22 +3849,13 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+
+ bfq_update_io_thinktime(bfqd, bic);
+ bfq_update_io_seektime(bfqd, bfqq, rq);
+- if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) {
+- bfq_clear_bfqq_constantly_seeky(bfqq);
+- if (!blk_queue_nonrot(bfqd->queue)) {
+- BUG_ON(!bfqd->const_seeky_busy_in_flight_queues);
+- bfqd->const_seeky_busy_in_flight_queues--;
+- }
+- }
+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
+ !BFQQ_SEEKY(bfqq))
+ bfq_update_idle_window(bfqd, bfqq, bic);
+- bfq_clear_bfqq_just_split(bfqq);
+
+ bfq_log_bfqq(bfqd, bfqq,
+- "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
+- bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq),
+- (long long unsigned)bfqq->seek_mean);
++ "rq_enqueued: idle_window=%d (seeky %d)",
++ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq));
+
+ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
+
+@@ -3433,14 +3869,15 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ * is small and the queue is not to be expired, then
+ * just exit.
+ *
+- * In this way, if the disk is being idled to wait for
+- * a new request from the in-service queue, we avoid
+- * unplugging the device and committing the disk to serve
+- * just a small request. On the contrary, we wait for
+- * the block layer to decide when to unplug the device:
+- * hopefully, new requests will be merged to this one
+- * quickly, then the device will be unplugged and
+- * larger requests will be dispatched.
++ * In this way, if the device is being idled to wait
++ * for a new request from the in-service queue, we
++ * avoid unplugging the device and committing the
++ * device to serve just a small request. On the
++ * contrary, we wait for the block layer to decide
++ * when to unplug the device: hopefully, new requests
++ * will be merged to this one quickly, then the device
++ * will be unplugged and larger requests will be
++ * dispatched.
+ */
+ if (small_req && !budget_timeout)
+ return;
+@@ -3453,9 +3890,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ */
+ bfq_clear_bfqq_wait_request(bfqq);
+ del_timer(&bfqd->idle_slice_timer);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_idle_time(bfqq_group(bfqq));
+-#endif
+
+ /*
+ * The queue is not empty, because a new request just
+@@ -3499,27 +3934,19 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+ */
+ new_bfqq->allocated[rq_data_dir(rq)]++;
+ bfqq->allocated[rq_data_dir(rq)]--;
+- atomic_inc(&new_bfqq->ref);
++ new_bfqq->ref++;
++ bfq_clear_bfqq_just_created(bfqq);
+ bfq_put_queue(bfqq);
+ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
+ bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
+ bfqq, new_bfqq);
+ rq->elv.priv[1] = new_bfqq;
+ bfqq = new_bfqq;
+- } else
+- bfq_bfqq_increase_failed_cooperations(bfqq);
++ }
+ }
+
+ bfq_add_request(rq);
+
+- /*
+- * Here a newly-created bfq_queue has already started a weight-raising
+- * period: clear raising_time_left to prevent bfq_bfqq_save_state()
+- * from assigning it a full weight-raising period. See the detailed
+- * comments about this field in bfq_init_icq().
+- */
+- if (bfqq->bic)
+- bfqq->bic->wr_time_left = 0;
+ rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+ list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+@@ -3528,8 +3955,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+
+ static void bfq_update_hw_tag(struct bfq_data *bfqd)
+ {
+- bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver,
+- bfqd->rq_in_driver);
++ bfqd->max_rq_in_driver = max_t(int, bfqd->max_rq_in_driver,
++ bfqd->rq_in_driver);
+
+ if (bfqd->hw_tag == 1)
+ return;
+@@ -3560,43 +3987,41 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
+ bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)",
+ blk_rq_sectors(rq), sync);
+
++ assert_spin_locked(bfqd->queue->queue_lock);
+ bfq_update_hw_tag(bfqd);
+
+ BUG_ON(!bfqd->rq_in_driver);
+ BUG_ON(!bfqq->dispatched);
+ bfqd->rq_in_driver--;
+ bfqq->dispatched--;
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_completion(bfqq_group(bfqq),
+ rq_start_time_ns(rq),
+ rq_io_start_time_ns(rq), rq->cmd_flags);
+-#endif
+
+ if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
++ /*
++ * Set budget_timeout (which we overload to store the
++ * time at which the queue remains with no backlog and
++ * no outstanding request; used by the weight-raising
++ * mechanism).
++ */
++ bfqq->budget_timeout = jiffies;
++
+ bfq_weights_tree_remove(bfqd, &bfqq->entity,
+ &bfqd->queue_weights_tree);
+- if (!blk_queue_nonrot(bfqd->queue)) {
+- BUG_ON(!bfqd->busy_in_flight_queues);
+- bfqd->busy_in_flight_queues--;
+- if (bfq_bfqq_constantly_seeky(bfqq)) {
+- BUG_ON(!bfqd->
+- const_seeky_busy_in_flight_queues);
+- bfqd->const_seeky_busy_in_flight_queues--;
+- }
+- }
+ }
+
+- if (sync) {
+- bfqd->sync_flight--;
+- RQ_BIC(rq)->ttime.last_end_request = jiffies;
+- }
++ RQ_BIC(rq)->ttime.last_end_request = jiffies;
+
+ /*
+- * If we are waiting to discover whether the request pattern of the
+- * task associated with the queue is actually isochronous, and
+- * both requisites for this condition to hold are satisfied, then
+- * compute soft_rt_next_start (see the comments to the function
+- * bfq_bfqq_softrt_next_start()).
++ * If we are waiting to discover whether the request pattern
++ * of the task associated with the queue is actually
++ * isochronous, and both requisites for this condition to hold
++ * are now satisfied, then compute soft_rt_next_start (see the
++ * comments on the function bfq_bfqq_softrt_next_start()). We
++ * schedule this delayed check when bfqq expires, if it still
++ * has in-flight requests.
+ */
+ if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
+ RB_EMPTY_ROOT(&bfqq->sort_list))
+@@ -3608,10 +4033,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
+ * or if we want to idle in case it has no pending requests.
+ */
+ if (bfqd->in_service_queue == bfqq) {
+- if (bfq_bfqq_budget_new(bfqq))
+- bfq_set_budget_timeout(bfqd);
+-
+- if (bfq_bfqq_must_idle(bfqq)) {
++ if (bfqq->dispatched == 0 && bfq_bfqq_must_idle(bfqq)) {
+ bfq_arm_slice_timer(bfqd);
+ goto out;
+ } else if (bfq_may_expire_for_budg_timeout(bfqq))
+@@ -3682,14 +4104,14 @@ static void bfq_put_request(struct request *rq)
+ rq->elv.priv[1] = NULL;
+
+ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d",
+- bfqq, atomic_read(&bfqq->ref));
++ bfqq, bfqq->ref);
+ bfq_put_queue(bfqq);
+ }
+ }
+
+ /*
+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
+- * was the last process referring to said bfqq.
++ * was the last process referring to that bfqq.
+ */
+ static struct bfq_queue *
+ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
+@@ -3727,11 +4149,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+ unsigned long flags;
+ bool split = false;
+
+- might_sleep_if(gfpflags_allow_blocking(gfp_mask));
+-
+- bfq_check_ioprio_change(bic, bio);
+-
+ spin_lock_irqsave(q->queue_lock, flags);
++ bfq_check_ioprio_change(bic, bio);
+
+ if (!bic)
+ goto queue_fail;
+@@ -3741,23 +4160,47 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+ new_queue:
+ bfqq = bic_to_bfqq(bic, is_sync);
+ if (!bfqq || bfqq == &bfqd->oom_bfqq) {
+- bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, gfp_mask);
++ if (bfqq)
++ bfq_put_queue(bfqq);
++ bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
++ BUG_ON(!hlist_unhashed(&bfqq->burst_list_node));
++
+ bic_set_bfqq(bic, bfqq, is_sync);
+ if (split && is_sync) {
++ bfq_log_bfqq(bfqd, bfqq,
++ "set_request: was_in_list %d "
++ "was_in_large_burst %d "
++ "large burst in progress %d",
++ bic->was_in_burst_list,
++ bic->saved_in_large_burst,
++ bfqd->large_burst);
++
+ if ((bic->was_in_burst_list && bfqd->large_burst) ||
+- bic->saved_in_large_burst)
++ bic->saved_in_large_burst) {
++ bfq_log_bfqq(bfqd, bfqq,
++ "set_request: marking in "
++ "large burst");
+ bfq_mark_bfqq_in_large_burst(bfqq);
+- else {
+- bfq_clear_bfqq_in_large_burst(bfqq);
+- if (bic->was_in_burst_list)
+- hlist_add_head(&bfqq->burst_list_node,
+- &bfqd->burst_list);
++ } else {
++ bfq_log_bfqq(bfqd, bfqq,
++ "set_request: clearing in "
++ "large burst");
++ bfq_clear_bfqq_in_large_burst(bfqq);
++ if (bic->was_in_burst_list)
++ hlist_add_head(&bfqq->burst_list_node,
++ &bfqd->burst_list);
+ }
++ bfqq->split_time = jiffies;
+ }
+ } else {
+ /* If the queue was seeky for too long, break it apart. */
+ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
+ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
++
++ /* Update bic before losing reference to bfqq */
++ if (bfq_bfqq_in_large_burst(bfqq))
++ bic->saved_in_large_burst = true;
++
+ bfqq = bfq_split_bfqq(bic, bfqq);
+ split = true;
+ if (!bfqq)
+@@ -3766,9 +4209,8 @@ new_queue:
+ }
+
+ bfqq->allocated[rw]++;
+- atomic_inc(&bfqq->ref);
+- bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq,
+- atomic_read(&bfqq->ref));
++ bfqq->ref++;
++ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, bfqq->ref);
+
+ rq->elv.priv[0] = bic;
+ rq->elv.priv[1] = bfqq;
+@@ -3783,7 +4225,6 @@ new_queue:
+ if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
+ bfqq->bic = bic;
+ if (split) {
+- bfq_mark_bfqq_just_split(bfqq);
+ /*
+ * If the queue has just been split from a shared
+ * queue, restore the idle window and the possible
+@@ -3793,6 +4234,9 @@ new_queue:
+ }
+ }
+
++ if (unlikely(bfq_bfqq_just_created(bfqq)))
++ bfq_handle_burst(bfqd, bfqq);
++
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return 0;
+@@ -3872,6 +4316,7 @@ static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
+ cancel_work_sync(&bfqd->unplug_work);
+ }
+
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+ struct bfq_queue **bfqq_ptr)
+ {
+@@ -3880,9 +4325,9 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+
+ bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
+ if (bfqq) {
+- bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group);
++ bfq_bfqq_move(bfqd, bfqq, root_group);
+ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d",
+- bfqq, atomic_read(&bfqq->ref));
++ bfqq, bfqq->ref);
+ bfq_put_queue(bfqq);
+ *bfqq_ptr = NULL;
+ }
+@@ -3904,6 +4349,7 @@ static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
+
+ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
+ }
++#endif
+
+ static void bfq_exit_queue(struct elevator_queue *e)
+ {
+@@ -3923,8 +4369,6 @@ static void bfq_exit_queue(struct elevator_queue *e)
+
+ bfq_shutdown_timer_wq(bfqd);
+
+- synchronize_rcu();
+-
+ BUG_ON(timer_pending(&bfqd->idle_slice_timer));
+
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+@@ -3973,11 +4417,14 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ * will not attempt to free it.
+ */
+ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
+- atomic_inc(&bfqd->oom_bfqq.ref);
++ bfqd->oom_bfqq.ref++;
+ bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
+ bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
+ bfqd->oom_bfqq.entity.new_weight =
+ bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
++
++	/* oom_bfqq does not participate in bursts */
++ bfq_clear_bfqq_just_created(&bfqd->oom_bfqq);
+ /*
+ * Trigger weight initialization, according to ioprio, at the
+ * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
+@@ -3996,9 +4443,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ goto out_free;
+ bfq_init_root_group(bfqd->root_group, bfqd);
+ bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- bfqd->active_numerous_groups = 0;
+-#endif
+
+ init_timer(&bfqd->idle_slice_timer);
+ bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
+@@ -4023,20 +4467,19 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ bfqd->bfq_back_penalty = bfq_back_penalty;
+ bfqd->bfq_slice_idle = bfq_slice_idle;
+ bfqd->bfq_class_idle_last_service = 0;
+- bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq;
+- bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async;
+- bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync;
++ bfqd->bfq_timeout = bfq_timeout;
+
+- bfqd->bfq_coop_thresh = 2;
+- bfqd->bfq_failed_cooperations = 7000;
+ bfqd->bfq_requests_within_timer = 120;
+
+- bfqd->bfq_large_burst_thresh = 11;
+- bfqd->bfq_burst_interval = msecs_to_jiffies(500);
++ bfqd->bfq_large_burst_thresh = 8;
++ bfqd->bfq_burst_interval = msecs_to_jiffies(180);
+
+ bfqd->low_latency = true;
+
+- bfqd->bfq_wr_coeff = 20;
++ /*
++ * Trade-off between responsiveness and fairness.
++ */
++ bfqd->bfq_wr_coeff = 30;
+ bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300);
+ bfqd->bfq_wr_max_time = 0;
+ bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
+@@ -4048,16 +4491,15 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+ * video.
+ */
+ bfqd->wr_busy_queues = 0;
+- bfqd->busy_in_flight_queues = 0;
+- bfqd->const_seeky_busy_in_flight_queues = 0;
+
+ /*
+- * Begin by assuming, optimistically, that the device peak rate is
+- * equal to the highest reference rate.
++ * Begin by assuming, optimistically, that the device is a
++ * high-speed one, and that its peak rate is equal to 2/3 of
++ * the highest reference rate.
+ */
+ bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
+ T_fast[blk_queue_nonrot(bfqd->queue)];
+- bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)];
++ bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
+ bfqd->device_speed = BFQ_BFQD_FAST;
+
+ return 0;
+@@ -4161,10 +4603,8 @@ SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
+ SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
+ SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
+ SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
+-SHOW_FUNCTION(bfq_max_budget_async_rq_show,
+- bfqd->bfq_max_budget_async_rq, 0);
+-SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1);
+-SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1);
++SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1);
++SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0);
+ SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
+ SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0);
+ SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1);
+@@ -4199,10 +4639,6 @@ STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
+ STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
+ INT_MAX, 0);
+ STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
+-STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq,
+- 1, INT_MAX, 0);
+-STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0,
+- INT_MAX, 1);
+ STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0);
+ STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1);
+ STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX,
+@@ -4224,10 +4660,8 @@ static ssize_t bfq_weights_store(struct elevator_queue *e,
+
+ static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
+ {
+- u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
+-
+ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES)
+- return bfq_calc_max_budget(bfqd->peak_rate, timeout);
++ return bfq_calc_max_budget(bfqd);
+ else
+ return bfq_default_max_budget;
+ }
+@@ -4252,6 +4686,10 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
+ return ret;
+ }
+
++/*
++ * The name is kept for compatibility with cfq parameters, but this
++ * timeout is now used for both sync and async requests.
++ */
+ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
+ const char *page, size_t count)
+ {
+@@ -4264,13 +4702,31 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
+ else if (__data > INT_MAX)
+ __data = INT_MAX;
+
+- bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data);
++ bfqd->bfq_timeout = msecs_to_jiffies(__data);
+ if (bfqd->bfq_user_max_budget == 0)
+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd);
+
+ return ret;
+ }
+
++static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
++ const char *page, size_t count)
++{
++ struct bfq_data *bfqd = e->elevator_data;
++ unsigned long uninitialized_var(__data);
++ int ret = bfq_var_store(&__data, (page), count);
++
++ if (__data > 1)
++ __data = 1;
++	if (!bfqd->strict_guarantees && __data == 1 &&
++	    bfqd->bfq_slice_idle < msecs_to_jiffies(8))
++ bfqd->bfq_slice_idle = msecs_to_jiffies(8);
++
++ bfqd->strict_guarantees = __data;
++
++ return ret;
++}
++
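
For completeness, a hypothetical user-space snippet (not part of the patch) showing how the new strict_guarantees tunable would be toggled through sysfs; the device name is an assumption, and the path exists only while BFQ is the active scheduler for that device.

#include <stdio.h>

int main(void)
{
        /* hypothetical device; adjust to a disk actually using BFQ */
        const char *path =
                "/sys/block/sda/queue/iosched/strict_guarantees";
        FILE *f = fopen(path, "w");

        if (!f) {
                perror("strict_guarantees");
                return 1;
        }
        /* enabling it also raises slice_idle to at least 8 ms if needed */
        fputs("1\n", f);
        fclose(f);
        return 0;
}
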
+ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
+ const char *page, size_t count)
+ {
+@@ -4297,9 +4753,8 @@ static struct elv_fs_entry bfq_attrs[] = {
+ BFQ_ATTR(back_seek_penalty),
+ BFQ_ATTR(slice_idle),
+ BFQ_ATTR(max_budget),
+- BFQ_ATTR(max_budget_async_rq),
+ BFQ_ATTR(timeout_sync),
+- BFQ_ATTR(timeout_async),
++ BFQ_ATTR(strict_guarantees),
+ BFQ_ATTR(low_latency),
+ BFQ_ATTR(wr_coeff),
+ BFQ_ATTR(wr_max_time),
+@@ -4342,9 +4797,28 @@ static struct elevator_type iosched_bfq = {
+ .elevator_owner = THIS_MODULE,
+ };
+
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static struct blkcg_policy blkcg_policy_bfq = {
++ .dfl_cftypes = bfq_blkg_files,
++ .legacy_cftypes = bfq_blkcg_legacy_files,
++
++ .cpd_alloc_fn = bfq_cpd_alloc,
++ .cpd_init_fn = bfq_cpd_init,
++ .cpd_bind_fn = bfq_cpd_init,
++ .cpd_free_fn = bfq_cpd_free,
++
++ .pd_alloc_fn = bfq_pd_alloc,
++ .pd_init_fn = bfq_pd_init,
++ .pd_offline_fn = bfq_pd_offline,
++ .pd_free_fn = bfq_pd_free,
++ .pd_reset_stats_fn = bfq_pd_reset_stats,
++};
++#endif
++
+ static int __init bfq_init(void)
+ {
+ int ret;
++ char msg[50] = "BFQ I/O-scheduler: v8";
+
+ /*
+ * Can be 0 on HZ < 1000 setups.
+@@ -4352,9 +4826,6 @@ static int __init bfq_init(void)
+ if (bfq_slice_idle == 0)
+ bfq_slice_idle = 1;
+
+- if (bfq_timeout_async == 0)
+- bfq_timeout_async = 1;
+-
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ ret = blkcg_policy_register(&blkcg_policy_bfq);
+ if (ret)
+@@ -4370,23 +4841,34 @@ static int __init bfq_init(void)
+ * installed on the reference devices (see the comments before the
+ * definitions of the two arrays).
+ */
+- T_slow[0] = msecs_to_jiffies(2600);
+- T_slow[1] = msecs_to_jiffies(1000);
+- T_fast[0] = msecs_to_jiffies(5500);
+- T_fast[1] = msecs_to_jiffies(2000);
++ T_slow[0] = msecs_to_jiffies(3500);
++ T_slow[1] = msecs_to_jiffies(1500);
++ T_fast[0] = msecs_to_jiffies(8000);
++ T_fast[1] = msecs_to_jiffies(3000);
+
+ /*
+- * Thresholds that determine the switch between speed classes (see
+- * the comments before the definition of the array).
++ * Thresholds that determine the switch between speed classes
++ * (see the comments before the definition of the array
++ * device_speed_thresh). These thresholds are biased towards
++ * transitions to the fast class. This is safer than the
++ * opposite bias. In fact, a wrong transition to the slow
++ * class results in short weight-raising periods, because the
++	 * speed of the device then tends to be higher than the
++	 * reference peak rate. On the opposite end, a wrong
++	 * transition to the fast class tends to increase
++	 * weight-raising periods, for the opposite reason.
+ */
+- device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2;
+- device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2;
++ device_speed_thresh[0] = (4 * R_slow[0]) / 3;
++ device_speed_thresh[1] = (4 * R_slow[1]) / 3;
+
+ ret = elv_register(&iosched_bfq);
+ if (ret)
+ goto err_pol_unreg;
+
+- pr_info("BFQ I/O-scheduler: v7r11");
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ strcat(msg, " (with cgroups support)");
++#endif
++ pr_info("%s", msg);
+
+ return 0;
+
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index a64fec1..e54b149 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -7,9 +7,11 @@
+ * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
+ * Paolo Valente <paolo.valente@unimore.it>
+ *
+- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
++ * Copyright (C) 2016 Paolo Valente <paolo.valente@unimore.it>
+ */
+
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
++
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ #define for_each_entity(entity) \
+ for (; entity ; entity = entity->parent)
+@@ -22,8 +24,6 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+ int extract,
+ struct bfq_data *bfqd);
+
+-static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+-
+ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ {
+ struct bfq_entity *bfqg_entity;
+@@ -48,6 +48,7 @@ static void bfq_update_budget(struct bfq_entity *next_in_service)
+ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+ {
+ struct bfq_entity *next_in_service;
++ struct bfq_queue *bfqq;
+
+ if (sd->in_service_entity)
+ /* will update/requeue at the end of service */
+@@ -65,14 +66,29 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
+
+ if (next_in_service)
+ bfq_update_budget(next_in_service);
++ else
++ goto exit;
+
++ bfqq = bfq_entity_to_bfqq(next_in_service);
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "update_next_in_service: chosen this queue");
++ else {
++ struct bfq_group *bfqg =
++ container_of(next_in_service,
++ struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "update_next_in_service: chosen this entity");
++ }
++exit:
+ return 1;
+ }
+
+ static void bfq_check_next_in_service(struct bfq_sched_data *sd,
+ struct bfq_entity *entity)
+ {
+- BUG_ON(sd->next_in_service != entity);
++ WARN_ON(sd->next_in_service != entity);
+ }
+ #else
+ #define for_each_entity(entity) \
+@@ -151,20 +167,35 @@ static u64 bfq_delta(unsigned long service, unsigned long weight)
+ static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
+ {
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+-
++	unsigned long long start, finish, delta;
+ BUG_ON(entity->weight == 0);
+
+ entity->finish = entity->start +
+ bfq_delta(service, entity->weight);
+
++ start = ((entity->start>>10)*1000)>>12;
++ finish = ((entity->finish>>10)*1000)>>12;
++ delta = ((bfq_delta(service, entity->weight)>>10)*1000)>>12;
++
+ if (bfqq) {
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "calc_finish: serv %lu, w %d",
+ service, entity->weight);
+ bfq_log_bfqq(bfqq->bfqd, bfqq,
+ "calc_finish: start %llu, finish %llu, delta %llu",
+- entity->start, entity->finish,
+- bfq_delta(service, entity->weight));
++ start, finish, delta);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ } else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "calc_finish group: serv %lu, w %d",
++ service, entity->weight);
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "calc_finish group: start %llu, finish %llu, delta %llu",
++ start, finish, delta);
++#endif
+ }
+ }
+
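
The shifted expressions above only scale the scheduler's fixed-point virtual-time values into more readable units for logging. A minimal sketch (not part of the patch), assuming the usual 22-bit service shift, shows that ((x >> 10) * 1000) >> 12 approximates x * 1000 / 2^22.

#include <stdio.h>

int main(void)
{
        unsigned long long x = 123456789ULL;  /* arbitrary fixed-point value */

        unsigned long long logged = ((x >> 10) * 1000) >> 12;
        unsigned long long exact = (x * 1000) >> 22;  /* assumed 22-bit shift */

        printf("logged %llu, exact %llu\n", logged, exact);
        return 0;
}
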
+@@ -386,8 +417,6 @@ static void bfq_active_insert(struct bfq_service_tree *st,
+ BUG_ON(!bfqg);
+ BUG_ON(!bfqd);
+ bfqg->active_entities++;
+- if (bfqg->active_entities == 2)
+- bfqd->active_numerous_groups++;
+ }
+ #endif
+ }
+@@ -399,7 +428,7 @@ static void bfq_active_insert(struct bfq_service_tree *st,
+ static unsigned short bfq_ioprio_to_weight(int ioprio)
+ {
+ BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
+- return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio;
++	return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
+ }
+
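
To see what this change does, here is a small user-space sketch (not part of the patch) comparing the old and new mappings, assuming the usual values IOPRIO_BE_NR == 8 and BFQ_WEIGHT_CONVERSION_COEFF == 10: the old formula clustered all weights in 73..80, while the new one spreads them proportionally from 10 to 80.

#include <stdio.h>

#define IOPRIO_BE_NR                    8
#define BFQ_WEIGHT_CONVERSION_COEFF     10      /* assumed value */

int main(void)
{
        for (int ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++)
                printf("ioprio %d: old weight %d, new weight %d\n", ioprio,
                       IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio,
                       (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF);
        return 0;
}
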
+ /**
+@@ -422,9 +451,9 @@ static void bfq_get_entity(struct bfq_entity *entity)
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
+ if (bfqq) {
+- atomic_inc(&bfqq->ref);
++ bfqq->ref++;
+ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
+- bfqq, atomic_read(&bfqq->ref));
++ bfqq, bfqq->ref);
+ }
+ }
+
+@@ -499,10 +528,6 @@ static void bfq_active_extract(struct bfq_service_tree *st,
+ BUG_ON(!bfqd);
+ BUG_ON(!bfqg->active_entities);
+ bfqg->active_entities--;
+- if (bfqg->active_entities == 1) {
+- BUG_ON(!bfqd->active_numerous_groups);
+- bfqd->active_numerous_groups--;
+- }
+ }
+ #endif
+ }
+@@ -552,7 +577,7 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
+ if (bfqq) {
+ sd = entity->sched_data;
+ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
+- bfqq, atomic_read(&bfqq->ref));
++ bfqq, bfqq->ref);
+ bfq_put_queue(bfqq);
+ }
+ }
+@@ -628,12 +653,14 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
+ if (entity->new_weight != entity->orig_weight) {
+ if (entity->new_weight < BFQ_MIN_WEIGHT ||
+ entity->new_weight > BFQ_MAX_WEIGHT) {
+- printk(KERN_CRIT "update_weight_prio: "
+- "new_weight %d\n",
++ pr_crit("update_weight_prio: new_weight %d\n",
+ entity->new_weight);
+- BUG();
++ if (entity->new_weight < BFQ_MIN_WEIGHT)
++ entity->new_weight = BFQ_MIN_WEIGHT;
++ else
++ entity->new_weight = BFQ_MAX_WEIGHT;
+ }
+- entity->orig_weight = entity->new_weight;
++ entity->orig_weight = entity->new_weight;
+ if (bfqq)
+ bfqq->ioprio =
+ bfq_weight_to_ioprio(entity->orig_weight);
+@@ -708,7 +735,7 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
+ st = bfq_entity_service_tree(entity);
+
+ entity->service += served;
+- BUG_ON(entity->service > entity->budget);
++
+ BUG_ON(st->wsum == 0);
+
+ st->vtime += bfq_delta(served, st->wsum);
+@@ -717,31 +744,69 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
+ #endif
+- bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
++ st = bfq_entity_service_tree(&bfqq->entity);
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs, vtime %llu on %p",
++ served, ((st->vtime>>10)*1000)>>12, st);
+ }
+
+ /**
+- * bfq_bfqq_charge_full_budget - set the service to the entity budget.
++ * bfq_bfqq_charge_time - charge an amount of service equivalent to the length
++ * of the time interval during which bfqq has been in
++ * service.
++ * @bfqd: the device
+ * @bfqq: the queue that needs a service update.
++ * @time_ms: the amount of time during which the queue has received service
++ *
++ * If a queue does not consume its budget fast enough, then providing
++ * the queue with service fairness may impair throughput, more or less
++ * severely. For this reason, queues that consume their budget slowly
++ * are provided with time fairness instead of service fairness. This
++ * goal is achieved through the BFQ scheduling engine, even if such an
++ * engine works in the service, and not in the time domain. The trick
++ * is charging these queues with an inflated amount of service, equal
++ * to the amount of service that they would have received during their
++ * service slot if they had been fast, i.e., if their requests had
++ * been dispatched at a rate equal to the estimated peak rate.
+ *
+- * When it's not possible to be fair in the service domain, because
+- * a queue is not consuming its budget fast enough (the meaning of
+- * fast depends on the timeout parameter), we charge it a full
+- * budget. In this way we should obtain a sort of time-domain
+- * fairness among all the seeky/slow queues.
++ * It is worth noting that time fairness can cause important
++ * distortions in terms of bandwidth distribution, on devices with
++ * internal queueing. The reason is that I/O requests dispatched
++ * during the service slot of a queue may be served after that service
++ * slot is finished, and may have a total processing time loosely
++ * correlated with the duration of the service slot. This is
++ * especially true for short service slots.
+ */
+-static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
++static void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ unsigned long time_ms)
+ {
+ struct bfq_entity *entity = &bfqq->entity;
++ int tot_serv_to_charge = entity->service;
++ unsigned int timeout_ms = jiffies_to_msecs(bfq_timeout);
++
++ if (time_ms > 0 && time_ms < timeout_ms)
++ tot_serv_to_charge =
++ (bfqd->bfq_max_budget * time_ms) / timeout_ms;
++
++ if (tot_serv_to_charge < entity->service)
++ tot_serv_to_charge = entity->service;
++
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "charge_time: %lu/%u ms, %d/%d/%d sectors",
++ time_ms, timeout_ms, entity->service,
++ tot_serv_to_charge, entity->budget);
+
+- bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget");
++ /* Increase budget to avoid inconsistencies */
++ if (tot_serv_to_charge > entity->budget)
++ entity->budget = tot_serv_to_charge;
+
+- bfq_bfqq_served(bfqq, entity->budget - entity->service);
++ bfq_bfqq_served(bfqq,
++ max_t(int, 0, tot_serv_to_charge - entity->service));
+ }
+
+ /**
+ * __bfq_activate_entity - activate an entity.
+ * @entity: the entity being activated.
++ * @non_blocking_wait_rq: true if this entity was waiting for a request
+ *
+ * Called whenever an entity is activated, i.e., it is not active and one
+ * of its children receives a new request, or has to be reactivated due to
+@@ -749,11 +814,16 @@ static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
+ * service received if @entity is active) of the queue to calculate its
+ * timestamps.
+ */
+-static void __bfq_activate_entity(struct bfq_entity *entity)
++static void __bfq_activate_entity(struct bfq_entity *entity,
++ bool non_blocking_wait_rq)
+ {
+ struct bfq_sched_data *sd = entity->sched_data;
+ struct bfq_service_tree *st = bfq_entity_service_tree(entity);
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ bool backshifted = false;
+
++ BUG_ON(!sd);
++ BUG_ON(!st);
+ if (entity == sd->in_service_entity) {
+ BUG_ON(entity->tree);
+ /*
+@@ -771,45 +841,133 @@ static void __bfq_activate_entity(struct bfq_entity *entity)
+ * old start time.
+ */
+ bfq_active_extract(st, entity);
+- } else if (entity->tree == &st->idle) {
+- /*
+- * Must be on the idle tree, bfq_idle_extract() will
+- * check for that.
+- */
+- bfq_idle_extract(st, entity);
+- entity->start = bfq_gt(st->vtime, entity->finish) ?
+- st->vtime : entity->finish;
+ } else {
+- /*
+- * The finish time of the entity may be invalid, and
+- * it is in the past for sure, otherwise the queue
+- * would have been on the idle tree.
+- */
+- entity->start = st->vtime;
+- st->wsum += entity->weight;
+- bfq_get_entity(entity);
++ unsigned long long min_vstart;
++
++ /* See comments on bfq_bfqq_update_budg_for_activation */
++ if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) {
++ backshifted = true;
++ min_vstart = entity->finish;
++ } else
++ min_vstart = st->vtime;
+
+- BUG_ON(entity->on_st);
+- entity->on_st = 1;
++ if (entity->tree == &st->idle) {
++ /*
++ * Must be on the idle tree, bfq_idle_extract() will
++ * check for that.
++ */
++ bfq_idle_extract(st, entity);
++ entity->start = bfq_gt(min_vstart, entity->finish) ?
++ min_vstart : entity->finish;
++ } else {
++ /*
++ * The finish time of the entity may be invalid, and
++ * it is in the past for sure, otherwise the queue
++ * would have been on the idle tree.
++ */
++ entity->start = min_vstart;
++ st->wsum += entity->weight;
++ bfq_get_entity(entity);
++
++ BUG_ON(entity->on_st);
++ entity->on_st = 1;
++ }
+ }
+
+ st = __bfq_entity_update_weight_prio(st, entity);
+ bfq_calc_finish(entity, entity->budget);
++
++ /*
++ * If some queues enjoy backshifting for a while, then their
++ * (virtual) finish timestamps may happen to become lower and
++ * lower than the system virtual time. In particular, if
++ * these queues often happen to be idle for short time
++ * periods, and during such time periods other queues with
++ * higher timestamps happen to be busy, then the backshifted
++ * timestamps of the former queues can become much lower than
++ * the system virtual time. In fact, to serve the queues with
++ * higher timestamps while the ones with lower timestamps are
++ * idle, the system virtual time may be pushed-up to much
++ * higher values than the finish timestamps of the idle
++ * queues. As a consequence, the finish timestamps of all new
++ * or newly activated queues may end up being much larger than
++ * those of lucky queues with backshifted timestamps. The
++ * latter queues may then monopolize the device for a lot of
++ * time. This would simply break service guarantees.
++ *
++ * To reduce this problem, push up a little bit the
++ * backshifted timestamps of the queue associated with this
++ * entity (only a queue can happen to have the backshifted
++ * flag set): just enough to let the finish timestamp of the
++ * queue be equal to the current value of the system virtual
++ * time. This may introduce a little unfairness among queues
++ * with backshifted timestamps, but it does not break
++ * worst-case fairness guarantees.
++ *
++ * As a special case, if bfqq is weight-raised, push up
++ * timestamps much less, to keep very low the probability that
++ * this push up causes the backshifted finish timestamps of
++ * weight-raised queues to become higher than the backshifted
++ * finish timestamps of non weight-raised queues.
++ */
++ if (backshifted && bfq_gt(st->vtime, entity->finish)) {
++ unsigned long delta = st->vtime - entity->finish;
++
++ if (bfqq)
++ delta /= bfqq->wr_coeff;
++
++ entity->start += delta;
++ entity->finish += delta;
++
++ if (bfqq) {
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "__activate_entity: new queue finish %llu",
++ ((entity->finish>>10)*1000)>>12);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ } else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "__activate_entity: new group finish %llu",
++ ((entity->finish>>10)*1000)>>12);
++#endif
++ }
++ }
++
+ bfq_active_insert(st, entity);
++
++ if (bfqq) {
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "__activate_entity: queue %seligible in st %p",
++ entity->start <= st->vtime ? "" : "non ", st);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ } else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "__activate_entity: group %seligible in st %p",
++ entity->start <= st->vtime ? "" : "non ", st);
++#endif
++ }
+ }
+
+ /**
+ * bfq_activate_entity - activate an entity and its ancestors if necessary.
+ * @entity: the entity to activate.
++ * @non_blocking_wait_rq: true if this entity was waiting for a request
+ *
+ * Activate @entity and all the entities on the path from it to the root.
+ */
+-static void bfq_activate_entity(struct bfq_entity *entity)
++static void bfq_activate_entity(struct bfq_entity *entity,
++ bool non_blocking_wait_rq)
+ {
+ struct bfq_sched_data *sd;
+
+ for_each_entity(entity) {
+- __bfq_activate_entity(entity);
++ BUG_ON(!entity);
++ __bfq_activate_entity(entity, non_blocking_wait_rq);
+
+ sd = entity->sched_data;
+ if (!bfq_update_next_in_service(sd))
+@@ -890,23 +1048,24 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+
+ if (!__bfq_deactivate_entity(entity, requeue))
+ /*
+- * The parent entity is still backlogged, and
+- * we don't need to update it as it is still
+- * in service.
++ * next_in_service has not been changed, so
++ * no upwards update is needed
+ */
+ break;
+
+ if (sd->next_in_service)
+ /*
+- * The parent entity is still backlogged and
+- * the budgets on the path towards the root
+- * need to be updated.
++ * The parent entity is still backlogged,
++ * because next_in_service is not NULL, and
++ * next_in_service has been updated (see
++ * comment on the body of the above if):
++ * upwards update of the schedule is needed.
+ */
+ goto update;
+
+ /*
+- * If we reach there the parent is no more backlogged and
+- * we want to propagate the dequeue upwards.
++ * If we get here, then the parent is no more backlogged and
++ * we want to propagate the deactivation upwards.
+ */
+ requeue = 1;
+ }
+@@ -916,9 +1075,23 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
+ update:
+ entity = parent;
+ for_each_entity(entity) {
+- __bfq_activate_entity(entity);
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ __bfq_activate_entity(entity, false);
+
+ sd = entity->sched_data;
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "invoking update_next for this queue");
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity,
++ struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "invoking update_next for this entity");
++ }
++#endif
+ if (!bfq_update_next_in_service(sd))
+ break;
+ }
+@@ -997,10 +1170,11 @@ left:
+ * Update the virtual time in @st and return the first eligible entity
+ * it contains.
+ */
+-static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
+- bool force)
++static struct bfq_entity *
++__bfq_lookup_next_entity(struct bfq_service_tree *st, bool force)
+ {
+ struct bfq_entity *entity, *new_next_in_service = NULL;
++ struct bfq_queue *bfqq;
+
+ if (RB_EMPTY_ROOT(&st->active))
+ return NULL;
+@@ -1009,6 +1183,24 @@ static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st,
+ entity = bfq_first_active_entity(st);
+ BUG_ON(bfq_gt(entity->start, st->vtime));
+
++ bfqq = bfq_entity_to_bfqq(entity);
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "__lookup_next: start %llu vtime %llu st %p",
++ ((entity->start>>10)*1000)>>12,
++ ((st->vtime>>10)*1000)>>12, st);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "__lookup_next: start %llu vtime %llu st %p",
++ ((entity->start>>10)*1000)>>12,
++ ((st->vtime>>10)*1000)>>12, st);
++ }
++#endif
++
+ /*
+ * If the chosen entity does not match with the sched_data's
+ * next_in_service and we are forcedly serving the IDLE priority
+@@ -1045,10 +1237,28 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+ BUG_ON(sd->in_service_entity);
+
+ if (bfqd &&
+- jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
++ jiffies - bfqd->bfq_class_idle_last_service >
++ BFQ_CL_IDLE_TIMEOUT) {
+ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
+ true);
+ if (entity) {
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ if (bfqq)
++ bfq_log_bfqq(bfqd, bfqq,
++ "idle chosen from st %p %d",
++ st + BFQ_IOPRIO_CLASSES - 1,
++ BFQ_IOPRIO_CLASSES - 1) ;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg(bfqd, bfqg,
++ "idle chosen from st %p %d",
++ st + BFQ_IOPRIO_CLASSES - 1,
++ BFQ_IOPRIO_CLASSES - 1) ;
++ }
++#endif
+ i = BFQ_IOPRIO_CLASSES - 1;
+ bfqd->bfq_class_idle_last_service = jiffies;
+ sd->next_in_service = entity;
+@@ -1057,6 +1267,24 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+ for (; i < BFQ_IOPRIO_CLASSES; i++) {
+ entity = __bfq_lookup_next_entity(st + i, false);
+ if (entity) {
++ if (bfqd != NULL) {
++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
++ if (bfqq)
++ bfq_log_bfqq(bfqd, bfqq,
++ "chosen from st %p %d",
++ st + i, i) ;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg(bfqd, bfqg,
++ "chosen from st %p %d",
++ st + i, i) ;
++ }
++#endif
++ }
++
+ if (extract) {
+ bfq_check_next_in_service(sd, entity);
+ bfq_active_extract(st + i, entity);
+@@ -1070,6 +1298,13 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
+ return entity;
+ }
+
++static bool next_queue_may_preempt(struct bfq_data *bfqd)
++{
++ struct bfq_sched_data *sd = &bfqd->root_group->sched_data;
++
++ return sd->next_in_service != sd->in_service_entity;
++}
++
+ /*
+ * Get next queue for service.
+ */
+@@ -1086,7 +1321,36 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+
+ sd = &bfqd->root_group->sched_data;
+ for (; sd ; sd = entity->my_sched_data) {
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ if (entity) {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg(bfqd, bfqg,
++ "get_next_queue: lookup in this group");
++ } else
++ bfq_log_bfqg(bfqd, bfqd->root_group,
++ "get_next_queue: lookup in root group");
++#endif
++
+ entity = bfq_lookup_next_entity(sd, 1, bfqd);
++
++ bfqq = bfq_entity_to_bfqq(entity);
++ if (bfqq)
++ bfq_log_bfqq(bfqd, bfqq,
++ "get_next_queue: this queue, finish %llu",
++ (((entity->finish>>10)*1000)>>10)>>2);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg(bfqd, bfqg,
++ "get_next_queue: this entity, finish %llu",
++ (((entity->finish>>10)*1000)>>10)>>2);
++ }
++#endif
++
+ BUG_ON(!entity);
+ entity->service = 0;
+ }
+@@ -1113,9 +1377,7 @@ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ {
+ struct bfq_entity *entity = &bfqq->entity;
+
+- if (bfqq == bfqd->in_service_queue)
+- __bfq_bfqd_reset_in_service(bfqd);
+-
++ BUG_ON(bfqq == bfqd->in_service_queue);
+ bfq_deactivate_entity(entity, requeue);
+ }
+
+@@ -1123,12 +1385,11 @@ static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ {
+ struct bfq_entity *entity = &bfqq->entity;
+
+- bfq_activate_entity(entity);
++ bfq_activate_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq));
++ bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
+ }
+
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
+-#endif
+
+ /*
+ * Called when the bfqq no longer has requests pending, remove it from
+@@ -1139,6 +1400,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ {
+ BUG_ON(!bfq_bfqq_busy(bfqq));
+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
++ BUG_ON(bfqq == bfqd->in_service_queue);
+
+ bfq_log_bfqq(bfqd, bfqq, "del from busy");
+
+@@ -1147,27 +1409,20 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ BUG_ON(bfqd->busy_queues == 0);
+ bfqd->busy_queues--;
+
+- if (!bfqq->dispatched) {
++ if (!bfqq->dispatched)
+ bfq_weights_tree_remove(bfqd, &bfqq->entity,
+ &bfqd->queue_weights_tree);
+- if (!blk_queue_nonrot(bfqd->queue)) {
+- BUG_ON(!bfqd->busy_in_flight_queues);
+- bfqd->busy_in_flight_queues--;
+- if (bfq_bfqq_constantly_seeky(bfqq)) {
+- BUG_ON(!bfqd->
+- const_seeky_busy_in_flight_queues);
+- bfqd->const_seeky_busy_in_flight_queues--;
+- }
+- }
+- }
++
+ if (bfqq->wr_coeff > 1)
+ bfqd->wr_busy_queues--;
+
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_dequeue(bfqq_group(bfqq));
+-#endif
+
++ BUG_ON(bfqq->entity.budget < 0);
++
+ bfq_deactivate_bfqq(bfqd, bfqq, requeue);
++
++ BUG_ON(bfqq->entity.budget < 0);
+ }
+
+ /*
+@@ -1185,16 +1440,11 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+ bfq_mark_bfqq_busy(bfqq);
+ bfqd->busy_queues++;
+
+- if (!bfqq->dispatched) {
++ if (!bfqq->dispatched)
+ if (bfqq->wr_coeff == 1)
+ bfq_weights_tree_add(bfqd, &bfqq->entity,
+ &bfqd->queue_weights_tree);
+- if (!blk_queue_nonrot(bfqd->queue)) {
+- bfqd->busy_in_flight_queues++;
+- if (bfq_bfqq_constantly_seeky(bfqq))
+- bfqd->const_seeky_busy_in_flight_queues++;
+- }
+- }
++
+ if (bfqq->wr_coeff > 1)
+ bfqd->wr_busy_queues++;
+ }
+diff --git a/block/bfq.h b/block/bfq.h
+index f73c942..9e76b27 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -1,5 +1,5 @@
+ /*
+- * BFQ-v7r11 for 4.5.0: data structures and common functions prototypes.
++ * BFQ-v8 for 4.6.0: data structures and common functions prototypes.
+ *
+ * Based on ideas and code from CFQ:
+ * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
+@@ -28,7 +28,7 @@
+
+ #define BFQ_DEFAULT_QUEUE_IOPRIO 4
+
+-#define BFQ_DEFAULT_GRP_WEIGHT 10
++#define BFQ_WEIGHT_LEGACY_DFL 100
+ #define BFQ_DEFAULT_GRP_IOPRIO 0
+ #define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
+
+@@ -36,12 +36,6 @@ struct bfq_entity;
+
+ /**
+ * struct bfq_service_tree - per ioprio_class service tree.
+- * @active: tree for active entities (i.e., those backlogged).
+- * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i).
+- * @first_idle: idle entity with minimum F_i.
+- * @last_idle: idle entity with maximum F_i.
+- * @vtime: scheduler virtual time.
+- * @wsum: scheduler weight sum; active and idle entities contribute to it.
+ *
+ * Each service tree represents a B-WF2Q+ scheduler on its own. Each
+ * ioprio_class has its own independent scheduler, and so its own
+@@ -49,27 +43,28 @@ struct bfq_entity;
+ * of the containing bfqd.
+ */
+ struct bfq_service_tree {
++ /* tree for active entities (i.e., those backlogged) */
+ struct rb_root active;
++ /* tree for idle entities (i.e., not backlogged, with V <= F_i)*/
+ struct rb_root idle;
+
+- struct bfq_entity *first_idle;
+- struct bfq_entity *last_idle;
++ struct bfq_entity *first_idle; /* idle entity with minimum F_i */
++ struct bfq_entity *last_idle; /* idle entity with maximum F_i */
+
+- u64 vtime;
++ u64 vtime; /* scheduler virtual time */
++ /* scheduler weight sum; active and idle entities contribute to it */
+ unsigned long wsum;
+ };
+
+ /**
+ * struct bfq_sched_data - multi-class scheduler.
+- * @in_service_entity: entity in service.
+- * @next_in_service: head-of-the-line entity in the scheduler.
+- * @service_tree: array of service trees, one per ioprio_class.
+ *
+ * bfq_sched_data is the basic scheduler queue. It supports three
+- * ioprio_classes, and can be used either as a toplevel queue or as
+- * an intermediate queue on a hierarchical setup.
+- * @next_in_service points to the active entity of the sched_data
+- * service trees that will be scheduled next.
++ * ioprio_classes, and can be used either as a toplevel queue or as an
++ * intermediate queue on a hierarchical setup. @next_in_service
++ * points to the active entity of the sched_data service trees that
++ * will be scheduled next. It is used to reduce the number of steps
++ * needed for each hierarchical-schedule update.
+ *
+ * The supported ioprio_classes are the same as in CFQ, in descending
+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
+@@ -79,48 +74,29 @@ struct bfq_service_tree {
+ * All the fields are protected by the queue lock of the containing bfqd.
+ */
+ struct bfq_sched_data {
+- struct bfq_entity *in_service_entity;
++ struct bfq_entity *in_service_entity; /* entity in service */
++ /* head-of-the-line entity in the scheduler (see comments above) */
+ struct bfq_entity *next_in_service;
++ /* array of service trees, one per ioprio_class */
+ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
+ };
+
+ /**
+ * struct bfq_weight_counter - counter of the number of all active entities
+ * with a given weight.
+- * @weight: weight of the entities that this counter refers to.
+- * @num_active: number of active entities with this weight.
+- * @weights_node: weights tree member (see bfq_data's @queue_weights_tree
+- * and @group_weights_tree).
+ */
+ struct bfq_weight_counter {
+- short int weight;
+- unsigned int num_active;
++ short int weight; /* weight of the entities this counter refers to */
++ unsigned int num_active; /* nr of active entities with this weight */
++ /*
++ * Weights tree member (see bfq_data's @queue_weights_tree and
++ * @group_weights_tree)
++ */
+ struct rb_node weights_node;
+ };
+
+ /**
+ * struct bfq_entity - schedulable entity.
+- * @rb_node: service_tree member.
+- * @weight_counter: pointer to the weight counter associated with this entity.
+- * @on_st: flag, true if the entity is on a tree (either the active or
+- * the idle one of its service_tree).
+- * @finish: B-WF2Q+ finish timestamp (aka F_i).
+- * @start: B-WF2Q+ start timestamp (aka S_i).
+- * @tree: tree the entity is enqueued into; %NULL if not on a tree.
+- * @min_start: minimum start time of the (active) subtree rooted at
+- * this entity; used for O(log N) lookups into active trees.
+- * @service: service received during the last round of service.
+- * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight.
+- * @weight: weight of the queue
+- * @parent: parent entity, for hierarchical scheduling.
+- * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the
+- * associated scheduler queue, %NULL on leaf nodes.
+- * @sched_data: the scheduler queue this entity belongs to.
+- * @ioprio: the ioprio in use.
+- * @new_weight: when a weight change is requested, the new weight value.
+- * @orig_weight: original weight, used to implement weight boosting
+- * @prio_changed: flag, true when the user requested a weight, ioprio or
+- * ioprio_class change.
+ *
+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the
+ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each
+@@ -147,27 +123,52 @@ struct bfq_weight_counter {
+ * containing bfqd.
+ */
+ struct bfq_entity {
+- struct rb_node rb_node;
++ struct rb_node rb_node; /* service_tree member */
++ /* pointer to the weight counter associated with this entity */
+ struct bfq_weight_counter *weight_counter;
+
++ /*
++ * flag, true if the entity is on a tree (either the active or
++ * the idle one of its service_tree).
++ */
+ int on_st;
+
+- u64 finish;
+- u64 start;
++ u64 finish; /* B-WF2Q+ finish timestamp (aka F_i) */
++ u64 start; /* B-WF2Q+ start timestamp (aka S_i) */
+
++ /* tree the entity is enqueued into; %NULL if not on a tree */
+ struct rb_root *tree;
+
++ /*
++ * minimum start time of the (active) subtree rooted at this
++ * entity; used for O(log N) lookups into active trees
++ */
+ u64 min_start;
+
+- int service, budget;
+- unsigned short weight, new_weight;
++ /* amount of service received during the last service slot */
++ int service;
++
++ /* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
++ int budget;
++
++ unsigned short weight; /* weight of the queue */
++ unsigned short new_weight; /* next weight if a change is in progress */
++
++ /* original weight, used to implement weight boosting */
+ unsigned short orig_weight;
+
++ /* parent entity, for hierarchical scheduling */
+ struct bfq_entity *parent;
+
++ /*
++ * For non-leaf nodes in the hierarchy, the associated
++ * scheduler queue, %NULL on leaf nodes.
++ */
+ struct bfq_sched_data *my_sched_data;
++ /* the scheduler queue this entity belongs to */
+ struct bfq_sched_data *sched_data;
+
++ /* flag, set to request a weight, ioprio or ioprio_class change */
+ int prio_changed;
+ };
+
+@@ -175,56 +176,6 @@ struct bfq_group;
+
+ /**
+ * struct bfq_queue - leaf schedulable entity.
+- * @ref: reference counter.
+- * @bfqd: parent bfq_data.
+- * @new_ioprio: when an ioprio change is requested, the new ioprio value.
+- * @ioprio_class: the ioprio_class in use.
+- * @new_ioprio_class: when an ioprio_class change is requested, the new
+- * ioprio_class value.
+- * @new_bfqq: shared bfq_queue if queue is cooperating with
+- * one or more other queues.
+- * @pos_node: request-position tree member (see bfq_group's @rq_pos_tree).
+- * @pos_root: request-position tree root (see bfq_group's @rq_pos_tree).
+- * @sort_list: sorted list of pending requests.
+- * @next_rq: if fifo isn't expired, next request to serve.
+- * @queued: nr of requests queued in @sort_list.
+- * @allocated: currently allocated requests.
+- * @meta_pending: pending metadata requests.
+- * @fifo: fifo list of requests in sort_list.
+- * @entity: entity representing this queue in the scheduler.
+- * @max_budget: maximum budget allowed from the feedback mechanism.
+- * @budget_timeout: budget expiration (in jiffies).
+- * @dispatched: number of requests on the dispatch list or inside driver.
+- * @flags: status flags.
+- * @bfqq_list: node for active/idle bfqq list inside our bfqd.
+- * @burst_list_node: node for the device's burst list.
+- * @seek_samples: number of seeks sampled
+- * @seek_total: sum of the distances of the seeks sampled
+- * @seek_mean: mean seek distance
+- * @last_request_pos: position of the last request enqueued
+- * @requests_within_timer: number of consecutive pairs of request completion
+- * and arrival, such that the queue becomes idle
+- * after the completion, but the next request arrives
+- * within an idle time slice; used only if the queue's
+- * IO_bound has been cleared.
+- * @pid: pid of the process owning the queue, used for logging purposes.
+- * @last_wr_start_finish: start time of the current weight-raising period if
+- * the @bfq-queue is being weight-raised, otherwise
+- * finish time of the last weight-raising period
+- * @wr_cur_max_time: current max raising time for this queue
+- * @soft_rt_next_start: minimum time instant such that, only if a new
+- * request is enqueued after this time instant in an
+- * idle @bfq_queue with no outstanding requests, then
+- * the task associated with the queue it is deemed as
+- * soft real-time (see the comments to the function
+- * bfq_bfqq_softrt_next_start())
+- * @last_idle_bklogged: time of the last transition of the @bfq_queue from
+- * idle to backlogged
+- * @service_from_backlogged: cumulative service received from the @bfq_queue
+- * since the last transition from idle to
+- * backlogged
+- * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
+- * queue is shared
+ *
+ * A bfq_queue is a leaf request queue; it can be associated with an
+ * io_context or more, if it is async or shared between cooperating
+@@ -235,117 +186,163 @@ struct bfq_group;
+ * All the fields are protected by the queue lock of the containing bfqd.
+ */
+ struct bfq_queue {
+- atomic_t ref;
++ /* reference counter */
++ int ref;
++ /* parent bfq_data */
+ struct bfq_data *bfqd;
+
+- unsigned short ioprio, new_ioprio;
+- unsigned short ioprio_class, new_ioprio_class;
++ /* current ioprio and ioprio class */
++ unsigned short ioprio, ioprio_class;
++ /* next ioprio and ioprio class if a change is in progress */
++ unsigned short new_ioprio, new_ioprio_class;
+
+- /* fields for cooperating queues handling */
++ /*
++ * Shared bfq_queue if queue is cooperating with one or more
++ * other queues.
++ */
+ struct bfq_queue *new_bfqq;
++ /* request-position tree member (see bfq_group's @rq_pos_tree) */
+ struct rb_node pos_node;
++ /* request-position tree root (see bfq_group's @rq_pos_tree) */
+ struct rb_root *pos_root;
+
++ /* sorted list of pending requests */
+ struct rb_root sort_list;
++ /* if fifo isn't expired, next request to serve */
+ struct request *next_rq;
++ /* number of sync and async requests queued */
+ int queued[2];
++ /* number of sync and async requests currently allocated */
+ int allocated[2];
++ /* number of pending metadata requests */
+ int meta_pending;
++ /* fifo list of requests in sort_list */
+ struct list_head fifo;
+
++ /* entity representing this queue in the scheduler */
+ struct bfq_entity entity;
+
++ /* maximum budget allowed from the feedback mechanism */
+ int max_budget;
++ /* budget expiration (in jiffies) */
+ unsigned long budget_timeout;
+
++ /* number of requests on the dispatch list or inside driver */
+ int dispatched;
+
+- unsigned int flags;
++ unsigned int flags; /* status flags.*/
+
++ /* node for active/idle bfqq list inside parent bfqd */
+ struct list_head bfqq_list;
+
++ /* bit vector: a 1 for each seeky requests in history */
++ u32 seek_history;
++
++ /* node for the device's burst list */
+ struct hlist_node burst_list_node;
+
+- unsigned int seek_samples;
+- u64 seek_total;
+- sector_t seek_mean;
++ /* position of the last request enqueued */
+ sector_t last_request_pos;
+
++ /* Number of consecutive pairs of request completion and
++ * arrival, such that the queue becomes idle after the
++ * completion, but the next request arrives within an idle
++ * time slice; used only if the queue's IO_bound flag has been
++ * cleared.
++ */
+ unsigned int requests_within_timer;
+
++ /* pid of the process owning the queue, used for logging purposes */
+ pid_t pid;
++
++ /*
++ * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
++ * if the queue is shared.
++ */
+ struct bfq_io_cq *bic;
+
+- /* weight-raising fields */
++ /* current maximum weight-raising time for this queue */
+ unsigned long wr_cur_max_time;
++ /*
++ * Minimum time instant such that, only if a new request is
++ * enqueued after this time instant in an idle @bfq_queue with
++ * no outstanding requests, then the task associated with the
++ * queue it is deemed as soft real-time (see the comments on
++ * the function bfq_bfqq_softrt_next_start())
++ */
+ unsigned long soft_rt_next_start;
++ /*
++ * Start time of the current weight-raising period if
++ * the @bfq-queue is being weight-raised, otherwise
++ * finish time of the last weight-raising period.
++ */
+ unsigned long last_wr_start_finish;
++ /* factor by which the weight of this queue is multiplied */
+ unsigned int wr_coeff;
++ /*
++ * Time of the last transition of the @bfq_queue from idle to
++ * backlogged.
++ */
+ unsigned long last_idle_bklogged;
++ /*
++ * Cumulative service received from the @bfq_queue since the
++ * last transition from idle to backlogged.
++ */
+ unsigned long service_from_backlogged;
++
++ unsigned long split_time; /* time of last split */
+ };
+
+ /**
+ * struct bfq_ttime - per process thinktime stats.
+- * @ttime_total: total process thinktime
+- * @ttime_samples: number of thinktime samples
+- * @ttime_mean: average process thinktime
+ */
+ struct bfq_ttime {
+- unsigned long last_end_request;
++ unsigned long last_end_request; /* completion time of last request */
++
++ unsigned long ttime_total; /* total process thinktime */
++ unsigned long ttime_samples; /* number of thinktime samples */
++ unsigned long ttime_mean; /* average process thinktime */
+
+- unsigned long ttime_total;
+- unsigned long ttime_samples;
+- unsigned long ttime_mean;
+ };
+
+ /**
+ * struct bfq_io_cq - per (request_queue, io_context) structure.
+- * @icq: associated io_cq structure
+- * @bfqq: array of two process queues, the sync and the async
+- * @ttime: associated @bfq_ttime struct
+- * @ioprio: per (request_queue, blkcg) ioprio.
+- * @blkcg_id: id of the blkcg the related io_cq belongs to.
+- * @wr_time_left: snapshot of the time left before weight raising ends
+- * for the sync queue associated to this process; this
+- * snapshot is taken to remember this value while the weight
+- * raising is suspended because the queue is merged with a
+- * shared queue, and is used to set @raising_cur_max_time
+- * when the queue is split from the shared queue and its
+- * weight is raised again
+- * @saved_idle_window: same purpose as the previous field for the idle
+- * window
+- * @saved_IO_bound: same purpose as the previous two fields for the I/O
+- * bound classification of a queue
+- * @saved_in_large_burst: same purpose as the previous fields for the
+- * value of the field keeping the queue's belonging
+- * to a large burst
+- * @was_in_burst_list: true if the queue belonged to a burst list
+- * before its merge with another cooperating queue
+- * @cooperations: counter of consecutive successful queue merges underwent
+- * by any of the process' @bfq_queues
+- * @failed_cooperations: counter of consecutive failed queue merges of any
+- * of the process' @bfq_queues
+ */
+ struct bfq_io_cq {
++ /* associated io_cq structure */
+ struct io_cq icq; /* must be the first member */
++ /* array of two process queues, the sync and the async */
+ struct bfq_queue *bfqq[2];
++ /* associated @bfq_ttime struct */
+ struct bfq_ttime ttime;
++ /* per (request_queue, blkcg) ioprio */
+ int ioprio;
+-
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+- uint64_t blkcg_id; /* the current blkcg ID */
++ uint64_t blkcg_serial_nr; /* the current blkcg serial */
+ #endif
+
+- unsigned int wr_time_left;
++ /*
++ * Snapshot of the idle window before merging; taken to
++ * remember this value while the queue is merged, so as to be
++ * able to restore it in case of split.
++ */
+ bool saved_idle_window;
++ /*
++ * Same purpose as the previous two fields for the I/O bound
++ * classification of a queue.
++ */
+ bool saved_IO_bound;
+
++ /*
++ * Same purpose as the previous fields for the value of the
++ * field keeping the queue's belonging to a large burst
++ */
+ bool saved_in_large_burst;
++ /*
++ * True if the queue belonged to a burst list before its merge
++ * with another cooperating queue.
++ */
+ bool was_in_burst_list;
+-
+- unsigned int cooperations;
+- unsigned int failed_cooperations;
+ };
+
+ enum bfq_device_speed {
+@@ -354,224 +351,216 @@ enum bfq_device_speed {
+ };
+
+ /**
+- * struct bfq_data - per device data structure.
+- * @queue: request queue for the managed device.
+- * @root_group: root bfq_group for the device.
+- * @active_numerous_groups: number of bfq_groups containing more than one
+- * active @bfq_entity.
+- * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by
+- * weight. Used to keep track of whether all @bfq_queues
+- * have the same weight. The tree contains one counter
+- * for each distinct weight associated to some active
+- * and not weight-raised @bfq_queue (see the comments to
+- * the functions bfq_weights_tree_[add|remove] for
+- * further details).
+- * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted
+- * by weight. Used to keep track of whether all
+- * @bfq_groups have the same weight. The tree contains
+- * one counter for each distinct weight associated to
+- * some active @bfq_group (see the comments to the
+- * functions bfq_weights_tree_[add|remove] for further
+- * details).
+- * @busy_queues: number of bfq_queues containing requests (including the
+- * queue in service, even if it is idling).
+- * @busy_in_flight_queues: number of @bfq_queues containing pending or
+- * in-flight requests, plus the @bfq_queue in
+- * service, even if idle but waiting for the
+- * possible arrival of its next sync request. This
+- * field is updated only if the device is rotational,
+- * but used only if the device is also NCQ-capable.
+- * The reason why the field is updated also for non-
+- * NCQ-capable rotational devices is related to the
+- * fact that the value of @hw_tag may be set also
+- * later than when busy_in_flight_queues may need to
+- * be incremented for the first time(s). Taking also
+- * this possibility into account, to avoid unbalanced
+- * increments/decrements, would imply more overhead
+- * than just updating busy_in_flight_queues
+- * regardless of the value of @hw_tag.
+- * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues
+- * (that is, seeky queues that expired
+- * for budget timeout at least once)
+- * containing pending or in-flight
+- * requests, including the in-service
+- * @bfq_queue if constantly seeky. This
+- * field is updated only if the device
+- * is rotational, but used only if the
+- * device is also NCQ-capable (see the
+- * comments to @busy_in_flight_queues).
+- * @wr_busy_queues: number of weight-raised busy @bfq_queues.
+- * @queued: number of queued requests.
+- * @rq_in_driver: number of requests dispatched and waiting for completion.
+- * @sync_flight: number of sync requests in the driver.
+- * @max_rq_in_driver: max number of reqs in driver in the last
+- * @hw_tag_samples completed requests.
+- * @hw_tag_samples: nr of samples used to calculate hw_tag.
+- * @hw_tag: flag set to one if the driver is showing a queueing behavior.
+- * @budgets_assigned: number of budgets assigned.
+- * @idle_slice_timer: timer set when idling for the next sequential request
+- * from the queue in service.
+- * @unplug_work: delayed work to restart dispatching on the request queue.
+- * @in_service_queue: bfq_queue in service.
+- * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue.
+- * @last_position: on-disk position of the last served request.
+- * @last_budget_start: beginning of the last budget.
+- * @last_idling_start: beginning of the last idle slice.
+- * @peak_rate: peak transfer rate observed for a budget.
+- * @peak_rate_samples: number of samples used to calculate @peak_rate.
+- * @bfq_max_budget: maximum budget allotted to a bfq_queue before
+- * rescheduling.
+- * @active_list: list of all the bfq_queues active on the device.
+- * @idle_list: list of all the bfq_queues idle on the device.
+- * @bfq_fifo_expire: timeout for async/sync requests; when it expires
+- * requests are served in fifo order.
+- * @bfq_back_penalty: weight of backward seeks wrt forward ones.
+- * @bfq_back_max: maximum allowed backward seek.
+- * @bfq_slice_idle: maximum idling time.
+- * @bfq_user_max_budget: user-configured max budget value
+- * (0 for auto-tuning).
+- * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to
+- * async queues.
+- * @bfq_timeout: timeout for bfq_queues to consume their budget; used to
+- * to prevent seeky queues to impose long latencies to well
+- * behaved ones (this also implies that seeky queues cannot
+- * receive guarantees in the service domain; after a timeout
+- * they are charged for the whole allocated budget, to try
+- * to preserve a behavior reasonably fair among them, but
+- * without service-domain guarantees).
+- * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is
+- * no more granted any weight-raising.
+- * @bfq_failed_cooperations: number of consecutive failed cooperation
+- * chances after which weight-raising is restored
+- * to a queue subject to more than bfq_coop_thresh
+- * queue merges.
+- * @bfq_requests_within_timer: number of consecutive requests that must be
+- * issued within the idle time slice to set
+- * again idling to a queue which was marked as
+- * non-I/O-bound (see the definition of the
+- * IO_bound flag for further details).
+- * @last_ins_in_burst: last time at which a queue entered the current
+- * burst of queues being activated shortly after
+- * each other; for more details about this and the
+- * following parameters related to a burst of
+- * activations, see the comments to the function
+- * @bfq_handle_burst.
+- * @bfq_burst_interval: reference time interval used to decide whether a
+- * queue has been activated shortly after
+- * @last_ins_in_burst.
+- * @burst_size: number of queues in the current burst of queue activations.
+- * @bfq_large_burst_thresh: maximum burst size above which the current
+- * queue-activation burst is deemed as 'large'.
+- * @large_burst: true if a large queue-activation burst is in progress.
+- * @burst_list: head of the burst list (as for the above fields, more details
+- * in the comments to the function bfq_handle_burst).
+- * @low_latency: if set to true, low-latency heuristics are enabled.
+- * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised
+- * queue is multiplied.
+- * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies).
+- * @bfq_wr_rt_max_time: maximum duration for soft real-time processes.
+- * @bfq_wr_min_idle_time: minimum idle period after which weight-raising
+- * may be reactivated for a queue (in jiffies).
+- * @bfq_wr_min_inter_arr_async: minimum period between request arrivals
+- * after which weight-raising may be
+- * reactivated for an already busy queue
+- * (in jiffies).
+- * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue,
+- * sectors per seconds.
+- * @RT_prod: cached value of the product R*T used for computing the maximum
+- * duration of the weight raising automatically.
+- * @device_speed: device-speed class for the low-latency heuristic.
+- * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions.
++ * struct bfq_data - per-device data structure.
+ *
+ * All the fields are protected by the @queue lock.
+ */
+ struct bfq_data {
++ /* request queue for the device */
+ struct request_queue *queue;
+
++ /* root bfq_group for the device */
+ struct bfq_group *root_group;
+
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+- int active_numerous_groups;
+-#endif
+-
++ /*
++ * rbtree of weight counters of @bfq_queues, sorted by
++ * weight. Used to keep track of whether all @bfq_queues have
++ * the same weight. The tree contains one counter for each
++ * distinct weight associated to some active and not
++ * weight-raised @bfq_queue (see the comments to the functions
++ * bfq_weights_tree_[add|remove] for further details).
++ */
+ struct rb_root queue_weights_tree;
++ /*
++ * rbtree of non-queue @bfq_entity weight counters, sorted by
++ * weight. Used to keep track of whether all @bfq_groups have
++ * the same weight. The tree contains one counter for each
++ * distinct weight associated to some active @bfq_group (see
++ * the comments to the functions bfq_weights_tree_[add|remove]
++ * for further details).
++ */
+ struct rb_root group_weights_tree;
+
++ /*
++ * Number of bfq_queues containing requests (including the
++ * queue in service, even if it is idling).
++ */
+ int busy_queues;
+- int busy_in_flight_queues;
+- int const_seeky_busy_in_flight_queues;
++ /* number of weight-raised busy @bfq_queues */
+ int wr_busy_queues;
++ /* number of queued requests */
+ int queued;
++ /* number of requests dispatched and waiting for completion */
+ int rq_in_driver;
+- int sync_flight;
+
++ /*
++ * Maximum number of requests in driver in the last
++ * @hw_tag_samples completed requests.
++ */
+ int max_rq_in_driver;
++ /* number of samples used to calculate hw_tag */
+ int hw_tag_samples;
++ /* flag set to one if the driver is showing a queueing behavior */
+ int hw_tag;
+
++ /* number of budgets assigned */
+ int budgets_assigned;
+
++ /*
++ * Timer set when idling (waiting) for the next request from
++ * the queue in service.
++ */
+ struct timer_list idle_slice_timer;
++ /* delayed work to restart dispatching on the request queue */
+ struct work_struct unplug_work;
+
++ /* bfq_queue in service */
+ struct bfq_queue *in_service_queue;
++ /* bfq_io_cq (bic) associated with the @in_service_queue */
+ struct bfq_io_cq *in_service_bic;
+
++ /* on-disk position of the last served request */
+ sector_t last_position;
+
++ /* beginning of the last budget */
+ ktime_t last_budget_start;
++ /* beginning of the last idle slice */
+ ktime_t last_idling_start;
++ /* number of samples used to calculate @peak_rate */
+ int peak_rate_samples;
++ /* peak transfer rate observed for a budget */
+ u64 peak_rate;
++ /* maximum budget allotted to a bfq_queue before rescheduling */
+ int bfq_max_budget;
+
++ /* list of all the bfq_queues active on the device */
+ struct list_head active_list;
++ /* list of all the bfq_queues idle on the device */
+ struct list_head idle_list;
+
++ /*
++ * Timeout for async/sync requests; when it fires, requests
++ * are served in fifo order.
++ */
+ unsigned int bfq_fifo_expire[2];
++ /* weight of backward seeks wrt forward ones */
+ unsigned int bfq_back_penalty;
++ /* maximum allowed backward seek */
+ unsigned int bfq_back_max;
++ /* maximum idling time */
+ unsigned int bfq_slice_idle;
++ /* last time CLASS_IDLE was served */
+ u64 bfq_class_idle_last_service;
+
++ /* user-configured max budget value (0 for auto-tuning) */
+ int bfq_user_max_budget;
+- int bfq_max_budget_async_rq;
+- unsigned int bfq_timeout[2];
+-
+- unsigned int bfq_coop_thresh;
+- unsigned int bfq_failed_cooperations;
++ /*
++ * Timeout for bfq_queues to consume their budget; used to
++ * prevent seeky queues from imposing long latencies to
++ * sequential or quasi-sequential ones (this also implies that
++ * seeky queues cannot receive guarantees in the service
++ * domain; after a timeout they are charged for the time they
++ * have been in service, to preserve fairness among them, but
++ * without service-domain guarantees).
++ */
++ unsigned int bfq_timeout;
++
++ /*
++ * Number of consecutive requests that must be issued within
++ * the idle time slice to set again idling to a queue which
++ * was marked as non-I/O-bound (see the definition of the
++ * IO_bound flag for further details).
++ */
+ unsigned int bfq_requests_within_timer;
+
++ /*
++ * Force device idling whenever needed to provide accurate
++ * service guarantees, without caring about throughput
++ * issues. CAVEAT: this may even increase latencies, in case
++ * of useless idling for processes that did stop doing I/O.
++ */
++ bool strict_guarantees;
++
++ /*
++ * Last time at which a queue entered the current burst of
++ * queues being activated shortly after each other; for more
++ * details about this and the following parameters related to
++ * a burst of activations, see the comments on the function
++ * bfq_handle_burst.
++ */
+ unsigned long last_ins_in_burst;
++ /*
++ * Reference time interval used to decide whether a queue has
++ * been activated shortly after @last_ins_in_burst.
++ */
+ unsigned long bfq_burst_interval;
++ /* number of queues in the current burst of queue activations */
+ int burst_size;
++
++ /* common parent entity for the queues in the burst */
++ struct bfq_entity *burst_parent_entity;
++ /* Maximum burst size above which the current queue-activation
++ * burst is deemed as 'large'.
++ */
+ unsigned long bfq_large_burst_thresh;
++ /* true if a large queue-activation burst is in progress */
+ bool large_burst;
++ /*
++ * Head of the burst list (as for the above fields, more
++ * details in the comments on the function bfq_handle_burst).
++ */
+ struct hlist_head burst_list;
+
++ /* if set to true, low-latency heuristics are enabled */
+ bool low_latency;
+-
+- /* parameters of the low_latency heuristics */
++ /*
++ * Maximum factor by which the weight of a weight-raised queue
++ * is multiplied.
++ */
+ unsigned int bfq_wr_coeff;
++ /* maximum duration of a weight-raising period (jiffies) */
+ unsigned int bfq_wr_max_time;
++
++ /* Maximum weight-raising duration for soft real-time processes */
+ unsigned int bfq_wr_rt_max_time;
++ /*
++ * Minimum idle period after which weight-raising may be
++ * reactivated for a queue (in jiffies).
++ */
+ unsigned int bfq_wr_min_idle_time;
++ /*
++ * Minimum period between request arrivals after which
++ * weight-raising may be reactivated for an already busy async
++ * queue (in jiffies).
++ */
+ unsigned long bfq_wr_min_inter_arr_async;
++
++ /* Max service-rate for a soft real-time queue, in sectors/sec */
+ unsigned int bfq_wr_max_softrt_rate;
++ /*
++ * Cached value of the product R*T, used for computing the
++ * maximum duration of weight raising automatically.
++ */
+ u64 RT_prod;
++ /* device-speed class for the low-latency heuristic */
+ enum bfq_device_speed device_speed;
+
++ /* fallback dummy bfqq for extreme OOM conditions */
+ struct bfq_queue oom_bfqq;
+ };
+
+ enum bfqq_state_flags {
+- BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */
++ BFQ_BFQQ_FLAG_just_created = 0, /* queue just allocated */
++ BFQ_BFQQ_FLAG_busy, /* has requests or is in service */
+ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */
++ BFQ_BFQQ_FLAG_non_blocking_wait_rq, /*
++ * waiting for a request
++ * without idling the device
++ */
+ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
+ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
+ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */
+ BFQ_BFQQ_FLAG_sync, /* synchronous queue */
+- BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
+ BFQ_BFQQ_FLAG_IO_bound, /*
+ * bfqq has timed-out at least once
+ * having consumed at most 2/10 of
+@@ -581,17 +570,12 @@ enum bfqq_state_flags {
+ * bfqq activated in a large burst,
+ * see comments to bfq_handle_burst.
+ */
+- BFQ_BFQQ_FLAG_constantly_seeky, /*
+- * bfqq has proved to be slow and
+- * seeky until budget timeout
+- */
+ BFQ_BFQQ_FLAG_softrt_update, /*
+ * may need softrt-next-start
+ * update
+ */
+ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
+- BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */
+- BFQ_BFQQ_FLAG_just_split, /* queue has just been split */
++ BFQ_BFQQ_FLAG_split_coop /* shared bfqq will be split */
+ };
+
+ #define BFQ_BFQQ_FNS(name) \
+@@ -608,25 +592,53 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
+ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \
+ }
+
++BFQ_BFQQ_FNS(just_created);
+ BFQ_BFQQ_FNS(busy);
+ BFQ_BFQQ_FNS(wait_request);
++BFQ_BFQQ_FNS(non_blocking_wait_rq);
+ BFQ_BFQQ_FNS(must_alloc);
+ BFQ_BFQQ_FNS(fifo_expire);
+ BFQ_BFQQ_FNS(idle_window);
+ BFQ_BFQQ_FNS(sync);
+-BFQ_BFQQ_FNS(budget_new);
+ BFQ_BFQQ_FNS(IO_bound);
+ BFQ_BFQQ_FNS(in_large_burst);
+-BFQ_BFQQ_FNS(constantly_seeky);
+ BFQ_BFQQ_FNS(coop);
+ BFQ_BFQQ_FNS(split_coop);
+-BFQ_BFQQ_FNS(just_split);
+ BFQ_BFQQ_FNS(softrt_update);
+ #undef BFQ_BFQQ_FNS
+
+ /* Logging facilities. */
+-#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
+- blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args)
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
++static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
++
++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
++ char __pbuf[128]; \
++ \
++ assert_spin_locked((bfqd)->queue->queue_lock); \
++ blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
++ blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, \
++ (bfqq)->pid, \
++ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
++ __pbuf, ##args); \
++} while (0)
++
++#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
++ char __pbuf[128]; \
++ \
++ blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \
++ blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args); \
++} while (0)
++
++#else /* CONFIG_BFQ_GROUP_IOSCHED */
++
++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
++ blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \
++ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
++ ##args)
++#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0)
++
++#endif /* CONFIG_BFQ_GROUP_IOSCHED */
+
+ #define bfq_log(bfqd, fmt, args...) \
+ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
+@@ -640,15 +652,12 @@ enum bfqq_expiration {
+ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */
+ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */
+ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */
++ BFQ_BFQQ_PREEMPTED /* preemption in progress */
+ };
+
+-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+
+ struct bfqg_stats {
+- /* total bytes transferred */
+- struct blkg_rwstat service_bytes;
+- /* total IOs serviced, post merge */
+- struct blkg_rwstat serviced;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ /* number of ios merged */
+ struct blkg_rwstat merged;
+ /* total time spent on device in ns, may not be accurate w/ queueing */
+@@ -657,12 +666,8 @@ struct bfqg_stats {
+ struct blkg_rwstat wait_time;
+ /* number of IOs queued up */
+ struct blkg_rwstat queued;
+- /* total sectors transferred */
+- struct blkg_stat sectors;
+ /* total disk time and nr sectors dispatched by this group */
+ struct blkg_stat time;
+- /* time not charged to this cgroup */
+- struct blkg_stat unaccounted_time;
+ /* sum of number of ios queued across all samples */
+ struct blkg_stat avg_queue_size_sum;
+ /* count of samples taken for average */
+@@ -680,8 +685,10 @@ struct bfqg_stats {
+ uint64_t start_idle_time;
+ uint64_t start_empty_time;
+ uint16_t flags;
++#endif
+ };
+
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ /*
+ * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
+ *
+@@ -712,7 +719,7 @@ struct bfq_group_data {
+ * unused for the root group. Used to know whether there
+ * are groups with more than one active @bfq_entity
+ * (see the comments to the function
+- * bfq_bfqq_must_not_expire()).
++ * bfq_bfqq_may_idle()).
+ * @rq_pos_tree: rbtree sorted by next_request position, used when
+ * determining if two or more queues have interleaving
+ * requests (see bfq_find_close_cooperator()).
+@@ -745,7 +752,6 @@ struct bfq_group {
+ struct rb_root rq_pos_tree;
+
+ struct bfqg_stats stats;
+- struct bfqg_stats dead_stats; /* stats pushed from dead children */
+ };
+
+ #else
+@@ -767,11 +773,25 @@ bfq_entity_service_tree(struct bfq_entity *entity)
+ struct bfq_sched_data *sched_data = entity->sched_data;
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+ unsigned int idx = bfqq ? bfqq->ioprio_class - 1 :
+- BFQ_DEFAULT_GRP_CLASS;
++ BFQ_DEFAULT_GRP_CLASS - 1;
+
+ BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
+ BUG_ON(sched_data == NULL);
+
++ if (bfqq)
++ bfq_log_bfqq(bfqq->bfqd, bfqq,
++ "entity_service_tree %p %d",
++ sched_data->service_tree + idx, idx) ;
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
++ else {
++ struct bfq_group *bfqg =
++ container_of(entity, struct bfq_group, entity);
++
++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
++ "entity_service_tree %p %d",
++ sched_data->service_tree + idx, idx) ;
++ }
++#endif
+ return sched_data->service_tree + idx;
+ }
+
+@@ -791,47 +811,6 @@ static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
+ return bic->icq.q->elevator->elevator_data;
+ }
+
+-/**
+- * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer.
+- * @ptr: a pointer to a bfqd.
+- * @flags: storage for the flags to be saved.
+- *
+- * This function allows bfqg->bfqd to be protected by the
+- * queue lock of the bfqd they reference; the pointer is dereferenced
+- * under RCU, so the storage for bfqd is assured to be safe as long
+- * as the RCU read side critical section does not end. After the
+- * bfqd->queue->queue_lock is taken the pointer is rechecked, to be
+- * sure that no other writer accessed it. If we raced with a writer,
+- * the function returns NULL, with the queue unlocked, otherwise it
+- * returns the dereferenced pointer, with the queue locked.
+- */
+-static struct bfq_data *bfq_get_bfqd_locked(void **ptr, unsigned long *flags)
+-{
+- struct bfq_data *bfqd;
+-
+- rcu_read_lock();
+- bfqd = rcu_dereference(*(struct bfq_data **)ptr);
+-
+- if (bfqd != NULL) {
+- spin_lock_irqsave(bfqd->queue->queue_lock, *flags);
+- if (ptr == NULL)
+- printk(KERN_CRIT "get_bfqd_locked pointer NULL\n");
+- else if (*ptr == bfqd)
+- goto out;
+- spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
+- }
+-
+- bfqd = NULL;
+-out:
+- rcu_read_unlock();
+- return bfqd;
+-}
+-
+-static void bfq_put_bfqd_unlock(struct bfq_data *bfqd, unsigned long *flags)
+-{
+- spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
+-}
+-
+ #ifdef CONFIG_BFQ_GROUP_IOSCHED
+
+ static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+@@ -857,11 +836,13 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
+ static void bfq_put_queue(struct bfq_queue *bfqq);
+ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
+ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
+- struct bio *bio, int is_sync,
+- struct bfq_io_cq *bic, gfp_t gfp_mask);
++ struct bio *bio, bool is_sync,
++ struct bfq_io_cq *bic);
+ static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+ struct bfq_group *bfqg);
++#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
++#endif
+ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+
+ #endif /* _BFQ_H */
+--
+1.9.1
+
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-08-10 12:57 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-08-10 12:57 UTC (permalink / raw
To: gentoo-commits
commit: c12def91a2d1b232600418a34f79774f6cb40a2c
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Wed Aug 10 12:57:38 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Wed Aug 10 12:57:38 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=c12def91
Linux patch 4.6.6
0000_README | 4 +
1005_linux-4.6.6.patch | 3625 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 3629 insertions(+)
diff --git a/0000_README b/0000_README
index 9e42d11..1bb262e 100644
--- a/0000_README
+++ b/0000_README
@@ -63,6 +63,10 @@ Patch: 1004_linux-4.6.5.patch
From: http://www.kernel.org
Desc: Linux 4.6.5
+Patch: 1005_linux-4.6.6.patch
+From: http://www.kernel.org
+Desc: Linux 4.6.6
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1005_linux-4.6.6.patch b/1005_linux-4.6.6.patch
new file mode 100644
index 0000000..4fe50ff
--- /dev/null
+++ b/1005_linux-4.6.6.patch
@@ -0,0 +1,3625 @@
+diff --git a/Makefile b/Makefile
+index 7d693a825fc7..bee1a1692fed 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 6
+-SUBLEVEL = 5
++SUBLEVEL = 6
+ EXTRAVERSION =
+ NAME = Charred Weasel
+
+diff --git a/arch/arc/Makefile b/arch/arc/Makefile
+index def69e347b2d..f5add9784b2d 100644
+--- a/arch/arc/Makefile
++++ b/arch/arc/Makefile
+@@ -66,8 +66,6 @@ endif
+
+ endif
+
+-cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables
+-
+ # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
+ ifeq ($(atleast_gcc48),y)
+ cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2
+diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
+index e0efff15a5ae..b9192a653b7e 100644
+--- a/arch/arc/kernel/stacktrace.c
++++ b/arch/arc/kernel/stacktrace.c
+@@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
+ * prelogue is setup (callee regs saved and then fp set and not other
+ * way around
+ */
+- pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
++ pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
+ return 0;
+
+ #endif
+diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
+index d4635391c36a..ec0510cc5c3c 100644
+--- a/arch/mips/include/asm/mips-cm.h
++++ b/arch/mips/include/asm/mips-cm.h
+@@ -461,7 +461,10 @@ static inline unsigned int mips_cm_max_vp_width(void)
+ if (mips_cm_revision() >= CM_REV_CM3)
+ return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK;
+
+- return smp_num_siblings;
++ if (config_enabled(CONFIG_SMP))
++ return smp_num_siblings;
++
++ return 1;
+ }
+
+ /**
+diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
+index fdcc04020636..538ae944855e 100644
+--- a/arch/x86/include/asm/pvclock.h
++++ b/arch/x86/include/asm/pvclock.h
+@@ -85,6 +85,8 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
+ u8 ret_flags;
+
+ version = src->version;
++ /* Make the latest version visible */
++ smp_rmb();
+
+ offset = pvclock_get_nsec_offset(src);
+ ret = src->system_time + offset;
+diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
+index bca14c899137..57b71373bae3 100644
+--- a/arch/x86/kernel/early-quirks.c
++++ b/arch/x86/kernel/early-quirks.c
+@@ -11,7 +11,11 @@
+
+ #include <linux/pci.h>
+ #include <linux/acpi.h>
++#include <linux/delay.h>
++#include <linux/dmi.h>
+ #include <linux/pci_ids.h>
++#include <linux/bcma/bcma.h>
++#include <linux/bcma/bcma_regs.h>
+ #include <drm/i915_drm.h>
+ #include <asm/pci-direct.h>
+ #include <asm/dma.h>
+@@ -21,6 +25,9 @@
+ #include <asm/iommu.h>
+ #include <asm/gart.h>
+ #include <asm/irq_remapping.h>
++#include <asm/early_ioremap.h>
++
++#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg)
+
+ static void __init fix_hypertransport_config(int num, int slot, int func)
+ {
+@@ -76,6 +83,13 @@ static void __init nvidia_bugs(int num, int slot, int func)
+ #ifdef CONFIG_ACPI
+ #ifdef CONFIG_X86_IO_APIC
+ /*
++ * Only applies to Nvidia root ports (bus 0) and not to
++ * Nvidia graphics cards with PCI ports on secondary buses.
++ */
++ if (num)
++ return;
++
++ /*
+ * All timer overrides on Nvidia are
+ * wrong unless HPET is enabled.
+ * Unfortunately that's not true on many Asus boards.
+@@ -590,6 +604,61 @@ static void __init force_disable_hpet(int num, int slot, int func)
+ #endif
+ }
+
++#define BCM4331_MMIO_SIZE 16384
++#define BCM4331_PM_CAP 0x40
++#define bcma_aread32(reg) ioread32(mmio + 1 * BCMA_CORE_SIZE + reg)
++#define bcma_awrite32(reg, val) iowrite32(val, mmio + 1 * BCMA_CORE_SIZE + reg)
++
++static void __init apple_airport_reset(int bus, int slot, int func)
++{
++ void __iomem *mmio;
++ u16 pmcsr;
++ u64 addr;
++ int i;
++
++ if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc."))
++ return;
++
++ /* Card may have been put into PCI_D3hot by grub quirk */
++ pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL);
++
++ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) {
++ pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
++ write_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL, pmcsr);
++ mdelay(10);
++
++ pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL);
++ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) {
++ dev_err("Cannot power up Apple AirPort card\n");
++ return;
++ }
++ }
++
++ addr = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
++ addr |= (u64)read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_1) << 32;
++ addr &= PCI_BASE_ADDRESS_MEM_MASK;
++
++ mmio = early_ioremap(addr, BCM4331_MMIO_SIZE);
++ if (!mmio) {
++ dev_err("Cannot iomap Apple AirPort card\n");
++ return;
++ }
++
++ pr_info("Resetting Apple AirPort card (left enabled by EFI)\n");
++
++ for (i = 0; bcma_aread32(BCMA_RESET_ST) && i < 30; i++)
++ udelay(10);
++
++ bcma_awrite32(BCMA_RESET_CTL, BCMA_RESET_CTL_RESET);
++ bcma_aread32(BCMA_RESET_CTL);
++ udelay(1);
++
++ bcma_awrite32(BCMA_RESET_CTL, 0);
++ bcma_aread32(BCMA_RESET_CTL);
++ udelay(10);
++
++ early_iounmap(mmio, BCM4331_MMIO_SIZE);
++}
+
+ #define QFLAG_APPLY_ONCE 0x1
+ #define QFLAG_APPLIED 0x2
+@@ -603,12 +672,6 @@ struct chipset {
+ void (*f)(int num, int slot, int func);
+ };
+
+-/*
+- * Only works for devices on the root bus. If you add any devices
+- * not on bus 0 readd another loop level in early_quirks(). But
+- * be careful because at least the Nvidia quirk here relies on
+- * only matching on bus 0.
+- */
+ static struct chipset early_qrk[] __initdata = {
+ { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+ PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
+@@ -638,9 +701,13 @@ static struct chipset early_qrk[] __initdata = {
+ */
+ { PCI_VENDOR_ID_INTEL, 0x0f00,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
++ { PCI_VENDOR_ID_BROADCOM, 0x4331,
++ PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
+ {}
+ };
+
++static void __init early_pci_scan_bus(int bus);
++
+ /**
+ * check_dev_quirk - apply early quirks to a given PCI device
+ * @num: bus number
+@@ -649,7 +716,7 @@ static struct chipset early_qrk[] __initdata = {
+ *
+ * Check the vendor & device ID against the early quirks table.
+ *
+- * If the device is single function, let early_quirks() know so we don't
++ * If the device is single function, let early_pci_scan_bus() know so we don't
+ * poke at this device again.
+ */
+ static int __init check_dev_quirk(int num, int slot, int func)
+@@ -658,6 +725,7 @@ static int __init check_dev_quirk(int num, int slot, int func)
+ u16 vendor;
+ u16 device;
+ u8 type;
++ u8 sec;
+ int i;
+
+ class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
+@@ -685,25 +753,36 @@ static int __init check_dev_quirk(int num, int slot, int func)
+
+ type = read_pci_config_byte(num, slot, func,
+ PCI_HEADER_TYPE);
++
++ if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) {
++ sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS);
++ if (sec > num)
++ early_pci_scan_bus(sec);
++ }
++
+ if (!(type & 0x80))
+ return -1;
+
+ return 0;
+ }
+
+-void __init early_quirks(void)
++static void __init early_pci_scan_bus(int bus)
+ {
+ int slot, func;
+
+- if (!early_pci_allowed())
+- return;
+-
+ /* Poor man's PCI discovery */
+- /* Only scan the root bus */
+ for (slot = 0; slot < 32; slot++)
+ for (func = 0; func < 8; func++) {
+ /* Only probe function 0 on single fn devices */
+- if (check_dev_quirk(0, slot, func))
++ if (check_dev_quirk(bus, slot, func))
+ break;
+ }
+ }
++
++void __init early_quirks(void)
++{
++ if (!early_pci_allowed())
++ return;
++
++ early_pci_scan_bus(0);
++}
+diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
+index 99bfc025111d..7f82fe0a6807 100644
+--- a/arch/x86/kernel/pvclock.c
++++ b/arch/x86/kernel/pvclock.c
+@@ -66,6 +66,8 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
+
+ do {
+ version = __pvclock_read_cycles(src, &ret, &flags);
++ /* Make sure that the version double-check is last. */
++ smp_rmb();
+ } while ((src->version & 1) || version != src->version);
+
+ return flags & valid_flags;
+@@ -80,6 +82,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+
+ do {
+ version = __pvclock_read_cycles(src, &ret, &flags);
++ /* Make sure that the version double-check is last. */
++ smp_rmb();
+ } while ((src->version & 1) || version != src->version);
+
+ if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
+diff --git a/block/ioprio.c b/block/ioprio.c
+index cc7800e9eb44..01b8116298a1 100644
+--- a/block/ioprio.c
++++ b/block/ioprio.c
+@@ -150,8 +150,10 @@ static int get_task_ioprio(struct task_struct *p)
+ if (ret)
+ goto out;
+ ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM);
++ task_lock(p);
+ if (p->io_context)
+ ret = p->io_context->ioprio;
++ task_unlock(p);
+ out:
+ return ret;
+ }
+diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
+index 55e257c268dd..c40deed8c440 100644
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -4141,6 +4141,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
+ */
+ { "ST380013AS", "3.20", ATA_HORKAGE_MAX_SEC_1024 },
+
++ /*
++ * Device times out with higher max sects.
++ * https://bugzilla.kernel.org/show_bug.cgi?id=121671
++ */
++ { "LITEON CX1-JB256-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 },
++
+ /* Devices we expect to fail diagnostics */
+
+ /* Devices where NCQ should be avoided */
+diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
+index eda09090cb52..f642c4264c27 100644
+--- a/drivers/bcma/bcma_private.h
++++ b/drivers/bcma/bcma_private.h
+@@ -8,8 +8,6 @@
+ #include <linux/bcma/bcma.h>
+ #include <linux/delay.h>
+
+-#define BCMA_CORE_SIZE 0x1000
+-
+ #define bcma_err(bus, fmt, ...) \
+ pr_err("bus%d: " fmt, (bus)->num, ##__VA_ARGS__)
+ #define bcma_warn(bus, fmt, ...) \
+diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
+index 6405b6557792..d6ea31d30bf9 100644
+--- a/drivers/block/xen-blkfront.c
++++ b/drivers/block/xen-blkfront.c
+@@ -207,6 +207,9 @@ struct blkfront_info
+ struct blk_mq_tag_set tag_set;
+ struct blkfront_ring_info *rinfo;
+ unsigned int nr_rings;
++ /* Save uncomplete reqs and bios for migration. */
++ struct list_head requests;
++ struct bio_list bio_list;
+ };
+
+ static unsigned int nr_minors;
+@@ -874,8 +877,12 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *qd)
+ {
+ unsigned long flags;
+- struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data;
++ int qid = hctx->queue_num;
++ struct blkfront_info *info = hctx->queue->queuedata;
++ struct blkfront_ring_info *rinfo = NULL;
+
++ BUG_ON(info->nr_rings <= qid);
++ rinfo = &info->rinfo[qid];
+ blk_mq_start_request(qd->rq);
+ spin_lock_irqsave(&rinfo->ring_lock, flags);
+ if (RING_FULL(&rinfo->ring))
+@@ -901,20 +908,9 @@ out_busy:
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ }
+
+-static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+- unsigned int index)
+-{
+- struct blkfront_info *info = (struct blkfront_info *)data;
+-
+- BUG_ON(info->nr_rings <= index);
+- hctx->driver_data = &info->rinfo[index];
+- return 0;
+-}
+-
+ static struct blk_mq_ops blkfront_mq_ops = {
+ .queue_rq = blkif_queue_rq,
+ .map_queue = blk_mq_map_queue,
+- .init_hctx = blk_mq_init_hctx,
+ };
+
+ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
+@@ -950,6 +946,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
+ return PTR_ERR(rq);
+ }
+
++ rq->queuedata = info;
+ queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
+
+ if (info->feature_discard) {
+@@ -2007,69 +2004,22 @@ static int blkif_recover(struct blkfront_info *info)
+ {
+ unsigned int i, r_index;
+ struct request *req, *n;
+- struct blk_shadow *copy;
+ int rc;
+ struct bio *bio, *cloned_bio;
+- struct bio_list bio_list, merge_bio;
+ unsigned int segs, offset;
+ int pending, size;
+ struct split_bio *split_bio;
+- struct list_head requests;
+
+ blkfront_gather_backend_features(info);
+ segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ blk_queue_max_segments(info->rq, segs);
+- bio_list_init(&bio_list);
+- INIT_LIST_HEAD(&requests);
+
+ for (r_index = 0; r_index < info->nr_rings; r_index++) {
+- struct blkfront_ring_info *rinfo;
+-
+- rinfo = &info->rinfo[r_index];
+- /* Stage 1: Make a safe copy of the shadow state. */
+- copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
+- GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+- if (!copy)
+- return -ENOMEM;
+-
+- /* Stage 2: Set up free list. */
+- memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
+- for (i = 0; i < BLK_RING_SIZE(info); i++)
+- rinfo->shadow[i].req.u.rw.id = i+1;
+- rinfo->shadow_free = rinfo->ring.req_prod_pvt;
+- rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
++ struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
+
+ rc = blkfront_setup_indirect(rinfo);
+- if (rc) {
+- kfree(copy);
++ if (rc)
+ return rc;
+- }
+-
+- for (i = 0; i < BLK_RING_SIZE(info); i++) {
+- /* Not in use? */
+- if (!copy[i].request)
+- continue;
+-
+- /*
+- * Get the bios in the request so we can re-queue them.
+- */
+- if (copy[i].request->cmd_flags &
+- (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+- /*
+- * Flush operations don't contain bios, so
+- * we need to requeue the whole request
+- */
+-				list_add(&copy[i].request->queuelist, &requests);
+- continue;
+- }
+- merge_bio.head = copy[i].request->bio;
+- merge_bio.tail = copy[i].request->biotail;
+- bio_list_merge(&bio_list, &merge_bio);
+- copy[i].request->bio = NULL;
+- blk_end_request_all(copy[i].request, 0);
+- }
+-
+- kfree(copy);
+ }
+ xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+@@ -2084,7 +2034,7 @@ static int blkif_recover(struct blkfront_info *info)
+ kick_pending_request_queues(rinfo);
+ }
+
+- list_for_each_entry_safe(req, n, &requests, queuelist) {
++ list_for_each_entry_safe(req, n, &info->requests, queuelist) {
+ /* Requeue pending requests (flush or discard) */
+ list_del_init(&req->queuelist);
+ BUG_ON(req->nr_phys_segments > segs);
+@@ -2092,7 +2042,7 @@ static int blkif_recover(struct blkfront_info *info)
+ }
+ blk_mq_kick_requeue_list(info->rq);
+
+- while ((bio = bio_list_pop(&bio_list)) != NULL) {
++ while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
+ /* Traverse the list of pending bios and re-queue them */
+ if (bio_segments(bio) > segs) {
+ /*
+@@ -2138,9 +2088,42 @@ static int blkfront_resume(struct xenbus_device *dev)
+ {
+ struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+ int err = 0;
++ unsigned int i, j;
+
+ dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
+
++ bio_list_init(&info->bio_list);
++ INIT_LIST_HEAD(&info->requests);
++ for (i = 0; i < info->nr_rings; i++) {
++ struct blkfront_ring_info *rinfo = &info->rinfo[i];
++ struct bio_list merge_bio;
++ struct blk_shadow *shadow = rinfo->shadow;
++
++ for (j = 0; j < BLK_RING_SIZE(info); j++) {
++ /* Not in use? */
++ if (!shadow[j].request)
++ continue;
++
++ /*
++ * Get the bios in the request so we can re-queue them.
++ */
++ if (shadow[j].request->cmd_flags &
++ (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
++ /*
++ * Flush operations don't contain bios, so
++ * we need to requeue the whole request
++ */
++ list_add(&shadow[j].request->queuelist, &info->requests);
++ continue;
++ }
++ merge_bio.head = shadow[j].request->bio;
++ merge_bio.tail = shadow[j].request->biotail;
++ bio_list_merge(&info->bio_list, &merge_bio);
++ shadow[j].request->bio = NULL;
++ blk_mq_end_request(shadow[j].request, 0);
++ }
++ }
++
+ blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
+
+ err = negotiate_mq(info);
+@@ -2148,6 +2131,8 @@ static int blkfront_resume(struct xenbus_device *dev)
+ return err;
+
+ err = talk_to_blkback(dev, info);
++ if (!err)
++ blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);
+
+ /*
+ * We have to wait for the backend to switch to
+@@ -2484,10 +2469,23 @@ static void blkback_changed(struct xenbus_device *dev,
+ break;
+
+ case XenbusStateConnected:
+- if (dev->state != XenbusStateInitialised) {
++ /*
++ * talk_to_blkback sets state to XenbusStateInitialised
++ * and blkfront_connect sets it to XenbusStateConnected
++ * (if connection went OK).
++ *
++ * If the backend (or toolstack) decides to poke at backend
++ * state (and re-trigger the watch by setting the state repeatedly
++ * to XenbusStateConnected (4)) we need to deal with this.
++ * This is allowed as this is used to communicate to the guest
++ * that the size of disk has changed!
++ */
++ if ((dev->state != XenbusStateInitialised) &&
++ (dev->state != XenbusStateConnected)) {
+ if (talk_to_blkback(dev, info))
+ break;
+ }
++
+ blkfront_connect(info);
+ break;
+
+diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c
+index 10f846cc8db1..25d5906640c3 100644
+--- a/drivers/clk/at91/clk-programmable.c
++++ b/drivers/clk/at91/clk-programmable.c
+@@ -99,7 +99,7 @@ static int clk_programmable_set_parent(struct clk_hw *hw, u8 index)
+ struct clk_programmable *prog = to_clk_programmable(hw);
+ const struct clk_programmable_layout *layout = prog->layout;
+ unsigned int mask = layout->css_mask;
+- unsigned int pckr = 0;
++ unsigned int pckr = index;
+
+ if (layout->have_slck_mck)
+ mask |= AT91_PMC_CSSMCK_MCK;
+diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c
+index e0dc7e83403a..102399f3c550 100644
+--- a/drivers/clk/rockchip/clk-mmc-phase.c
++++ b/drivers/clk/rockchip/clk-mmc-phase.c
+@@ -153,6 +153,7 @@ struct clk *rockchip_clk_register_mmc(const char *name,
+ return ERR_PTR(-ENOMEM);
+
+ init.name = name;
++ init.flags = 0;
+ init.num_parents = num_parents;
+ init.parent_names = parent_names;
+ init.ops = &rockchip_mmc_clk_ops;
+diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
+index c4acfc5273b3..e380457792ae 100644
+--- a/drivers/cpufreq/cpufreq.c
++++ b/drivers/cpufreq/cpufreq.c
+@@ -2169,6 +2169,10 @@ int cpufreq_update_policy(unsigned int cpu)
+ * -> ask driver for current freq and notify governors about a change
+ */
+ if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
++ if (cpufreq_suspended) {
++ ret = -EAGAIN;
++ goto unlock;
++ }
+ new_policy.cur = cpufreq_update_current_freq(policy);
+ if (WARN_ON(!new_policy.cur)) {
+ ret = -EIO;
+diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
+index 8e304b1befc5..75bd6621dc5d 100644
+--- a/drivers/dma/at_xdmac.c
++++ b/drivers/dma/at_xdmac.c
+@@ -242,7 +242,7 @@ struct at_xdmac_lld {
+ u32 mbr_dus; /* Destination Microblock Stride Register */
+ };
+
+-
++/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
+ struct at_xdmac_desc {
+ struct at_xdmac_lld lld;
+ enum dma_transfer_direction direction;
+@@ -253,7 +253,7 @@ struct at_xdmac_desc {
+ unsigned int xfer_size;
+ struct list_head descs_list;
+ struct list_head xfer_node;
+-};
++} __aligned(sizeof(u64));
+
+ static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb)
+ {
+@@ -1400,6 +1400,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ u32 cur_nda, check_nda, cur_ubc, mask, value;
+ u8 dwidth = 0;
+ unsigned long flags;
++ bool initd;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+@@ -1424,7 +1425,16 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ residue = desc->xfer_size;
+ /*
+ * Flush FIFO: only relevant when the transfer is source peripheral
+- * synchronized.
++ * synchronized. Flush is needed before reading CUBC because data in
++ * the FIFO are not reported by CUBC. Reporting a residue of the
++ * transfer length while we have data in FIFO can cause issue.
++ * Usecase: atmel USART has a timeout which means I have received
++ * characters but there is no more character received for a while. On
++ * timeout, it requests the residue. If the data are in the DMA FIFO,
++ * we will return a residue of the transfer length. It means no data
++ * received. If an application is waiting for these data, it will hang
++ * since we won't have another USART timeout without receiving new
++ * data.
+ */
+ mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC;
+ value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM;
+@@ -1435,34 +1445,43 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ }
+
+ /*
+- * When processing the residue, we need to read two registers but we
+- * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where
+- * we stand in the descriptor list and AT_XDMAC_CUBC is used
+- * to know how many data are remaining for the current descriptor.
+- * Since the dma channel is not paused to not loose data, between the
+- * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of
+- * descriptor.
+- * For that reason, after reading AT_XDMAC_CUBC, we check if we are
+- * still using the same descriptor by reading a second time
+- * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to
+- * read again AT_XDMAC_CUBC.
++ * The easiest way to compute the residue should be to pause the DMA
++ * but doing this can lead to miss some data as some devices don't
++ * have FIFO.
++ * We need to read several registers because:
++ * - DMA is running therefore a descriptor change is possible while
++ * reading these registers
++ * - When the block transfer is done, the value of the CUBC register
++ * is set to its initial value until the fetch of the next descriptor.
++ * This value will corrupt the residue calculation so we have to skip
++ * it.
++ *
++ * INITD -------- ------------
++ * |____________________|
++ * _______________________ _______________
++ * NDA @desc2 \/ @desc3
++ * _______________________/\_______________
++ * __________ ___________ _______________
++ * CUBC 0 \/ MAX desc1 \/ MAX desc2
++ * __________/\___________/\_______________
++ *
++ * Since descriptors are aligned on 64 bits, we can assume that
++ * the update of NDA and CUBC is atomic.
+ * Memory barriers are used to ensure the read order of the registers.
+- * A max number of retries is set because unlikely it can never ends if
+- * we are transferring a lot of data with small buffers.
++ * A max number of retries is set because unlikely it could never ends.
+ */
+- cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+- rmb();
+- cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+ for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
+- rmb();
+ check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+-
+- if (likely(cur_nda == check_nda))
+- break;
+-
+- cur_nda = check_nda;
++ rmb();
++ initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
+ rmb();
+ cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
++ rmb();
++ cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
++ rmb();
++
++ if ((check_nda == cur_nda) && initd)
++ break;
+ }
+
+ if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
+@@ -1471,6 +1490,19 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ }
+
+ /*
++ * Flush FIFO: only relevant when the transfer is source peripheral
++ * synchronized. Another flush is needed here because CUBC is updated
++ * when the controller sends the data write command. It can lead to
++ * report data that are not written in the memory or the device. The
++ * FIFO flush ensures that data are really written.
++ */
++ if ((desc->lld.mbr_cfg & mask) == value) {
++ at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask);
++ while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
++ cpu_relax();
++ }
++
++ /*
+ * Remove size of all microblocks already transferred and the current
+ * one. Then add the remaining size to transfer of the current
+ * microblock.
+diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c
+index 4272f2ce5f6e..dca16540e7fd 100644
+--- a/drivers/hwtracing/intel_th/core.c
++++ b/drivers/hwtracing/intel_th/core.c
+@@ -440,6 +440,38 @@ static struct intel_th_subdevice {
+ },
+ };
+
++#ifdef CONFIG_MODULES
++static void __intel_th_request_hub_module(struct work_struct *work)
++{
++ struct intel_th *th = container_of(work, struct intel_th,
++ request_module_work);
++
++ request_module("intel_th_%s", th->hub->name);
++}
++
++static int intel_th_request_hub_module(struct intel_th *th)
++{
++ INIT_WORK(&th->request_module_work, __intel_th_request_hub_module);
++ schedule_work(&th->request_module_work);
++
++ return 0;
++}
++
++static void intel_th_request_hub_module_flush(struct intel_th *th)
++{
++ flush_work(&th->request_module_work);
++}
++#else
++static inline int intel_th_request_hub_module(struct intel_th *th)
++{
++ return -EINVAL;
++}
++
++static inline void intel_th_request_hub_module_flush(struct intel_th *th)
++{
++}
++#endif /* CONFIG_MODULES */
++
+ static int intel_th_populate(struct intel_th *th, struct resource *devres,
+ unsigned int ndevres, int irq)
+ {
+@@ -510,7 +542,7 @@ static int intel_th_populate(struct intel_th *th, struct resource *devres,
+ /* need switch driver to be loaded to enumerate the rest */
+ if (subdev->type == INTEL_TH_SWITCH && !req) {
+ th->hub = thdev;
+- err = request_module("intel_th_%s", subdev->name);
++ err = intel_th_request_hub_module(th);
+ if (!err)
+ req++;
+ }
+@@ -627,6 +659,7 @@ void intel_th_free(struct intel_th *th)
+ {
+ int i;
+
++ intel_th_request_hub_module_flush(th);
+ for (i = 0; i < TH_SUBDEVICE_MAX; i++)
+ if (th->thdev[i] != th->hub)
+ intel_th_device_remove(th->thdev[i]);
+diff --git a/drivers/hwtracing/intel_th/intel_th.h b/drivers/hwtracing/intel_th/intel_th.h
+index eedd09332db6..72cd3c6018e1 100644
+--- a/drivers/hwtracing/intel_th/intel_th.h
++++ b/drivers/hwtracing/intel_th/intel_th.h
+@@ -199,6 +199,9 @@ struct intel_th {
+
+ int id;
+ int major;
++#ifdef CONFIG_MODULES
++ struct work_struct request_module_work;
++#endif /* CONFIG_MODULES */
+ #ifdef CONFIG_INTEL_TH_DEBUG
+ struct dentry *dbg;
+ #endif
+diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
+index bca7a2ac00d6..72c9189fac8a 100644
+--- a/drivers/hwtracing/intel_th/pci.c
++++ b/drivers/hwtracing/intel_th/pci.c
+@@ -75,6 +75,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0a80),
+ .driver_data = (kernel_ulong_t)0,
+ },
++ {
++ /* Kaby Lake PCH-H */
++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa2a6),
++ .driver_data = (kernel_ulong_t)0,
++ },
+ { 0 },
+ };
+
+diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
+index 23eaabb19f96..a5eb09c5539f 100644
+--- a/drivers/i2c/busses/i2c-qup.c
++++ b/drivers/i2c/busses/i2c-qup.c
+@@ -1268,6 +1268,8 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap,
+ }
+ }
+
++ idx = 0;
++
+ do {
+ if (msgs[idx].len == 0) {
+ ret = -EINVAL;
+diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
+index 5fbd5bd0878f..49fc2c7e560a 100644
+--- a/drivers/i2c/muxes/i2c-mux-reg.c
++++ b/drivers/i2c/muxes/i2c-mux-reg.c
+@@ -150,7 +150,7 @@ static int i2c_mux_reg_probe_dt(struct regmux *mux,
+ mux->data.idle_in_use = true;
+
+ /* map address from "reg" if exists */
+- if (of_address_to_resource(np, 0, &res)) {
++ if (of_address_to_resource(np, 0, &res) == 0) {
+ mux->data.reg_size = resource_size(&res);
+ mux->data.reg = devm_ioremap_resource(&pdev->dev, &res);
+ if (IS_ERR(mux->data.reg))
+diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
+index ca62a6e11846..4a6104beb60d 100644
+--- a/drivers/input/joystick/xpad.c
++++ b/drivers/input/joystick/xpad.c
+@@ -1421,22 +1421,15 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
+ int ep_irq_in_idx;
+ int i, error;
+
++ if (intf->cur_altsetting->desc.bNumEndpoints != 2)
++ return -ENODEV;
++
+ for (i = 0; xpad_device[i].idVendor; i++) {
+ if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) &&
+ (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct))
+ break;
+ }
+
+- if (xpad_device[i].xtype == XTYPE_XBOXONE &&
+- intf->cur_altsetting->desc.bInterfaceNumber != 0) {
+- /*
+- * The Xbox One controller lists three interfaces all with the
+- * same interface class, subclass and protocol. Differentiate by
+- * interface number.
+- */
+- return -ENODEV;
+- }
+-
+ xpad = kzalloc(sizeof(struct usb_xpad), GFP_KERNEL);
+ if (!xpad)
+ return -ENOMEM;
+@@ -1468,6 +1461,8 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
+ if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) {
+ if (intf->cur_altsetting->desc.bInterfaceProtocol == 129)
+ xpad->xtype = XTYPE_XBOX360W;
++ else if (intf->cur_altsetting->desc.bInterfaceProtocol == 208)
++ xpad->xtype = XTYPE_XBOXONE;
+ else
+ xpad->xtype = XTYPE_XBOX360;
+ } else {
+@@ -1482,6 +1477,17 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
+ xpad->mapping |= MAP_STICKS_TO_NULL;
+ }
+
++ if (xpad->xtype == XTYPE_XBOXONE &&
++ intf->cur_altsetting->desc.bInterfaceNumber != 0) {
++ /*
++ * The Xbox One controller lists three interfaces all with the
++ * same interface class, subclass and protocol. Differentiate by
++ * interface number.
++ */
++ error = -ENODEV;
++ goto err_free_in_urb;
++ }
++
+ error = xpad_init_output(intf, xpad);
+ if (error)
+ goto err_free_in_urb;
+diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
+index 78f93cf68840..be5b399da5d3 100644
+--- a/drivers/input/mouse/elantech.c
++++ b/drivers/input/mouse/elantech.c
+@@ -1568,13 +1568,7 @@ static int elantech_set_properties(struct elantech_data *etd)
+ case 5:
+ etd->hw_version = 3;
+ break;
+- case 6:
+- case 7:
+- case 8:
+- case 9:
+- case 10:
+- case 13:
+- case 14:
++ case 6 ... 14:
+ etd->hw_version = 4;
+ break;
+ default:
+diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c
+index a3f0f5a47490..0f586780ceb4 100644
+--- a/drivers/input/mouse/vmmouse.c
++++ b/drivers/input/mouse/vmmouse.c
+@@ -355,18 +355,11 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties)
+ return -ENXIO;
+ }
+
+- if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) {
+- psmouse_dbg(psmouse, "VMMouse port in use.\n");
+- return -EBUSY;
+- }
+-
+ /* Check if the device is present */
+ response = ~VMMOUSE_PROTO_MAGIC;
+ VMMOUSE_CMD(GETVERSION, 0, version, response, dummy1, dummy2);
+- if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU) {
+- release_region(VMMOUSE_PROTO_PORT, 4);
++ if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU)
+ return -ENXIO;
+- }
+
+ if (set_properties) {
+ psmouse->vendor = VMMOUSE_VENDOR;
+@@ -374,8 +367,6 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties)
+ psmouse->model = version;
+ }
+
+- release_region(VMMOUSE_PROTO_PORT, 4);
+-
+ return 0;
+ }
+
+@@ -394,7 +385,6 @@ static void vmmouse_disconnect(struct psmouse *psmouse)
+ psmouse_reset(psmouse);
+ input_unregister_device(priv->abs_dev);
+ kfree(priv);
+- release_region(VMMOUSE_PROTO_PORT, 4);
+ }
+
+ /**
+@@ -438,15 +428,10 @@ int vmmouse_init(struct psmouse *psmouse)
+ struct input_dev *rel_dev = psmouse->dev, *abs_dev;
+ int error;
+
+- if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) {
+- psmouse_dbg(psmouse, "VMMouse port in use.\n");
+- return -EBUSY;
+- }
+-
+ psmouse_reset(psmouse);
+ error = vmmouse_enable(psmouse);
+ if (error)
+- goto release_region;
++ return error;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ abs_dev = input_allocate_device();
+@@ -502,8 +487,5 @@ init_fail:
+ kfree(priv);
+ psmouse->private = NULL;
+
+-release_region:
+- release_region(VMMOUSE_PROTO_PORT, 4);
+-
+ return error;
+ }
+diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c
+index 8dd3fb5e1f94..88e91559c84e 100644
+--- a/drivers/input/rmi4/rmi_f12.c
++++ b/drivers/input/rmi4/rmi_f12.c
+@@ -66,7 +66,7 @@ static int rmi_f12_read_sensor_tuning(struct f12_data *f12)
+ struct rmi_device *rmi_dev = fn->rmi_dev;
+ int ret;
+ int offset;
+- u8 buf[14];
++ u8 buf[15];
+ int pitch_x = 0;
+ int pitch_y = 0;
+ int clip_x_low = 0;
+@@ -86,9 +86,10 @@ static int rmi_f12_read_sensor_tuning(struct f12_data *f12)
+
+ offset = rmi_register_desc_calc_reg_offset(&f12->control_reg_desc, 8);
+
+- if (item->reg_size > 14) {
+- dev_err(&fn->dev, "F12 control8 should be 14 bytes, not: %ld\n",
+- item->reg_size);
++ if (item->reg_size > sizeof(buf)) {
++ dev_err(&fn->dev,
++ "F12 control8 should be no bigger than %zd bytes, not: %ld\n",
++ sizeof(buf), item->reg_size);
+ return -ENODEV;
+ }
+
+diff --git a/drivers/input/touchscreen/tsc2004.c b/drivers/input/touchscreen/tsc2004.c
+index 7295c198aa08..6fe55d598fac 100644
+--- a/drivers/input/touchscreen/tsc2004.c
++++ b/drivers/input/touchscreen/tsc2004.c
+@@ -22,6 +22,11 @@
+ #include <linux/regmap.h>
+ #include "tsc200x-core.h"
+
++static const struct input_id tsc2004_input_id = {
++ .bustype = BUS_I2C,
++ .product = 2004,
++};
++
+ static int tsc2004_cmd(struct device *dev, u8 cmd)
+ {
+ u8 tx = TSC200X_CMD | TSC200X_CMD_12BIT | cmd;
+@@ -42,7 +47,7 @@ static int tsc2004_probe(struct i2c_client *i2c,
+ const struct i2c_device_id *id)
+
+ {
+- return tsc200x_probe(&i2c->dev, i2c->irq, BUS_I2C,
++ return tsc200x_probe(&i2c->dev, i2c->irq, &tsc2004_input_id,
+ devm_regmap_init_i2c(i2c, &tsc200x_regmap_config),
+ tsc2004_cmd);
+ }
+diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
+index b9f593dfd2ef..f2c5f0e47f77 100644
+--- a/drivers/input/touchscreen/tsc2005.c
++++ b/drivers/input/touchscreen/tsc2005.c
+@@ -24,6 +24,11 @@
+ #include <linux/regmap.h>
+ #include "tsc200x-core.h"
+
++static const struct input_id tsc2005_input_id = {
++ .bustype = BUS_SPI,
++ .product = 2005,
++};
++
+ static int tsc2005_cmd(struct device *dev, u8 cmd)
+ {
+ u8 tx = TSC200X_CMD | TSC200X_CMD_12BIT | cmd;
+@@ -62,7 +67,7 @@ static int tsc2005_probe(struct spi_device *spi)
+ if (error)
+ return error;
+
+- return tsc200x_probe(&spi->dev, spi->irq, BUS_SPI,
++ return tsc200x_probe(&spi->dev, spi->irq, &tsc2005_input_id,
+ devm_regmap_init_spi(spi, &tsc200x_regmap_config),
+ tsc2005_cmd);
+ }
+diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c
+index 15240c1ee850..dfa7f1c4f545 100644
+--- a/drivers/input/touchscreen/tsc200x-core.c
++++ b/drivers/input/touchscreen/tsc200x-core.c
+@@ -450,7 +450,7 @@ static void tsc200x_close(struct input_dev *input)
+ mutex_unlock(&ts->mutex);
+ }
+
+-int tsc200x_probe(struct device *dev, int irq, __u16 bustype,
++int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id,
+ struct regmap *regmap,
+ int (*tsc200x_cmd)(struct device *dev, u8 cmd))
+ {
+@@ -547,9 +547,18 @@ int tsc200x_probe(struct device *dev, int irq, __u16 bustype,
+ snprintf(ts->phys, sizeof(ts->phys),
+ "%s/input-ts", dev_name(dev));
+
+- input_dev->name = "TSC200X touchscreen";
++ if (tsc_id->product == 2004) {
++ input_dev->name = "TSC200X touchscreen";
++ } else {
++ input_dev->name = devm_kasprintf(dev, GFP_KERNEL,
++ "TSC%04d touchscreen",
++ tsc_id->product);
++ if (!input_dev->name)
++ return -ENOMEM;
++ }
++
+ input_dev->phys = ts->phys;
+- input_dev->id.bustype = bustype;
++ input_dev->id = *tsc_id;
+ input_dev->dev.parent = dev;
+ input_dev->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY);
+ input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+diff --git a/drivers/input/touchscreen/tsc200x-core.h b/drivers/input/touchscreen/tsc200x-core.h
+index 7a482d102614..49a63a3c6840 100644
+--- a/drivers/input/touchscreen/tsc200x-core.h
++++ b/drivers/input/touchscreen/tsc200x-core.h
+@@ -70,7 +70,7 @@
+ extern const struct regmap_config tsc200x_regmap_config;
+ extern const struct dev_pm_ops tsc200x_pm_ops;
+
+-int tsc200x_probe(struct device *dev, int irq, __u16 bustype,
++int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id,
+ struct regmap *regmap,
+ int (*tsc200x_cmd)(struct device *dev, u8 cmd));
+ int tsc200x_remove(struct device *dev);
+diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
+index bab3c6acf6a2..b6fc4bde79de 100644
+--- a/drivers/input/touchscreen/wacom_w8001.c
++++ b/drivers/input/touchscreen/wacom_w8001.c
+@@ -27,7 +27,7 @@ MODULE_AUTHOR("Jaya Kumar <jayakumar.lkml@gmail.com>");
+ MODULE_DESCRIPTION(DRIVER_DESC);
+ MODULE_LICENSE("GPL");
+
+-#define W8001_MAX_LENGTH 11
++#define W8001_MAX_LENGTH 13
+ #define W8001_LEAD_MASK 0x80
+ #define W8001_LEAD_BYTE 0x80
+ #define W8001_TAB_MASK 0x40
+@@ -155,6 +155,7 @@ static void parse_multi_touch(struct w8001 *w8001)
+ bool touch = data[0] & (1 << i);
+
+ input_mt_slot(dev, i);
++ input_mt_report_slot_state(dev, MT_TOOL_FINGER, touch);
+ if (touch) {
+ x = (data[6 * i + 1] << 7) | data[6 * i + 2];
+ y = (data[6 * i + 3] << 7) | data[6 * i + 4];
+@@ -339,6 +340,15 @@ static irqreturn_t w8001_interrupt(struct serio *serio,
+ w8001->idx = 0;
+ parse_multi_touch(w8001);
+ break;
++
++ default:
++ /*
++ * ThinkPad X60 Tablet PC (pen only device) sometimes
++ * sends invalid data packets that are larger than
++ * W8001_PKTLEN_TPCPEN. Let's start over again.
++ */
++ if (!w8001->touch_dev && w8001->idx > W8001_PKTLEN_TPCPEN - 1)
++ w8001->idx = 0;
+ }
+
+ return IRQ_HANDLED;
+@@ -513,6 +523,8 @@ static int w8001_setup_touch(struct w8001 *w8001, char *basename,
+ 0, touch.x, 0, 0);
+ input_set_abs_params(dev, ABS_MT_POSITION_Y,
+ 0, touch.y, 0, 0);
++ input_set_abs_params(dev, ABS_MT_TOOL_TYPE,
++ 0, MT_TOOL_MAX, 0, 0);
+
+ strlcat(basename, " 2FG", basename_sz);
+ if (w8001->max_pen_x && w8001->max_pen_y)
+diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
+index 40fb1209d512..83f93404c446 100644
+--- a/drivers/irqchip/irq-mips-gic.c
++++ b/drivers/irqchip/irq-mips-gic.c
+@@ -706,7 +706,7 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
+
+ spin_lock_irqsave(&gic_lock, flags);
+ gic_map_to_pin(intr, gic_cpu_pin);
+- gic_map_to_vpe(intr, vpe);
++ gic_map_to_vpe(intr, mips_cm_vp_id(vpe));
+ for (i = 0; i < min(gic_vpes, NR_CPUS); i++)
+ clear_bit(intr, pcpu_masks[i].pcpu_mask);
+ set_bit(intr, pcpu_masks[vpe].pcpu_mask);
+@@ -947,7 +947,7 @@ int gic_ipi_domain_match(struct irq_domain *d, struct device_node *node,
+ switch (bus_token) {
+ case DOMAIN_BUS_IPI:
+ is_ipi = d->bus_token == bus_token;
+- return to_of_node(d->fwnode) == node && is_ipi;
++ return (!node || to_of_node(d->fwnode) == node) && is_ipi;
+ break;
+ default:
+ return 0;
+diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
+index 41a1bfc5eaa7..3646aaf7188c 100644
+--- a/drivers/media/i2c/adv7604.c
++++ b/drivers/media/i2c/adv7604.c
+@@ -779,11 +779,31 @@ static const struct v4l2_dv_timings_cap adv76xx_timings_cap_digital = {
+ V4L2_DV_BT_CAP_CUSTOM)
+ };
+
+-static inline const struct v4l2_dv_timings_cap *
+-adv76xx_get_dv_timings_cap(struct v4l2_subdev *sd)
++/*
++ * Return the DV timings capabilities for the requested sink pad. As a special
++ * case, pad value -1 returns the capabilities for the currently selected input.
++ */
++static const struct v4l2_dv_timings_cap *
++adv76xx_get_dv_timings_cap(struct v4l2_subdev *sd, int pad)
+ {
+- return is_digital_input(sd) ? &adv76xx_timings_cap_digital :
+- &adv7604_timings_cap_analog;
++ if (pad == -1) {
++ struct adv76xx_state *state = to_state(sd);
++
++ pad = state->selected_input;
++ }
++
++ switch (pad) {
++ case ADV76XX_PAD_HDMI_PORT_A:
++ case ADV7604_PAD_HDMI_PORT_B:
++ case ADV7604_PAD_HDMI_PORT_C:
++ case ADV7604_PAD_HDMI_PORT_D:
++ return &adv76xx_timings_cap_digital;
++
++ case ADV7604_PAD_VGA_RGB:
++ case ADV7604_PAD_VGA_COMP:
++ default:
++ return &adv7604_timings_cap_analog;
++ }
+ }
+
+
+@@ -1329,7 +1349,7 @@ static int stdi2dv_timings(struct v4l2_subdev *sd,
+ const struct v4l2_bt_timings *bt = &v4l2_dv_timings_presets[i].bt;
+
+ if (!v4l2_valid_dv_timings(&v4l2_dv_timings_presets[i],
+- adv76xx_get_dv_timings_cap(sd),
++ adv76xx_get_dv_timings_cap(sd, -1),
+ adv76xx_check_dv_timings, NULL))
+ continue;
+ if (vtotal(bt) != stdi->lcf + 1)
+@@ -1430,18 +1450,22 @@ static int adv76xx_enum_dv_timings(struct v4l2_subdev *sd,
+ return -EINVAL;
+
+ return v4l2_enum_dv_timings_cap(timings,
+- adv76xx_get_dv_timings_cap(sd), adv76xx_check_dv_timings, NULL);
++ adv76xx_get_dv_timings_cap(sd, timings->pad),
++ adv76xx_check_dv_timings, NULL);
+ }
+
+ static int adv76xx_dv_timings_cap(struct v4l2_subdev *sd,
+ struct v4l2_dv_timings_cap *cap)
+ {
+ struct adv76xx_state *state = to_state(sd);
++ unsigned int pad = cap->pad;
+
+ if (cap->pad >= state->source_pad)
+ return -EINVAL;
+
+- *cap = *adv76xx_get_dv_timings_cap(sd);
++ *cap = *adv76xx_get_dv_timings_cap(sd, pad);
++ cap->pad = pad;
++
+ return 0;
+ }
+
+@@ -1450,9 +1474,9 @@ static int adv76xx_dv_timings_cap(struct v4l2_subdev *sd,
+ static void adv76xx_fill_optional_dv_timings_fields(struct v4l2_subdev *sd,
+ struct v4l2_dv_timings *timings)
+ {
+- v4l2_find_dv_timings_cap(timings, adv76xx_get_dv_timings_cap(sd),
+- is_digital_input(sd) ? 250000 : 1000000,
+- adv76xx_check_dv_timings, NULL);
++ v4l2_find_dv_timings_cap(timings, adv76xx_get_dv_timings_cap(sd, -1),
++ is_digital_input(sd) ? 250000 : 1000000,
++ adv76xx_check_dv_timings, NULL);
+ }
+
+ static unsigned int adv7604_read_hdmi_pixelclock(struct v4l2_subdev *sd)
+@@ -1620,7 +1644,7 @@ static int adv76xx_s_dv_timings(struct v4l2_subdev *sd,
+
+ bt = &timings->bt;
+
+- if (!v4l2_valid_dv_timings(timings, adv76xx_get_dv_timings_cap(sd),
++ if (!v4l2_valid_dv_timings(timings, adv76xx_get_dv_timings_cap(sd, -1),
+ adv76xx_check_dv_timings, NULL))
+ return -ERANGE;
+
+diff --git a/drivers/media/usb/airspy/airspy.c b/drivers/media/usb/airspy/airspy.c
+index 87c12930416f..92d9d4214c3a 100644
+--- a/drivers/media/usb/airspy/airspy.c
++++ b/drivers/media/usb/airspy/airspy.c
+@@ -1072,7 +1072,7 @@ static int airspy_probe(struct usb_interface *intf,
+ if (ret) {
+ dev_err(s->dev, "Failed to register as video device (%d)\n",
+ ret);
+- goto err_unregister_v4l2_dev;
++ goto err_free_controls;
+ }
+ dev_info(s->dev, "Registered as %s\n",
+ video_device_node_name(&s->vdev));
+@@ -1081,7 +1081,6 @@ static int airspy_probe(struct usb_interface *intf,
+
+ err_free_controls:
+ v4l2_ctrl_handler_free(&s->hdl);
+-err_unregister_v4l2_dev:
+ v4l2_device_unregister(&s->v4l2_dev);
+ err_free_mem:
+ kfree(s);
+diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
+index b0a27413cb13..185bd359ee6e 100644
+--- a/drivers/mmc/card/block.c
++++ b/drivers/mmc/card/block.c
+@@ -352,8 +352,10 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user(
+ goto idata_err;
+ }
+
+- if (!idata->buf_bytes)
++ if (!idata->buf_bytes) {
++ idata->buf = NULL;
+ return idata;
++ }
+
+ idata->buf = kmalloc(idata->buf_bytes, GFP_KERNEL);
+ if (!idata->buf) {
+@@ -1760,8 +1762,8 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq,
+
+ packed_cmd_hdr = packed->cmd_hdr;
+ memset(packed_cmd_hdr, 0, sizeof(packed->cmd_hdr));
+- packed_cmd_hdr[0] = (packed->nr_entries << 16) |
+- (PACKED_CMD_WR << 8) | PACKED_CMD_VER;
++ packed_cmd_hdr[0] = cpu_to_le32((packed->nr_entries << 16) |
++ (PACKED_CMD_WR << 8) | PACKED_CMD_VER);
+ hdr_blocks = mmc_large_sector(card) ? 8 : 1;
+
+ /*
+@@ -1775,14 +1777,14 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq,
+ ((brq->data.blocks * brq->data.blksz) >=
+ card->ext_csd.data_tag_unit_size);
+ /* Argument of CMD23 */
+- packed_cmd_hdr[(i * 2)] =
++ packed_cmd_hdr[(i * 2)] = cpu_to_le32(
+ (do_rel_wr ? MMC_CMD23_ARG_REL_WR : 0) |
+ (do_data_tag ? MMC_CMD23_ARG_TAG_REQ : 0) |
+- blk_rq_sectors(prq);
++ blk_rq_sectors(prq));
+ /* Argument of CMD18 or CMD25 */
+- packed_cmd_hdr[((i * 2)) + 1] =
++ packed_cmd_hdr[((i * 2)) + 1] = cpu_to_le32(
+ mmc_card_blockaddr(card) ?
+- blk_rq_pos(prq) : blk_rq_pos(prq) << 9;
++ blk_rq_pos(prq) : blk_rq_pos(prq) << 9);
+ packed->blocks += blk_rq_sectors(prq);
+ i++;
+ }
+diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
+index 8b3275d7792a..8f5e93cb7975 100644
+--- a/drivers/net/can/at91_can.c
++++ b/drivers/net/can/at91_can.c
+@@ -712,9 +712,10 @@ static int at91_poll_rx(struct net_device *dev, int quota)
+
+ /* upper group completed, look again in lower */
+ if (priv->rx_next > get_mb_rx_low_last(priv) &&
+- quota > 0 && mb > get_mb_rx_last(priv)) {
++ mb > get_mb_rx_last(priv)) {
+ priv->rx_next = get_mb_rx_first(priv);
+- goto again;
++ if (quota > 0)
++ goto again;
+ }
+
+ return received;
+diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
+index f91b094288da..e3dccd3200d5 100644
+--- a/drivers/net/can/c_can/c_can.c
++++ b/drivers/net/can/c_can/c_can.c
+@@ -332,9 +332,23 @@ static void c_can_setup_tx_object(struct net_device *dev, int iface,
+
+ priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface), ctrl);
+
+- for (i = 0; i < frame->can_dlc; i += 2) {
+- priv->write_reg(priv, C_CAN_IFACE(DATA1_REG, iface) + i / 2,
+- frame->data[i] | (frame->data[i + 1] << 8));
++ if (priv->type == BOSCH_D_CAN) {
++ u32 data = 0, dreg = C_CAN_IFACE(DATA1_REG, iface);
++
++ for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) {
++ data = (u32)frame->data[i];
++ data |= (u32)frame->data[i + 1] << 8;
++ data |= (u32)frame->data[i + 2] << 16;
++ data |= (u32)frame->data[i + 3] << 24;
++ priv->write_reg32(priv, dreg, data);
++ }
++ } else {
++ for (i = 0; i < frame->can_dlc; i += 2) {
++ priv->write_reg(priv,
++ C_CAN_IFACE(DATA1_REG, iface) + i / 2,
++ frame->data[i] |
++ (frame->data[i + 1] << 8));
++ }
+ }
+ }
+
+@@ -402,10 +416,20 @@ static int c_can_read_msg_object(struct net_device *dev, int iface, u32 ctrl)
+ } else {
+ int i, dreg = C_CAN_IFACE(DATA1_REG, iface);
+
+- for (i = 0; i < frame->can_dlc; i += 2, dreg ++) {
+- data = priv->read_reg(priv, dreg);
+- frame->data[i] = data;
+- frame->data[i + 1] = data >> 8;
++ if (priv->type == BOSCH_D_CAN) {
++ for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) {
++ data = priv->read_reg32(priv, dreg);
++ frame->data[i] = data;
++ frame->data[i + 1] = data >> 8;
++ frame->data[i + 2] = data >> 16;
++ frame->data[i + 3] = data >> 24;
++ }
++ } else {
++ for (i = 0; i < frame->can_dlc; i += 2, dreg++) {
++ data = priv->read_reg(priv, dreg);
++ frame->data[i] = data;
++ frame->data[i + 1] = data >> 8;
++ }
+ }
+ }
+
+diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
+index 910c12e2638e..ad535a854e5c 100644
+--- a/drivers/net/can/dev.c
++++ b/drivers/net/can/dev.c
+@@ -798,6 +798,9 @@ static int can_validate(struct nlattr *tb[], struct nlattr *data[])
+ * - control mode with CAN_CTRLMODE_FD set
+ */
+
++ if (!data)
++ return 0;
++
+ if (data[IFLA_CAN_CTRLMODE]) {
+ struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]);
+
+@@ -1008,6 +1011,11 @@ static int can_newlink(struct net *src_net, struct net_device *dev,
+ return -EOPNOTSUPP;
+ }
+
++static void can_dellink(struct net_device *dev, struct list_head *head)
++{
++ return;
++}
++
+ static struct rtnl_link_ops can_link_ops __read_mostly = {
+ .kind = "can",
+ .maxtype = IFLA_CAN_MAX,
+@@ -1016,6 +1024,7 @@ static struct rtnl_link_ops can_link_ops __read_mostly = {
+ .validate = can_validate,
+ .newlink = can_newlink,
+ .changelink = can_changelink,
++ .dellink = can_dellink,
+ .get_size = can_get_size,
+ .fill_info = can_fill_info,
+ .get_xstats_size = can_get_xstats_size,
+diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
+index a6d26d351dfc..bfddcab82c29 100644
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -244,7 +244,7 @@
+ /* Various constants */
+
+ /* Coalescing */
+-#define MVNETA_TXDONE_COAL_PKTS 1
++#define MVNETA_TXDONE_COAL_PKTS 0 /* interrupt per packet */
+ #define MVNETA_RX_COAL_PKTS 32
+ #define MVNETA_RX_COAL_USEC 100
+
+diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c
+index 9cfa544072b5..cfde7bc551a1 100644
+--- a/drivers/pinctrl/freescale/pinctrl-imx.c
++++ b/drivers/pinctrl/freescale/pinctrl-imx.c
+@@ -209,9 +209,9 @@ static int imx_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
+ pin_reg = &info->pin_regs[pin_id];
+
+ if (pin_reg->mux_reg == -1) {
+- dev_err(ipctl->dev, "Pin(%s) does not support mux function\n",
++ dev_dbg(ipctl->dev, "Pin(%s) does not support mux function\n",
+ info->pins[pin_id].name);
+- return -EINVAL;
++ continue;
+ }
+
+ if (info->flags & SHARE_MUX_CONF_REG) {
+diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
+index cf9bafa10acf..bfdf720db270 100644
+--- a/drivers/pinctrl/pinctrl-single.c
++++ b/drivers/pinctrl/pinctrl-single.c
+@@ -1580,6 +1580,9 @@ static inline void pcs_irq_set(struct pcs_soc_data *pcs_soc,
+ else
+ mask &= ~soc_mask;
+ pcs->write(mask, pcswi->reg);
++
++ /* flush posted write */
++ mask = pcs->read(pcswi->reg);
+ raw_spin_unlock(&pcs->lock);
+ }
+
+diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
+index d45cd254ed1c..2b331d5b9e79 100644
+--- a/drivers/platform/chrome/cros_ec_dev.c
++++ b/drivers/platform/chrome/cros_ec_dev.c
+@@ -147,13 +147,19 @@ static long ec_device_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg)
+ goto exit;
+ }
+
++ if (u_cmd.outsize != s_cmd->outsize ||
++ u_cmd.insize != s_cmd->insize) {
++ ret = -EINVAL;
++ goto exit;
++ }
++
+ s_cmd->command += ec->cmd_offset;
+ ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd);
+ /* Only copy data to userland if data was received. */
+ if (ret < 0)
+ goto exit;
+
+- if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + u_cmd.insize))
++ if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + s_cmd->insize))
+ ret = -EFAULT;
+ exit:
+ kfree(s_cmd);
+diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
+index 456987c88baa..b13cd074c52a 100644
+--- a/drivers/power/power_supply_core.c
++++ b/drivers/power/power_supply_core.c
+@@ -565,11 +565,12 @@ static int power_supply_read_temp(struct thermal_zone_device *tzd,
+
+ WARN_ON(tzd == NULL);
+ psy = tzd->devdata;
+- ret = psy->desc->get_property(psy, POWER_SUPPLY_PROP_TEMP, &val);
++ ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_TEMP, &val);
++ if (ret)
++ return ret;
+
+ /* Convert tenths of degree Celsius to milli degree Celsius. */
+- if (!ret)
+- *temp = val.intval * 100;
++ *temp = val.intval * 100;
+
+ return ret;
+ }
+@@ -612,10 +613,12 @@ static int ps_get_max_charge_cntl_limit(struct thermal_cooling_device *tcd,
+ int ret;
+
+ psy = tcd->devdata;
+- ret = psy->desc->get_property(psy,
+- POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
+- if (!ret)
+- *state = val.intval;
++ ret = power_supply_get_property(psy,
++ POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
++ if (ret)
++ return ret;
++
++ *state = val.intval;
+
+ return ret;
+ }
+@@ -628,10 +631,12 @@ static int ps_get_cur_chrage_cntl_limit(struct thermal_cooling_device *tcd,
+ int ret;
+
+ psy = tcd->devdata;
+- ret = psy->desc->get_property(psy,
+- POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
+- if (!ret)
+- *state = val.intval;
++ ret = power_supply_get_property(psy,
++ POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
++ if (ret)
++ return ret;
++
++ *state = val.intval;
+
+ return ret;
+ }
+diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c
+index 38a8bbe74810..83797d89c30f 100644
+--- a/drivers/pps/clients/pps_parport.c
++++ b/drivers/pps/clients/pps_parport.c
+@@ -195,7 +195,7 @@ static void parport_detach(struct parport *port)
+ struct pps_client_pp *device;
+
+ /* FIXME: oooh, this is ugly! */
+- if (strcmp(pardev->name, KBUILD_MODNAME))
++ if (!pardev || strcmp(pardev->name, KBUILD_MODNAME))
+ /* not our port */
+ return;
+
+diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
+index 80b1979e8d95..df036b872b05 100644
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -1051,6 +1051,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
+ qeth_l2_set_offline(cgdev);
+
+ if (card->dev) {
++ netif_napi_del(&card->napi);
+ unregister_netdev(card->dev);
+ card->dev = NULL;
+ }
+diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
+index ac544330daeb..709b52339ff9 100644
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -3226,6 +3226,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
+ qeth_l3_set_offline(cgdev);
+
+ if (card->dev) {
++ netif_napi_del(&card->napi);
+ unregister_netdev(card->dev);
+ card->dev = NULL;
+ }
+diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
+index d6a691e27d33..d6803a9e5ab8 100644
+--- a/drivers/scsi/ipr.c
++++ b/drivers/scsi/ipr.c
+@@ -10093,6 +10093,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
+ ioa_cfg->intr_flag = IPR_USE_MSI;
+ else {
+ ioa_cfg->intr_flag = IPR_USE_LSI;
++ ioa_cfg->clear_isr = 1;
+ ioa_cfg->nvectors = 1;
+ dev_info(&pdev->dev, "Cannot enable MSI.\n");
+ }
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index 5649c200d37c..a92a62dea793 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -2548,7 +2548,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+ if (!vha->flags.online)
+ return;
+
+- if (rsp->msix->cpuid != smp_processor_id()) {
++ if (rsp->msix && rsp->msix->cpuid != smp_processor_id()) {
+ /* if kernel does not notify qla of IRQ's CPU change,
+ * then set it here.
+ */
+diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
+index ff41c310c900..eaccd651ccda 100644
+--- a/drivers/scsi/scsi_devinfo.c
++++ b/drivers/scsi/scsi_devinfo.c
+@@ -429,7 +429,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
+ * here, and we don't know what device it is
+ * trying to work with, leave it as-is.
+ */
+- vmax = 8; /* max length of vendor */
++ vmax = sizeof(devinfo->vendor);
+ vskip = vendor;
+ while (vmax > 0 && *vskip == ' ') {
+ vmax--;
+@@ -439,7 +439,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
+ while (vmax > 0 && vskip[vmax - 1] == ' ')
+ --vmax;
+
+- mmax = 16; /* max length of model */
++ mmax = sizeof(devinfo->model);
+ mskip = model;
+ while (mmax > 0 && *mskip == ' ') {
+ mmax--;
+@@ -455,10 +455,12 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
+ * Behave like the older version of get_device_flags.
+ */
+ if (memcmp(devinfo->vendor, vskip, vmax) ||
+- devinfo->vendor[vmax])
++ (vmax < sizeof(devinfo->vendor) &&
++ devinfo->vendor[vmax]))
+ continue;
+ if (memcmp(devinfo->model, mskip, mmax) ||
+- devinfo->model[mmax])
++ (mmax < sizeof(devinfo->model) &&
++ devinfo->model[mmax]))
+ continue;
+ return devinfo;
+ } else {
+diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
+index 6c6c0013ec7a..8048c2fedf5b 100644
+--- a/drivers/spi/spi-rockchip.c
++++ b/drivers/spi/spi-rockchip.c
+@@ -578,7 +578,7 @@ static int rockchip_spi_transfer_one(
+ struct spi_device *spi,
+ struct spi_transfer *xfer)
+ {
+- int ret = 1;
++ int ret = 0;
+ struct rockchip_spi *rs = spi_master_get_devdata(master);
+
+ WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
+@@ -627,6 +627,8 @@ static int rockchip_spi_transfer_one(
+ spi_enable_chip(rs, 1);
+ ret = rockchip_spi_prepare_dma(rs);
+ }
++ /* successful DMA prepare means the transfer is in progress */
++ ret = ret ? ret : 1;
+ } else {
+ spi_enable_chip(rs, 1);
+ ret = rockchip_spi_pio_transfer(rs);
+diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
+index 1ddd9e2309b6..cf007f3b83ec 100644
+--- a/drivers/spi/spi-sun4i.c
++++ b/drivers/spi/spi-sun4i.c
+@@ -173,13 +173,17 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
+ {
+ struct sun4i_spi *sspi = spi_master_get_devdata(master);
+ unsigned int mclk_rate, div, timeout;
++ unsigned int start, end, tx_time;
+ unsigned int tx_len = 0;
+ int ret = 0;
+ u32 reg;
+
+ /* We don't support transfer larger than the FIFO */
+ if (tfr->len > SUN4I_FIFO_DEPTH)
+- return -EINVAL;
++ return -EMSGSIZE;
++
++ if (tfr->tx_buf && tfr->len >= SUN4I_FIFO_DEPTH)
++ return -EMSGSIZE;
+
+ reinit_completion(&sspi->done);
+ sspi->tx_buf = tfr->tx_buf;
+@@ -269,8 +273,12 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
+ sun4i_spi_write(sspi, SUN4I_BURST_CNT_REG, SUN4I_BURST_CNT(tfr->len));
+ sun4i_spi_write(sspi, SUN4I_XMIT_CNT_REG, SUN4I_XMIT_CNT(tx_len));
+
+- /* Fill the TX FIFO */
+- sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH);
++ /*
++ * Fill the TX FIFO
++	 * Filling the FIFO fully causes a timeout for some reason,
++	 * at least on spi2 on the A10s
++ */
++ sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH - 1);
+
+ /* Enable the interrupts */
+ sun4i_spi_write(sspi, SUN4I_INT_CTL_REG, SUN4I_INT_CTL_TC);
+@@ -279,9 +287,16 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
+ reg = sun4i_spi_read(sspi, SUN4I_CTL_REG);
+ sun4i_spi_write(sspi, SUN4I_CTL_REG, reg | SUN4I_CTL_XCH);
+
++ tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U);
++ start = jiffies;
+ timeout = wait_for_completion_timeout(&sspi->done,
+- msecs_to_jiffies(1000));
++ msecs_to_jiffies(tx_time));
++ end = jiffies;
+ if (!timeout) {
++ dev_warn(&master->dev,
++ "%s: timeout transferring %u bytes@%iHz for %i(%i)ms",
++ dev_name(&spi->dev), tfr->len, tfr->speed_hz,
++ jiffies_to_msecs(end - start), tx_time);
+ ret = -ETIMEDOUT;
+ goto out;
+ }
+diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
+index 42e2c4bd690a..7fce79a60608 100644
+--- a/drivers/spi/spi-sun6i.c
++++ b/drivers/spi/spi-sun6i.c
+@@ -160,6 +160,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
+ {
+ struct sun6i_spi *sspi = spi_master_get_devdata(master);
+ unsigned int mclk_rate, div, timeout;
++ unsigned int start, end, tx_time;
+ unsigned int tx_len = 0;
+ int ret = 0;
+ u32 reg;
+@@ -269,9 +270,16 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
+ reg = sun6i_spi_read(sspi, SUN6I_TFR_CTL_REG);
+ sun6i_spi_write(sspi, SUN6I_TFR_CTL_REG, reg | SUN6I_TFR_CTL_XCH);
+
++ tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U);
++ start = jiffies;
+ timeout = wait_for_completion_timeout(&sspi->done,
+- msecs_to_jiffies(1000));
++ msecs_to_jiffies(tx_time));
++ end = jiffies;
+ if (!timeout) {
++ dev_warn(&master->dev,
++ "%s: timeout transferring %u bytes@%iHz for %i(%i)ms",
++ dev_name(&spi->dev), tfr->len, tfr->speed_hz,
++ jiffies_to_msecs(end - start), tx_time);
+ ret = -ETIMEDOUT;
+ goto out;
+ }
+diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
+index e198996c5b83..9e45c5728d9b 100644
+--- a/drivers/tty/pty.c
++++ b/drivers/tty/pty.c
+@@ -667,8 +667,11 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
+ fsi = tty->driver_data;
+ else
+ fsi = tty->link->driver_data;
+- devpts_kill_index(fsi, tty->index);
+- devpts_put_ref(fsi);
++
++ if (fsi) {
++ devpts_kill_index(fsi, tty->index);
++ devpts_put_ref(fsi);
++ }
+ }
+
+ static const struct tty_operations ptm_unix98_ops = {
+diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c
+index d029bbe9eb36..641fed609911 100644
+--- a/drivers/usb/host/ohci-q.c
++++ b/drivers/usb/host/ohci-q.c
+@@ -183,7 +183,6 @@ static int ed_schedule (struct ohci_hcd *ohci, struct ed *ed)
+ {
+ int branch;
+
+- ed->state = ED_OPER;
+ ed->ed_prev = NULL;
+ ed->ed_next = NULL;
+ ed->hwNextED = 0;
+@@ -259,6 +258,8 @@ static int ed_schedule (struct ohci_hcd *ohci, struct ed *ed)
+ /* the HC may not see the schedule updates yet, but if it does
+ * then they'll be properly ordered.
+ */
++
++ ed->state = ED_OPER;
+ return 0;
+ }
+
+diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
+index 8e67336f8ddd..6a25533da237 100644
+--- a/drivers/xen/xen-pciback/conf_space.c
++++ b/drivers/xen/xen-pciback/conf_space.c
+@@ -183,8 +183,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
+ field_start = OFFSET(cfg_entry);
+ field_end = OFFSET(cfg_entry) + field->size;
+
+- if ((req_start >= field_start && req_start < field_end)
+- || (req_end > field_start && req_end <= field_end)) {
++ if (req_end > field_start && field_end > req_start) {
+ err = conf_space_read(dev, cfg_entry, field_start,
+ &tmp_val);
+ if (err)
+@@ -230,8 +229,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
+ field_start = OFFSET(cfg_entry);
+ field_end = OFFSET(cfg_entry) + field->size;
+
+- if ((req_start >= field_start && req_start < field_end)
+- || (req_end > field_start && req_end <= field_end)) {
++ if (req_end > field_start && field_end > req_start) {
+ tmp_val = 0;
+
+ err = xen_pcibk_config_read(dev, field_start,
+diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
+index cacf30d14747..7487971f9f78 100644
+--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
++++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
+@@ -316,11 +316,18 @@ static int xenbus_write_transaction(unsigned msg_type,
+ rc = -ENOMEM;
+ goto out;
+ }
++ } else {
++ list_for_each_entry(trans, &u->transactions, list)
++ if (trans->handle.id == u->u.msg.tx_id)
++ break;
++ if (&trans->list == &u->transactions)
++ return -ESRCH;
+ }
+
+ reply = xenbus_dev_request_and_reply(&u->u.msg);
+ if (IS_ERR(reply)) {
+- kfree(trans);
++ if (msg_type == XS_TRANSACTION_START)
++ kfree(trans);
+ rc = PTR_ERR(reply);
+ goto out;
+ }
+@@ -333,12 +340,7 @@ static int xenbus_write_transaction(unsigned msg_type,
+ list_add(&trans->list, &u->transactions);
+ }
+ } else if (u->u.msg.type == XS_TRANSACTION_END) {
+- list_for_each_entry(trans, &u->transactions, list)
+- if (trans->handle.id == u->u.msg.tx_id)
+- break;
+- BUG_ON(&trans->list == &u->transactions);
+ list_del(&trans->list);
+-
+ kfree(trans);
+ }
+
+diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
+index 374b12af8812..0bd3d47ad24d 100644
+--- a/drivers/xen/xenbus/xenbus_xs.c
++++ b/drivers/xen/xenbus/xenbus_xs.c
+@@ -249,9 +249,6 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
+
+ mutex_unlock(&xs_state.request_mutex);
+
+- if (IS_ERR(ret))
+- return ret;
+-
+ if ((msg->type == XS_TRANSACTION_END) ||
+ ((req_msg.type == XS_TRANSACTION_START) &&
+ (msg->type == XS_ERROR)))
+diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
+index b84c291ba1eb..d7b78d531e63 100644
+--- a/fs/9p/vfs_file.c
++++ b/fs/9p/vfs_file.c
+@@ -74,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
+ v9fs_proto_dotu(v9ses));
+ fid = file->private_data;
+ if (!fid) {
+- fid = v9fs_fid_clone(file->f_path.dentry);
++ fid = v9fs_fid_clone(file_dentry(file));
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+@@ -100,7 +100,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
+ * because we want write after unlink usecase
+ * to work.
+ */
+- fid = v9fs_writeback_fid(file->f_path.dentry);
++ fid = v9fs_writeback_fid(file_dentry(file));
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ mutex_unlock(&v9inode->v_mutex);
+@@ -516,7 +516,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
+ * because we want write after unlink usecase
+ * to work.
+ */
+- fid = v9fs_writeback_fid(filp->f_path.dentry);
++ fid = v9fs_writeback_fid(file_dentry(filp));
+ if (IS_ERR(fid)) {
+ retval = PTR_ERR(fid);
+ mutex_unlock(&v9inode->v_mutex);
+diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
+index f02404052b7b..27794b137b24 100644
+--- a/fs/ecryptfs/file.c
++++ b/fs/ecryptfs/file.c
+@@ -169,6 +169,19 @@ out:
+ return rc;
+ }
+
++static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma)
++{
++ struct file *lower_file = ecryptfs_file_to_lower(file);
++ /*
++ * Don't allow mmap on top of file systems that don't support it
++ * natively. If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs
++ * allows recursive mounting, this will need to be extended.
++ */
++ if (!lower_file->f_op->mmap)
++ return -ENODEV;
++ return generic_file_mmap(file, vma);
++}
++
+ /**
+ * ecryptfs_open
+ * @inode: inode speciying file to open
+@@ -403,7 +416,7 @@ const struct file_operations ecryptfs_main_fops = {
+ #ifdef CONFIG_COMPAT
+ .compat_ioctl = ecryptfs_compat_ioctl,
+ #endif
+- .mmap = generic_file_mmap,
++ .mmap = ecryptfs_mmap,
+ .open = ecryptfs_open,
+ .flush = ecryptfs_flush,
+ .release = ecryptfs_release,
+diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
+index e818f5ac7a26..866bb18efefe 100644
+--- a/fs/ecryptfs/kthread.c
++++ b/fs/ecryptfs/kthread.c
+@@ -25,7 +25,6 @@
+ #include <linux/slab.h>
+ #include <linux/wait.h>
+ #include <linux/mount.h>
+-#include <linux/file.h>
+ #include "ecryptfs_kernel.h"
+
+ struct ecryptfs_open_req {
+@@ -148,7 +147,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
+ flags |= IS_RDONLY(d_inode(lower_dentry)) ? O_RDONLY : O_RDWR;
+ (*lower_file) = dentry_open(&req.path, flags, cred);
+ if (!IS_ERR(*lower_file))
+- goto have_file;
++ goto out;
+ if ((flags & O_ACCMODE) == O_RDONLY) {
+ rc = PTR_ERR((*lower_file));
+ goto out;
+@@ -166,16 +165,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
+ mutex_unlock(&ecryptfs_kthread_ctl.mux);
+ wake_up(&ecryptfs_kthread_ctl.wait);
+ wait_for_completion(&req.done);
+- if (IS_ERR(*lower_file)) {
++ if (IS_ERR(*lower_file))
+ rc = PTR_ERR(*lower_file);
+- goto out;
+- }
+-have_file:
+- if ((*lower_file)->f_op->mmap == NULL) {
+- fput(*lower_file);
+- *lower_file = NULL;
+- rc = -EMEDIUMTYPE;
+- }
+ out:
+ return rc;
+ }
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 95bf4679ac54..de692b91c166 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -469,6 +469,10 @@ static int __ext4_ext_check(const char *function, unsigned int line,
+ error_msg = "invalid extent entries";
+ goto corrupted;
+ }
++ if (unlikely(depth > 32)) {
++ error_msg = "too large eh_depth";
++ goto corrupted;
++ }
+ /* Verify checksum on non-root extent tree nodes */
+ if (ext_depth(inode) != depth &&
+ !ext4_extent_block_csum_verify(inode, eh)) {
+diff --git a/fs/inode.c b/fs/inode.c
+index 69b8b526c194..721fa18ead59 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -1739,8 +1739,8 @@ static int __remove_privs(struct dentry *dentry, int kill)
+ */
+ int file_remove_privs(struct file *file)
+ {
+- struct dentry *dentry = file->f_path.dentry;
+- struct inode *inode = d_inode(dentry);
++ struct dentry *dentry = file_dentry(file);
++ struct inode *inode = file_inode(file);
+ int kill;
+ int error = 0;
+
+@@ -1748,7 +1748,7 @@ int file_remove_privs(struct file *file)
+ if (IS_NOSEC(inode))
+ return 0;
+
+- kill = file_needs_remove_privs(file);
++ kill = dentry_needs_remove_privs(dentry);
+ if (kill < 0)
+ return kill;
+ if (kill)
+diff --git a/fs/ioctl.c b/fs/ioctl.c
+index 116a333e9c77..0f56deb24ce6 100644
+--- a/fs/ioctl.c
++++ b/fs/ioctl.c
+@@ -590,6 +590,7 @@ static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
+ goto out;
+ }
+
++ same->dest_count = count;
+ ret = vfs_dedupe_file_range(file, same);
+ if (ret)
+ goto out;
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 154a107cd376..fc4084ef4736 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -335,12 +335,17 @@ static struct notifier_block lockd_inet6addr_notifier = {
+ };
+ #endif
+
+-static void lockd_svc_exit_thread(void)
++static void lockd_unregister_notifiers(void)
+ {
+ unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
+ #if IS_ENABLED(CONFIG_IPV6)
+ unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
+ #endif
++}
++
++static void lockd_svc_exit_thread(void)
++{
++ lockd_unregister_notifiers();
+ svc_exit_thread(nlmsvc_rqst);
+ }
+
+@@ -462,7 +467,7 @@ int lockd_up(struct net *net)
+ * Note: svc_serv structures have an initial use count of 1,
+ * so we exit through here on both success and failure.
+ */
+-err_net:
++err_put:
+ svc_destroy(serv);
+ err_create:
+ mutex_unlock(&nlmsvc_mutex);
+@@ -470,7 +475,9 @@ err_create:
+
+ err_start:
+ lockd_down_net(serv, net);
+- goto err_net;
++err_net:
++ lockd_unregister_notifiers();
++ goto err_put;
+ }
+ EXPORT_SYMBOL_GPL(lockd_up);
+
+diff --git a/fs/locks.c b/fs/locks.c
+index 7c5f91be9b65..ee1b15f6fc13 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -1628,7 +1628,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
+ {
+ struct file_lock *fl, *my_fl = NULL, *lease;
+ struct dentry *dentry = filp->f_path.dentry;
+- struct inode *inode = dentry->d_inode;
++ struct inode *inode = file_inode(filp);
+ struct file_lock_context *ctx;
+ bool is_deleg = (*flp)->fl_flags & FL_DELEG;
+ int error;
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 783004af5707..419f746d851d 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1562,6 +1562,7 @@ void __detach_mounts(struct dentry *dentry)
+ goto out_unlock;
+
+ lock_mount_hash();
++ event++;
+ while (!hlist_empty(&mp->m_list)) {
+ mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
+ if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
+diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
+index 69bd801afb53..37e49cb2ac4c 100644
+--- a/fs/nilfs2/the_nilfs.c
++++ b/fs/nilfs2/the_nilfs.c
+@@ -443,7 +443,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
+ if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
+ return 0;
+ bytes = le16_to_cpu(sbp->s_bytes);
+- if (bytes > BLOCK_SIZE)
++ if (bytes < sumoff + 4 || bytes > BLOCK_SIZE)
+ return 0;
+ crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
+ sumoff);
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index 339125bb4d2c..2a9f10c8af1b 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -540,15 +540,19 @@
+
+ #define INIT_TEXT \
+ *(.init.text) \
++ *(.text.startup) \
+ MEM_DISCARD(init.text)
+
+ #define EXIT_DATA \
+ *(.exit.data) \
++ *(.fini_array) \
++ *(.dtors) \
+ MEM_DISCARD(exit.data) \
+ MEM_DISCARD(exit.rodata)
+
+ #define EXIT_TEXT \
+ *(.exit.text) \
++ *(.text.exit) \
+ MEM_DISCARD(exit.text)
+
+ #define EXIT_CALL \
+diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
+index 0367c63f5960..5c37b58445c6 100644
+--- a/include/linux/bcma/bcma.h
++++ b/include/linux/bcma/bcma.h
+@@ -158,6 +158,7 @@ struct bcma_host_ops {
+ #define BCMA_CORE_DEFAULT 0xFFF
+
+ #define BCMA_MAX_NR_CORES 16
++#define BCMA_CORE_SIZE 0x1000
+
+ /* Chip IDs of PCIe devices */
+ #define BCMA_CHIP_ID_BCM4313 0x4313
+diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
+index 1191d79aa495..4d061681dbf1 100644
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -97,6 +97,11 @@ enum mem_cgroup_events_target {
+ #define MEM_CGROUP_ID_SHIFT 16
+ #define MEM_CGROUP_ID_MAX USHRT_MAX
+
++struct mem_cgroup_id {
++ int id;
++ atomic_t ref;
++};
++
+ struct mem_cgroup_stat_cpu {
+ long count[MEMCG_NR_STAT];
+ unsigned long events[MEMCG_NR_EVENTS];
+@@ -172,6 +177,9 @@ enum memcg_kmem_state {
+ struct mem_cgroup {
+ struct cgroup_subsys_state css;
+
++ /* Private memcg ID. Used to ID objects that outlive the cgroup */
++ struct mem_cgroup_id id;
++
+ /* Accounted resources */
+ struct page_counter memory;
+ struct page_counter swap;
+@@ -330,22 +338,9 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
+ if (mem_cgroup_disabled())
+ return 0;
+
+- return memcg->css.id;
+-}
+-
+-/**
+- * mem_cgroup_from_id - look up a memcg from an id
+- * @id: the id to look up
+- *
+- * Caller must hold rcu_read_lock() and use css_tryget() as necessary.
+- */
+-static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+-{
+- struct cgroup_subsys_state *css;
+-
+- css = css_from_id(id, &memory_cgrp_subsys);
+- return mem_cgroup_from_css(css);
++ return memcg->id.id;
+ }
++struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
+
+ /**
+ * parent_mem_cgroup - find the accounting parent of a memcg
+diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
+index 51a97ac8bfbf..495a09bfeb94 100644
+--- a/include/linux/radix-tree.h
++++ b/include/linux/radix-tree.h
+@@ -399,6 +399,7 @@ static inline __must_check
+ void **radix_tree_iter_retry(struct radix_tree_iter *iter)
+ {
+ iter->next_index = iter->index;
++ iter->tags = 0;
+ return NULL;
+ }
+
+diff --git a/include/linux/rmap.h b/include/linux/rmap.h
+index 49eb4f8ebac9..2b0fad83683f 100644
+--- a/include/linux/rmap.h
++++ b/include/linux/rmap.h
+@@ -158,7 +158,7 @@ struct anon_vma *page_get_anon_vma(struct page *page);
+ /*
+ * rmap interfaces called when adding or removing pte of page
+ */
+-void page_move_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
++void page_move_anon_rmap(struct page *, struct vm_area_struct *);
+ void page_add_anon_rmap(struct page *, struct vm_area_struct *,
+ unsigned long, bool);
+ void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
+diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
+index 813ffb2e22c9..1c589b62c244 100644
+--- a/include/uapi/linux/Kbuild
++++ b/include/uapi/linux/Kbuild
+@@ -244,6 +244,7 @@ endif
+ header-y += hw_breakpoint.h
+ header-y += l2tp.h
+ header-y += libc-compat.h
++header-y += lirc.h
+ header-y += limits.h
+ header-y += llc.h
+ header-y += loop.h
+diff --git a/init/Kconfig b/init/Kconfig
+index 0dfd09d54c65..d895c7a183c6 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1423,6 +1423,7 @@ config KALLSYMS_ALL
+
+ config KALLSYMS_ABSOLUTE_PERCPU
+ bool
++ depends on KALLSYMS
+ default X86_64 && SMP
+
+ config KALLSYMS_BASE_RELATIVE
+diff --git a/kernel/cgroup.c b/kernel/cgroup.c
+index 86cb5c6e8932..75c0ff00aca6 100644
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -837,6 +837,8 @@ static void put_css_set_locked(struct css_set *cset)
+
+ static void put_css_set(struct css_set *cset)
+ {
++ unsigned long flags;
++
+ /*
+ * Ensure that the refcount doesn't hit zero while any readers
+ * can see it. Similar to atomic_dec_and_lock(), but for an
+@@ -845,9 +847,9 @@ static void put_css_set(struct css_set *cset)
+ if (atomic_add_unless(&cset->refcount, -1, 1))
+ return;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irqsave(&css_set_lock, flags);
+ put_css_set_locked(cset);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irqrestore(&css_set_lock, flags);
+ }
+
+ /*
+@@ -1070,11 +1072,11 @@ static struct css_set *find_css_set(struct css_set *old_cset,
+
+ /* First see if we already have a cgroup group that matches
+ * the desired set */
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ cset = find_existing_css_set(old_cset, cgrp, template);
+ if (cset)
+ get_css_set(cset);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ if (cset)
+ return cset;
+@@ -1102,7 +1104,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
+ * find_existing_css_set() */
+ memcpy(cset->subsys, template, sizeof(cset->subsys));
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ /* Add reference counts and links from the new css_set. */
+ list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
+ struct cgroup *c = link->cgrp;
+@@ -1128,7 +1130,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
+ css_get(css);
+ }
+
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ return cset;
+ }
+@@ -1192,7 +1194,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
+ * Release all the links from cset_links to this hierarchy's
+ * root cgroup
+ */
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+
+ list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
+ list_del(&link->cset_link);
+@@ -1200,7 +1202,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
+ kfree(link);
+ }
+
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ if (!list_empty(&root->root_list)) {
+ list_del(&root->root_list);
+@@ -1600,11 +1602,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
+ ss->root = dst_root;
+ css->cgroup = dcgrp;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ hash_for_each(css_set_table, i, cset, hlist)
+ list_move_tail(&cset->e_cset_node[ss->id],
+ &dcgrp->e_csets[ss->id]);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ /* default hierarchy doesn't enable controllers by default */
+ dst_root->subsys_mask |= 1 << ssid;
+@@ -1640,10 +1642,10 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
+ if (!buf)
+ return -ENOMEM;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
+ len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ if (len >= PATH_MAX)
+ len = -ERANGE;
+@@ -1897,7 +1899,7 @@ static void cgroup_enable_task_cg_lists(void)
+ {
+ struct task_struct *p, *g;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+
+ if (use_task_css_set_links)
+ goto out_unlock;
+@@ -1922,8 +1924,12 @@ static void cgroup_enable_task_cg_lists(void)
+ * entry won't be deleted though the process has exited.
+ * Do it while holding siglock so that we don't end up
+ * racing against cgroup_exit().
++ *
++ * Interrupts were already disabled while acquiring
++ * the css_set_lock, so we do not need to disable it
++ * again when acquiring the sighand->siglock here.
+ */
+- spin_lock_irq(&p->sighand->siglock);
++ spin_lock(&p->sighand->siglock);
+ if (!(p->flags & PF_EXITING)) {
+ struct css_set *cset = task_css_set(p);
+
+@@ -1932,11 +1938,11 @@ static void cgroup_enable_task_cg_lists(void)
+ list_add_tail(&p->cg_list, &cset->tasks);
+ get_css_set(cset);
+ }
+- spin_unlock_irq(&p->sighand->siglock);
++ spin_unlock(&p->sighand->siglock);
+ } while_each_thread(g, p);
+ read_unlock(&tasklist_lock);
+ out_unlock:
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ }
+
+ static void init_cgroup_housekeeping(struct cgroup *cgrp)
+@@ -2043,13 +2049,13 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
+ * Link the root cgroup in this hierarchy into all the css_set
+ * objects.
+ */
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ hash_for_each(css_set_table, i, cset, hlist) {
+ link_css_set(&tmp_links, cset, root_cgrp);
+ if (css_set_populated(cset))
+ cgroup_update_populated(root_cgrp, true);
+ }
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ BUG_ON(!list_empty(&root_cgrp->self.children));
+ BUG_ON(atomic_read(&root->nr_cgrps) != 1);
+@@ -2256,11 +2262,11 @@ out_mount:
+ struct cgroup *cgrp;
+
+ mutex_lock(&cgroup_mutex);
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+
+ cgrp = cset_cgroup_from_root(ns->root_cset, root);
+
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
+
+ nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
+@@ -2337,11 +2343,11 @@ char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+ char *ret;
+
+ mutex_lock(&cgroup_mutex);
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+
+ ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
+
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
+
+ return ret;
+@@ -2369,7 +2375,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+ char *path = NULL;
+
+ mutex_lock(&cgroup_mutex);
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+
+ root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
+
+@@ -2382,7 +2388,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+ path = buf;
+ }
+
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
+ return path;
+ }
+@@ -2557,7 +2563,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
+ * the new cgroup. There are no failure cases after here, so this
+ * is the commit point.
+ */
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ list_for_each_entry(cset, &tset->src_csets, mg_node) {
+ list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
+ struct css_set *from_cset = task_css_set(task);
+@@ -2568,7 +2574,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
+ put_css_set_locked(from_cset);
+ }
+ }
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ /*
+ * Migration is committed, all target tasks are now on dst_csets.
+@@ -2597,13 +2603,13 @@ out_cancel_attach:
+ }
+ } while_each_subsys_mask();
+ out_release_tset:
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ list_splice_init(&tset->dst_csets, &tset->src_csets);
+ list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
+ list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
+ list_del_init(&cset->mg_node);
+ }
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ return ret;
+ }
+
+@@ -2634,7 +2640,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
+
+ lockdep_assert_held(&cgroup_mutex);
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
+ cset->mg_src_cgrp = NULL;
+ cset->mg_dst_cgrp = NULL;
+@@ -2642,7 +2648,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
+ list_del_init(&cset->mg_preload_node);
+ put_css_set_locked(cset);
+ }
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ }
+
+ /**
+@@ -2783,7 +2789,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
+ * already PF_EXITING could be freed from underneath us unless we
+ * take an rcu_read_lock.
+ */
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ rcu_read_lock();
+ task = leader;
+ do {
+@@ -2792,7 +2798,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
+ break;
+ } while_each_thread(leader, task);
+ rcu_read_unlock();
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ return cgroup_taskset_migrate(&tset, root);
+ }
+@@ -2816,7 +2822,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
+ return -EBUSY;
+
+ /* look up all src csets */
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ rcu_read_lock();
+ task = leader;
+ do {
+@@ -2826,7 +2832,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
+ break;
+ } while_each_thread(leader, task);
+ rcu_read_unlock();
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ /* prepare dst csets and commit */
+ ret = cgroup_migrate_prepare_dst(&preloaded_csets);
+@@ -2859,9 +2865,9 @@ static int cgroup_procs_write_permission(struct task_struct *task,
+ struct cgroup *cgrp;
+ struct inode *inode;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ while (!cgroup_is_descendant(dst_cgrp, cgrp))
+ cgrp = cgroup_parent(cgrp);
+@@ -2962,9 +2968,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
+ if (root == &cgrp_dfl_root)
+ continue;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ from_cgrp = task_cgroup_from_root(from, root);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ retval = cgroup_attach_task(from_cgrp, tsk, false);
+ if (retval)
+@@ -3080,7 +3086,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ percpu_down_write(&cgroup_threadgroup_rwsem);
+
+ /* look up all csses currently attached to @cgrp's subtree */
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+ struct cgrp_cset_link *link;
+
+@@ -3088,14 +3094,14 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ cgroup_migrate_add_src(link->cset, dsct,
+ &preloaded_csets);
+ }
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ /* NULL dst indicates self on default hierarchy */
+ ret = cgroup_migrate_prepare_dst(&preloaded_csets);
+ if (ret)
+ goto out_finish;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
+ struct task_struct *task, *ntask;
+
+@@ -3107,7 +3113,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
+ cgroup_taskset_add(task, &tset);
+ }
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ ret = cgroup_taskset_migrate(&tset, cgrp->root);
+ out_finish:
+@@ -3908,10 +3914,10 @@ static int cgroup_task_count(const struct cgroup *cgrp)
+ int count = 0;
+ struct cgrp_cset_link *link;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ list_for_each_entry(link, &cgrp->cset_links, cset_link)
+ count += atomic_read(&link->cset->refcount);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ return count;
+ }
+
+@@ -4249,7 +4255,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
+
+ memset(it, 0, sizeof(*it));
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+
+ it->ss = css->ss;
+
+@@ -4262,7 +4268,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
+
+ css_task_iter_advance_css_set(it);
+
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ }
+
+ /**
+@@ -4280,7 +4286,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
+ it->cur_task = NULL;
+ }
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+
+ if (it->task_pos) {
+ it->cur_task = list_entry(it->task_pos, struct task_struct,
+@@ -4289,7 +4295,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
+ css_task_iter_advance(it);
+ }
+
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ return it->cur_task;
+ }
+@@ -4303,10 +4309,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
+ void css_task_iter_end(struct css_task_iter *it)
+ {
+ if (it->cur_cset) {
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ list_del(&it->iters_node);
+ put_css_set_locked(it->cur_cset);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ }
+
+ if (it->cur_task)
+@@ -4338,10 +4344,10 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
+ mutex_lock(&cgroup_mutex);
+
+ /* all tasks in @from are being moved, all csets are source */
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ list_for_each_entry(link, &from->cset_links, cset_link)
+ cgroup_migrate_add_src(link->cset, to, &preloaded_csets);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ ret = cgroup_migrate_prepare_dst(&preloaded_csets);
+ if (ret)
+@@ -5063,6 +5069,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
+ memset(css, 0, sizeof(*css));
+ css->cgroup = cgrp;
+ css->ss = ss;
++ css->id = -1;
+ INIT_LIST_HEAD(&css->sibling);
+ INIT_LIST_HEAD(&css->children);
+ css->serial_nr = css_serial_nr_next++;
+@@ -5150,7 +5157,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
+
+ err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
+ if (err < 0)
+- goto err_free_percpu_ref;
++ goto err_free_css;
+ css->id = err;
+
+ /* @css is ready to be brought online now, make it visible */
+@@ -5174,9 +5181,6 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
+
+ err_list_del:
+ list_del_rcu(&css->sibling);
+- cgroup_idr_remove(&ss->css_idr, css->id);
+-err_free_percpu_ref:
+- percpu_ref_exit(&css->refcnt);
+ err_free_css:
+ call_rcu(&css->rcu_head, css_free_rcu_fn);
+ return ERR_PTR(err);
+@@ -5451,10 +5455,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
+ */
+ cgrp->self.flags &= ~CSS_ONLINE;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ list_for_each_entry(link, &cgrp->cset_links, cset_link)
+ link->cset->dead = true;
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+
+ /* initiate massacre of all css's */
+ for_each_css(css, ssid, cgrp)
+@@ -5725,7 +5729,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+ goto out;
+
+ mutex_lock(&cgroup_mutex);
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+
+ for_each_root(root) {
+ struct cgroup_subsys *ss;
+@@ -5778,7 +5782,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+
+ retval = 0;
+ out_unlock:
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
+ kfree(buf);
+ out:
+@@ -5923,13 +5927,13 @@ void cgroup_post_fork(struct task_struct *child)
+ if (use_task_css_set_links) {
+ struct css_set *cset;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ cset = task_css_set(current);
+ if (list_empty(&child->cg_list)) {
+ get_css_set(cset);
+ css_set_move_task(child, NULL, cset, false);
+ }
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ }
+
+ /*
+@@ -5974,9 +5978,9 @@ void cgroup_exit(struct task_struct *tsk)
+ cset = task_css_set(tsk);
+
+ if (!list_empty(&tsk->cg_list)) {
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ css_set_move_task(tsk, cset, NULL, false);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ } else {
+ get_css_set(cset);
+ }
+@@ -6044,9 +6048,9 @@ static void cgroup_release_agent(struct work_struct *work)
+ if (!pathbuf || !agentbuf)
+ goto out;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ if (!path)
+ goto out;
+
+@@ -6306,12 +6310,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
+ return ERR_PTR(-EPERM);
+
+ mutex_lock(&cgroup_mutex);
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+
+ cset = task_css_set(current);
+ get_css_set(cset);
+
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
+
+ new_ns = alloc_cgroup_ns();
+@@ -6435,7 +6439,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
+ if (!name_buf)
+ return -ENOMEM;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ rcu_read_lock();
+ cset = rcu_dereference(current->cgroups);
+ list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+@@ -6446,7 +6450,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
+ c->root->hierarchy_id, name_buf);
+ }
+ rcu_read_unlock();
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ kfree(name_buf);
+ return 0;
+ }
+@@ -6457,7 +6461,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
+ struct cgroup_subsys_state *css = seq_css(seq);
+ struct cgrp_cset_link *link;
+
+- spin_lock_bh(&css_set_lock);
++ spin_lock_irq(&css_set_lock);
+ list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
+ struct css_set *cset = link->cset;
+ struct task_struct *task;
+@@ -6480,7 +6484,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
+ overflow:
+ seq_puts(seq, " ...\n");
+ }
+- spin_unlock_bh(&css_set_lock);
++ spin_unlock_irq(&css_set_lock);
+ return 0;
+ }
+
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index 3e3f6e49eabb..e7cea29bb9e2 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -1218,6 +1218,8 @@ static struct cpuhp_step cpuhp_bp_states[] = {
+ .teardown = takedown_cpu,
+ .cant_stop = true,
+ },
++#else
++ [CPUHP_BRINGUP_CPU] = { },
+ #endif
+ };
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 11546a6ed5df..65716cef21f4 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4993,14 +4993,16 @@ void show_state_filter(unsigned long state_filter)
+ /*
+ * reset the NMI-timeout, listing all files on a slow
+ * console might take a lot of time:
++ * Also, reset softlockup watchdogs on all CPUs, because
++ * another CPU might be blocked waiting for us to process
++ * an IPI.
+ */
+ touch_nmi_watchdog();
++ touch_all_softlockup_watchdogs();
+ if (!state_filter || (p->state & state_filter))
+ sched_show_task(p);
+ }
+
+- touch_all_softlockup_watchdogs();
+-
+ #ifdef CONFIG_SCHED_DEBUG
+ sysrq_sched_debug_show();
+ #endif
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index eeaf920f46b9..0858959cf747 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -687,8 +687,6 @@ void init_entity_runnable_average(struct sched_entity *se)
+ /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
+ }
+
+-static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
+-static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq);
+ #else
+ void init_entity_runnable_average(struct sched_entity *se)
+ {
+@@ -3194,7 +3192,7 @@ static inline void check_schedstat_required(void)
+ trace_sched_stat_iowait_enabled() ||
+ trace_sched_stat_blocked_enabled() ||
+ trace_sched_stat_runtime_enabled()) {
+- pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
++ printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+ "stat_blocked and stat_runtime require the "
+ "kernel parameter schedstats=enabled or "
+ "kernel.sched_schedstats=1\n");
+@@ -4822,19 +4820,24 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
+ return wl;
+
+ for_each_sched_entity(se) {
+- long w, W;
++ struct cfs_rq *cfs_rq = se->my_q;
++ long W, w = cfs_rq_load_avg(cfs_rq);
+
+- tg = se->my_q->tg;
++ tg = cfs_rq->tg;
+
+ /*
+ * W = @wg + \Sum rw_j
+ */
+- W = wg + calc_tg_weight(tg, se->my_q);
++ W = wg + atomic_long_read(&tg->load_avg);
++
++ /* Ensure \Sum rw_j >= rw_i */
++ W -= cfs_rq->tg_load_avg_contrib;
++ W += w;
+
+ /*
+ * w = rw_i + @wl
+ */
+- w = cfs_rq_load_avg(se->my_q) + wl;
++ w += wl;
+
+ /*
+ * wl = S * s'_i; see (2)
+diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
+index 1cafba860b08..39008d78927a 100644
+--- a/kernel/time/posix-cpu-timers.c
++++ b/kernel/time/posix-cpu-timers.c
+@@ -777,6 +777,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
+ timer->it.cpu.expires = 0;
+ sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
+ &itp->it_value);
++ return;
+ } else {
+ cpu_timer_sample_group(timer->it_clock, p, &now);
+ unlock_task_sighand(p, &flags);
+diff --git a/mm/compaction.c b/mm/compaction.c
+index f8e925eb479b..c59a3c480f83 100644
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -436,25 +436,23 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
+
+ /* Found a free page, break it into order-0 pages */
+ isolated = split_free_page(page);
++ if (!isolated)
++ break;
++
+ total_isolated += isolated;
++ cc->nr_freepages += isolated;
+ for (i = 0; i < isolated; i++) {
+ list_add(&page->lru, freelist);
+ page++;
+ }
+-
+- /* If a page was split, advance to the end of it */
+- if (isolated) {
+- cc->nr_freepages += isolated;
+- if (!strict &&
+- cc->nr_migratepages <= cc->nr_freepages) {
+- blockpfn += isolated;
+- break;
+- }
+-
+- blockpfn += isolated - 1;
+- cursor += isolated - 1;
+- continue;
++ if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
++ blockpfn += isolated;
++ break;
+ }
++ /* Advance to the end of split page */
++ blockpfn += isolated - 1;
++ cursor += isolated - 1;
++ continue;
+
+ isolate_fail:
+ if (strict)
+@@ -464,6 +462,9 @@ isolate_fail:
+
+ }
+
++ if (locked)
++ spin_unlock_irqrestore(&cc->zone->lock, flags);
++
+ /*
+ * There is a tiny chance that we have read bogus compound_order(),
+ * so be careful to not go outside of the pageblock.
+@@ -485,9 +486,6 @@ isolate_fail:
+ if (strict && blockpfn < end_pfn)
+ total_isolated = 0;
+
+- if (locked)
+- spin_unlock_irqrestore(&cc->zone->lock, flags);
+-
+ /* Update the pageblock-skip if the whole pageblock was scanned */
+ if (blockpfn == end_pfn)
+ update_pageblock_skip(cc, valid_page, total_isolated, false);
+@@ -938,7 +936,6 @@ static void isolate_freepages(struct compact_control *cc)
+ block_end_pfn = block_start_pfn,
+ block_start_pfn -= pageblock_nr_pages,
+ isolate_start_pfn = block_start_pfn) {
+-
+ /*
+ * This can iterate a massively long zone without finding any
+ * suitable migration targets, so periodically check if we need
+@@ -962,32 +959,30 @@ static void isolate_freepages(struct compact_control *cc)
+ continue;
+
+ /* Found a block suitable for isolating free pages from. */
+- isolate_freepages_block(cc, &isolate_start_pfn,
+- block_end_pfn, freelist, false);
++ isolate_freepages_block(cc, &isolate_start_pfn, block_end_pfn,
++ freelist, false);
+
+ /*
+- * If we isolated enough freepages, or aborted due to async
+- * compaction being contended, terminate the loop.
+- * Remember where the free scanner should restart next time,
+- * which is where isolate_freepages_block() left off.
+- * But if it scanned the whole pageblock, isolate_start_pfn
+- * now points at block_end_pfn, which is the start of the next
+- * pageblock.
+- * In that case we will however want to restart at the start
+- * of the previous pageblock.
++ * If we isolated enough freepages, or aborted due to lock
++ * contention, terminate.
+ */
+ if ((cc->nr_freepages >= cc->nr_migratepages)
+ || cc->contended) {
+- if (isolate_start_pfn >= block_end_pfn)
++ if (isolate_start_pfn >= block_end_pfn) {
++ /*
++ * Restart at previous pageblock if more
++ * freepages can be isolated next time.
++ */
+ isolate_start_pfn =
+ block_start_pfn - pageblock_nr_pages;
++ }
+ break;
+- } else {
++ } else if (isolate_start_pfn < block_end_pfn) {
+ /*
+- * isolate_freepages_block() should not terminate
+- * prematurely unless contended, or isolated enough
++ * If isolation failed early, do not continue
++ * needlessly.
+ */
+- VM_BUG_ON(isolate_start_pfn < block_end_pfn);
++ break;
+ }
+ }
+
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 19d0d08b396f..6b5058f9a1e3 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3328,7 +3328,7 @@ retry_avoidcopy:
+ /* If no-one else is actually using this page, avoid the copy
+ * and just make the page writable */
+ if (page_mapcount(old_page) == 1 && PageAnon(old_page)) {
+- page_move_anon_rmap(old_page, vma, address);
++ page_move_anon_rmap(old_page, vma);
+ set_huge_ptep_writable(vma, address, ptep);
+ return 0;
+ }
+diff --git a/mm/internal.h b/mm/internal.h
+index b79abb6721cf..bb309ad2b5e3 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -24,7 +24,8 @@
+ */
+ #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
+ __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
+- __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
++ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
++ __GFP_ATOMIC)
+
+ /* The GFP flags allowed during early boot */
+ #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index a2e79b83920f..bf860dbdd26e 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -4038,6 +4038,60 @@ static struct cftype mem_cgroup_legacy_files[] = {
+ { }, /* terminate */
+ };
+
++/*
++ * Private memory cgroup IDR
++ *
++ * Swap-out records and page cache shadow entries need to store memcg
++ * references in constrained space, so we maintain an ID space that is
++ * limited to 16 bit (MEM_CGROUP_ID_MAX), limiting the total number of
++ * memory-controlled cgroups to 64k.
++ *
++ * However, there usually are many references to the offline CSS after
++ * the cgroup has been destroyed, such as page cache or reclaimable
++ * slab objects, that don't need to hang on to the ID. We want to keep
++ * those dead CSS from occupying IDs, or we might quickly exhaust the
++ * relatively small ID space and prevent the creation of new cgroups
++ * even when there are much fewer than 64k cgroups - possibly none.
++ *
++ * Maintain a private 16-bit ID space for memcg, and allow the ID to
++ * be freed and recycled when it's no longer needed, which is usually
++ * when the CSS is offlined.
++ *
++ * The only exception to that are records of swapped out tmpfs/shmem
++ * pages that need to be attributed to live ancestors on swapin. But
++ * those references are manageable from userspace.
++ */
++
++static DEFINE_IDR(mem_cgroup_idr);
++
++static void mem_cgroup_id_get(struct mem_cgroup *memcg)
++{
++ atomic_inc(&memcg->id.ref);
++}
++
++static void mem_cgroup_id_put(struct mem_cgroup *memcg)
++{
++ if (atomic_dec_and_test(&memcg->id.ref)) {
++ idr_remove(&mem_cgroup_idr, memcg->id.id);
++ memcg->id.id = 0;
++
++ /* Memcg ID pins CSS */
++ css_put(&memcg->css);
++ }
++}
++
++/**
++ * mem_cgroup_from_id - look up a memcg from a memcg id
++ * @id: the memcg id to look up
++ *
++ * Caller must hold rcu_read_lock().
++ */
++struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
++{
++ WARN_ON_ONCE(!rcu_read_lock_held());
++ return idr_find(&mem_cgroup_idr, id);
++}
++
+ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
+ {
+ struct mem_cgroup_per_node *pn;
+@@ -4097,6 +4151,12 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
+ if (!memcg)
+ return NULL;
+
++ memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
++ 1, MEM_CGROUP_ID_MAX,
++ GFP_KERNEL);
++ if (memcg->id.id < 0)
++ goto fail;
++
+ memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
+ if (!memcg->stat)
+ goto fail;
+@@ -4123,8 +4183,11 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
+ #ifdef CONFIG_CGROUP_WRITEBACK
+ INIT_LIST_HEAD(&memcg->cgwb_list);
+ #endif
++ idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+ return memcg;
+ fail:
++ if (memcg->id.id > 0)
++ idr_remove(&mem_cgroup_idr, memcg->id.id);
+ mem_cgroup_free(memcg);
+ return NULL;
+ }
+@@ -4184,15 +4247,14 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+ return &memcg->css;
+ fail:
+ mem_cgroup_free(memcg);
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+ }
+
+-static int
+-mem_cgroup_css_online(struct cgroup_subsys_state *css)
++static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
+ {
+- if (css->id > MEM_CGROUP_ID_MAX)
+- return -ENOSPC;
+-
++ /* Online state pins memcg ID, memcg ID pins CSS */
++ mem_cgroup_id_get(mem_cgroup_from_css(css));
++ css_get(css);
+ return 0;
+ }
+
+@@ -4215,6 +4277,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
+
+ memcg_offline_kmem(memcg);
+ wb_memcg_offline(memcg);
++
++ mem_cgroup_id_put(memcg);
+ }
+
+ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
+@@ -5524,6 +5588,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
+ struct mem_cgroup *memcg;
+ unsigned int nr_pages;
+ bool compound;
++ unsigned long flags;
+
+ VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+@@ -5554,10 +5619,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
+
+ commit_charge(newpage, memcg, false);
+
+- local_irq_disable();
++ local_irq_save(flags);
+ mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
+ memcg_check_events(memcg, newpage);
+- local_irq_enable();
++ local_irq_restore(flags);
+ }
+
+ DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
+@@ -5735,6 +5800,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ if (!memcg)
+ return;
+
++ mem_cgroup_id_get(memcg);
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
+@@ -5753,6 +5819,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ VM_BUG_ON(!irqs_disabled());
+ mem_cgroup_charge_statistics(memcg, page, false, -1);
+ memcg_check_events(memcg, page);
++
++ if (!mem_cgroup_is_root(memcg))
++ css_put(&memcg->css);
+ }
+
+ /*
+@@ -5783,11 +5852,11 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
+ !page_counter_try_charge(&memcg->swap, 1, &counter))
+ return -ENOMEM;
+
++ mem_cgroup_id_get(memcg);
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
+
+- css_get(&memcg->css);
+ return 0;
+ }
+
+@@ -5816,7 +5885,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
+ page_counter_uncharge(&memcg->memsw, 1);
+ }
+ mem_cgroup_swap_statistics(memcg, false);
+- css_put(&memcg->css);
++ mem_cgroup_id_put(memcg);
+ }
+ rcu_read_unlock();
+ }
+diff --git a/mm/memory.c b/mm/memory.c
+index 07493e34ab7e..338cc5406fe0 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2397,8 +2397,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ * Protected against the rmap code by
+ * the page lock.
+ */
+- page_move_anon_rmap(compound_head(old_page),
+- vma, address);
++ page_move_anon_rmap(old_page, vma);
+ }
+ unlock_page(old_page);
+ return wp_page_reuse(mm, vma, address, page_table, ptl,
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 898fe3f909f9..ff50f5974d74 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -286,7 +286,9 @@ static inline void reset_deferred_meminit(pg_data_t *pgdat)
+ /* Returns true if the struct page for the pfn is uninitialised */
+ static inline bool __meminit early_page_uninitialised(unsigned long pfn)
+ {
+- if (pfn >= NODE_DATA(early_pfn_to_nid(pfn))->first_deferred_pfn)
++ int nid = early_pfn_to_nid(pfn);
++
++ if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn)
+ return true;
+
+ return false;
+@@ -1110,7 +1112,7 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
+ spin_lock(&early_pfn_lock);
+ nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
+ if (nid < 0)
+- nid = 0;
++ nid = first_online_node;
+ spin_unlock(&early_pfn_lock);
+
+ return nid;
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 3ebf9c4c2f4d..08fc03a31aaa 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1084,23 +1084,20 @@ EXPORT_SYMBOL_GPL(page_mkclean);
+ * page_move_anon_rmap - move a page to our anon_vma
+ * @page: the page to move to our anon_vma
+ * @vma: the vma the page belongs to
+- * @address: the user virtual address mapped
+ *
+ * When a page belongs exclusively to one process after a COW event,
+ * that page can be moved into the anon_vma that belongs to just that
+ * process, so the rmap code will not search the parent or sibling
+ * processes.
+ */
+-void page_move_anon_rmap(struct page *page,
+- struct vm_area_struct *vma, unsigned long address)
++void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
+ {
+ struct anon_vma *anon_vma = vma->anon_vma;
+
++ page = compound_head(page);
++
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_VMA(!anon_vma, vma);
+- if (IS_ENABLED(CONFIG_DEBUG_VM) && PageTransHuge(page))
+- address &= HPAGE_PMD_MASK;
+- VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
+
+ anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+ /*
+diff --git a/mm/slab_common.c b/mm/slab_common.c
+index 3239bfd758e6..3ac9e661dc86 100644
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -526,8 +526,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
+ goto out_unlock;
+
+ cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
+- cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
+- css->id, memcg_name_buf);
++ cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
++ css->serial_nr, memcg_name_buf);
+ if (!cache_name)
+ goto out_unlock;
+
+diff --git a/mm/swap.c b/mm/swap.c
+index 03aacbcb013f..374aa6e0c576 100644
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -239,7 +239,7 @@ void rotate_reclaimable_page(struct page *page)
+ get_page(page);
+ local_irq_save(flags);
+ pvec = this_cpu_ptr(&lru_rotate_pvecs);
+- if (!pagevec_add(pvec, page))
++ if (!pagevec_add(pvec, page) || PageCompound(page))
+ pagevec_move_tail(pvec);
+ local_irq_restore(flags);
+ }
+@@ -295,7 +295,7 @@ void activate_page(struct page *page)
+ struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+
+ get_page(page);
+- if (!pagevec_add(pvec, page))
++ if (!pagevec_add(pvec, page) || PageCompound(page))
+ pagevec_lru_move_fn(pvec, __activate_page, NULL);
+ put_cpu_var(activate_page_pvecs);
+ }
+@@ -390,9 +390,8 @@ static void __lru_cache_add(struct page *page)
+ struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+
+ get_page(page);
+- if (!pagevec_space(pvec))
++ if (!pagevec_add(pvec, page) || PageCompound(page))
+ __pagevec_lru_add(pvec);
+- pagevec_add(pvec, page);
+ put_cpu_var(lru_add_pvec);
+ }
+
+@@ -627,7 +626,7 @@ void deactivate_file_page(struct page *page)
+ if (likely(get_page_unless_zero(page))) {
+ struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
+
+- if (!pagevec_add(pvec, page))
++ if (!pagevec_add(pvec, page) || PageCompound(page))
+ pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
+ put_cpu_var(lru_deactivate_file_pvecs);
+ }
+@@ -647,7 +646,7 @@ void deactivate_page(struct page *page)
+ struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+
+ get_page(page);
+- if (!pagevec_add(pvec, page))
++ if (!pagevec_add(pvec, page) || PageCompound(page))
+ pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+ put_cpu_var(lru_deactivate_pvecs);
+ }
+diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
+index 243574c8cf33..8a832fa80dde 100644
+--- a/net/ceph/osdmap.c
++++ b/net/ceph/osdmap.c
+@@ -1201,6 +1201,115 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
+ }
+
+ /*
++ * Encoding order is (new_up_client, new_state, new_weight). Need to
++ * apply in the (new_weight, new_state, new_up_client) order, because
++ * an incremental map may look like e.g.
++ *
++ * new_up_client: { osd=6, addr=... } # set osd_state and addr
++ * new_state: { osd=6, xorstate=EXISTS } # clear osd_state
++ */
++static int decode_new_up_state_weight(void **p, void *end,
++ struct ceph_osdmap *map)
++{
++ void *new_up_client;
++ void *new_state;
++ void *new_weight_end;
++ u32 len;
++
++ new_up_client = *p;
++ ceph_decode_32_safe(p, end, len, e_inval);
++ len *= sizeof(u32) + sizeof(struct ceph_entity_addr);
++ ceph_decode_need(p, end, len, e_inval);
++ *p += len;
++
++ new_state = *p;
++ ceph_decode_32_safe(p, end, len, e_inval);
++ len *= sizeof(u32) + sizeof(u8);
++ ceph_decode_need(p, end, len, e_inval);
++ *p += len;
++
++ /* new_weight */
++ ceph_decode_32_safe(p, end, len, e_inval);
++ while (len--) {
++ s32 osd;
++ u32 w;
++
++ ceph_decode_need(p, end, 2*sizeof(u32), e_inval);
++ osd = ceph_decode_32(p);
++ w = ceph_decode_32(p);
++ BUG_ON(osd >= map->max_osd);
++ pr_info("osd%d weight 0x%x %s\n", osd, w,
++ w == CEPH_OSD_IN ? "(in)" :
++ (w == CEPH_OSD_OUT ? "(out)" : ""));
++ map->osd_weight[osd] = w;
++
++ /*
++ * If we are marking in, set the EXISTS, and clear the
++ * AUTOOUT and NEW bits.
++ */
++ if (w) {
++ map->osd_state[osd] |= CEPH_OSD_EXISTS;
++ map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT |
++ CEPH_OSD_NEW);
++ }
++ }
++ new_weight_end = *p;
++
++ /* new_state (up/down) */
++ *p = new_state;
++ len = ceph_decode_32(p);
++ while (len--) {
++ s32 osd;
++ u8 xorstate;
++ int ret;
++
++ osd = ceph_decode_32(p);
++ xorstate = ceph_decode_8(p);
++ if (xorstate == 0)
++ xorstate = CEPH_OSD_UP;
++ BUG_ON(osd >= map->max_osd);
++ if ((map->osd_state[osd] & CEPH_OSD_UP) &&
++ (xorstate & CEPH_OSD_UP))
++ pr_info("osd%d down\n", osd);
++ if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
++ (xorstate & CEPH_OSD_EXISTS)) {
++ pr_info("osd%d does not exist\n", osd);
++ map->osd_weight[osd] = CEPH_OSD_IN;
++ ret = set_primary_affinity(map, osd,
++ CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
++ if (ret)
++ return ret;
++ memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr));
++ map->osd_state[osd] = 0;
++ } else {
++ map->osd_state[osd] ^= xorstate;
++ }
++ }
++
++ /* new_up_client */
++ *p = new_up_client;
++ len = ceph_decode_32(p);
++ while (len--) {
++ s32 osd;
++ struct ceph_entity_addr addr;
++
++ osd = ceph_decode_32(p);
++ ceph_decode_copy(p, &addr, sizeof(addr));
++ ceph_decode_addr(&addr);
++ BUG_ON(osd >= map->max_osd);
++ pr_info("osd%d up\n", osd);
++ map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
++ map->osd_addr[osd] = addr;
++ }
++
++ *p = new_weight_end;
++ return 0;
++
++e_inval:
++ return -EINVAL;
++}
++
++/*
+ * decode and apply an incremental map update.
+ */
+ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
+@@ -1299,49 +1408,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
+ __remove_pg_pool(&map->pg_pools, pi);
+ }
+
+- /* new_up */
+- ceph_decode_32_safe(p, end, len, e_inval);
+- while (len--) {
+- u32 osd;
+- struct ceph_entity_addr addr;
+- ceph_decode_32_safe(p, end, osd, e_inval);
+- ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval);
+- ceph_decode_addr(&addr);
+- pr_info("osd%d up\n", osd);
+- BUG_ON(osd >= map->max_osd);
+- map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS;
+- map->osd_addr[osd] = addr;
+- }
+-
+- /* new_state */
+- ceph_decode_32_safe(p, end, len, e_inval);
+- while (len--) {
+- u32 osd;
+- u8 xorstate;
+- ceph_decode_32_safe(p, end, osd, e_inval);
+- xorstate = **(u8 **)p;
+- (*p)++; /* clean flag */
+- if (xorstate == 0)
+- xorstate = CEPH_OSD_UP;
+- if (xorstate & CEPH_OSD_UP)
+- pr_info("osd%d down\n", osd);
+- if (osd < map->max_osd)
+- map->osd_state[osd] ^= xorstate;
+- }
+-
+- /* new_weight */
+- ceph_decode_32_safe(p, end, len, e_inval);
+- while (len--) {
+- u32 osd, off;
+- ceph_decode_need(p, end, sizeof(u32)*2, e_inval);
+- osd = ceph_decode_32(p);
+- off = ceph_decode_32(p);
+- pr_info("osd%d weight 0x%x %s\n", osd, off,
+- off == CEPH_OSD_IN ? "(in)" :
+- (off == CEPH_OSD_OUT ? "(out)" : ""));
+- if (osd < map->max_osd)
+- map->osd_weight[osd] = off;
+- }
++ /* new_up_client, new_state, new_weight */
++ err = decode_new_up_state_weight(p, end, map);
++ if (err)
++ goto bad;
+
+ /* new_pg_temp */
+ err = decode_new_pg_temp(p, end, map);
+diff --git a/net/rds/tcp.c b/net/rds/tcp.c
+index 86187dad1440..f8d94f4042ef 100644
+--- a/net/rds/tcp.c
++++ b/net/rds/tcp.c
+@@ -544,7 +544,7 @@ static int rds_tcp_init(void)
+
+ ret = rds_tcp_recv_init();
+ if (ret)
+- goto out_slab;
++ goto out_pernet;
+
+ ret = rds_trans_register(&rds_tcp_transport);
+ if (ret)
+@@ -556,8 +556,9 @@ static int rds_tcp_init(void)
+
+ out_recv:
+ rds_tcp_recv_exit();
+-out_slab:
++out_pernet:
+ unregister_pernet_subsys(&rds_tcp_net_ops);
++out_slab:
+ kmem_cache_destroy(rds_tcp_conn_slab);
+ out:
+ return ret;
+diff --git a/net/wireless/util.c b/net/wireless/util.c
+index 47b917841623..29b145ed3338 100644
+--- a/net/wireless/util.c
++++ b/net/wireless/util.c
+@@ -721,6 +721,8 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
+ * alignment since sizeof(struct ethhdr) is 14.
+ */
+ frame = dev_alloc_skb(hlen + sizeof(struct ethhdr) + 2 + cur_len);
++ if (!frame)
++ return NULL;
+
+ skb_reserve(frame, hlen + sizeof(struct ethhdr) + 2);
+ skb_copy_bits(skb, offset, skb_put(frame, cur_len), cur_len);
+diff --git a/sound/core/timer.c b/sound/core/timer.c
+index 23b73f6ac040..98378539fc59 100644
+--- a/sound/core/timer.c
++++ b/sound/core/timer.c
+@@ -1225,6 +1225,7 @@ static void snd_timer_user_ccallback(struct snd_timer_instance *timeri,
+ tu->tstamp = *tstamp;
+ if ((tu->filter & (1 << event)) == 0 || !tu->tread)
+ return;
++ memset(&r1, 0, sizeof(r1));
+ r1.event = event;
+ r1.tstamp = *tstamp;
+ r1.val = resolution;
+@@ -1267,6 +1268,7 @@ static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri,
+ }
+ if ((tu->filter & (1 << SNDRV_TIMER_EVENT_RESOLUTION)) &&
+ tu->last_resolution != resolution) {
++ memset(&r1, 0, sizeof(r1));
+ r1.event = SNDRV_TIMER_EVENT_RESOLUTION;
+ r1.tstamp = tstamp;
+ r1.val = resolution;
+@@ -1739,6 +1741,7 @@ static int snd_timer_user_params(struct file *file,
+ if (tu->timeri->flags & SNDRV_TIMER_IFLG_EARLY_EVENT) {
+ if (tu->tread) {
+ struct snd_timer_tread tread;
++ memset(&tread, 0, sizeof(tread));
+ tread.event = SNDRV_TIMER_EVENT_EARLY;
+ tread.tstamp.tv_sec = 0;
+ tread.tstamp.tv_nsec = 0;
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-08-12 11:24 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-08-12 11:24 UTC (permalink / raw
To: gentoo-commits
commit: adac5d24f7d9814142c94318ccb82b69e9d91a3c
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Fri Aug 12 11:24:28 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Fri Aug 12 11:24:28 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=adac5d24
Rename one of the BFQ patches. See bug #591078.
0000_README | 2 +-
... => 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1 | 0
2 files changed, 1 insertion(+), 1 deletion(-)
diff --git a/0000_README b/0000_README
index 1bb262e..a0547ab 100644
--- a/0000_README
+++ b/0000_README
@@ -107,7 +107,7 @@ Patch: 5003_block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for-4.6.patch
From: http://algo.ing.unimo.it/people/paolo/disk_sched/
Desc: BFQ v7r11 patch 3 for 4.6: Early Queue Merge (EQM)
-Patch: 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch2
+Patch: 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1
From: http://algo.ing.unimo.it/people/paolo/disk_sched/
Desc: BFQ v7r11 patch 4 for 4.7: Early Queue Merge (EQM)
diff --git a/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch2 b/5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1
similarity index 100%
rename from 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch2
rename to 5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1
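For readers following along, the change boils down to a file rename plus a matching 0000_README update. A minimal sketch of the equivalent operation in Python, assuming a local checkout of the linux-patches 4.6 branch (the file names come from the diff above; everything else is illustrative):

# Hedged sketch of this commit's effect: rename the BFQ patch from a .patch2
# suffix to .patch1 and keep 0000_README pointing at the new name. Running
# this inside a checkout of the 4.6 branch is an assumption, not part of the
# commit itself.
from pathlib import Path

OLD = "5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch2"
NEW = "5004_blkck-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8-for-4.patch1"

repo = Path(".")                     # assumed: cwd is the branch checkout
(repo / OLD).rename(repo / NEW)      # same content, new suffix

readme = repo / "0000_README"
readme.write_text(readme.read_text().replace(OLD, NEW))  # keep the index in sync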
* [gentoo-commits] proj/linux-patches:4.6 commit in: /
@ 2016-08-16 23:53 Mike Pagano
0 siblings, 0 replies; 12+ messages in thread
From: Mike Pagano @ 2016-08-16 23:53 UTC (permalink / raw
To: gentoo-commits
commit: df00160fd19ac3ffbd1887ee9bdc64384f4f8421
Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Tue Aug 16 23:53:09 2016 +0000
Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Tue Aug 16 23:53:09 2016 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=df00160f
Linux patch 4.6.7
0000_README | 4 +
1006_linux-4.6.7.patch | 2184 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 2188 insertions(+)
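The 1000-series entries in 0000_README are incremental point-release diffs, applied in numeric order on top of a vanilla 4.6 tree. A minimal sketch of that step, assuming local checkouts named linux-patches and linux-4.6 (both directory names, and the use of `patch -p1`, are illustrative assumptions rather than part of this commit):

# Hedged sketch only: apply the 1000-series point-release patches in numeric
# order. The commit itself only adds 1006_linux-4.6.7.patch and its
# 0000_README entry.
import subprocess
from pathlib import Path

PATCH_DIR = Path("linux-patches")    # assumed checkout of this repository
KERNEL_TREE = Path("linux-4.6")      # assumed vanilla upstream source tree

for p in sorted(PATCH_DIR.glob("1[0-9][0-9][0-9]_linux-*.patch")):
    # 1005 takes the tree to 4.6.6; 1006 (added here) takes it to 4.6.7.
    subprocess.run(["patch", "-p1", "-i", str(p.resolve())],
                   cwd=KERNEL_TREE, check=True)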
diff --git a/0000_README b/0000_README
index a0547ab..90202fe 100644
--- a/0000_README
+++ b/0000_README
@@ -67,6 +67,10 @@ Patch: 1005_linux-4.6.6.patch
From: http://www.kernel.org
Desc: Linux 4.6.6
+Patch: 1006_linux-4.6.7.patch
+From: http://www.kernel.org
+Desc: Linux 4.6.7
+
Patch: 1500_XATTR_USER_PREFIX.patch
From: https://bugs.gentoo.org/show_bug.cgi?id=470644
Desc: Support for namespace user.pax.* on tmpfs.
diff --git a/1006_linux-4.6.7.patch b/1006_linux-4.6.7.patch
new file mode 100644
index 0000000..0bd4bce
--- /dev/null
+++ b/1006_linux-4.6.7.patch
@@ -0,0 +1,2184 @@
+diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt
+index 0a94224ad296..9e3c3b33514c 100644
+--- a/Documentation/cpu-freq/pcc-cpufreq.txt
++++ b/Documentation/cpu-freq/pcc-cpufreq.txt
+@@ -159,8 +159,8 @@ to be strictly associated with a P-state.
+
+ 2.2 cpuinfo_transition_latency:
+ -------------------------------
+-The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification
+-does not include a field to expose this value currently.
++The cpuinfo_transition_latency field is 0. The PCC specification does
++not include a field to expose this value currently.
+
+ 2.3 cpuinfo_cur_freq:
+ ---------------------
+diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
+index 54944c71b819..2a4ee6302122 100644
+--- a/Documentation/x86/pat.txt
++++ b/Documentation/x86/pat.txt
+@@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with
+ "debugpat" boot parameter. With this parameter, various debug messages are
+ printed to dmesg log.
+
++PAT Initialization
++------------------
++
++The following table describes how PAT is initialized under various
++configurations. The PAT MSR must be updated by Linux in order to support WC
++and WT attributes. Otherwise, the PAT MSR has the value programmed in it
++by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
++
++ MTRR PAT Call Sequence PAT State PAT MSR
++ =========================================================
++ E E MTRR -> PAT init Enabled OS
++ E D MTRR -> PAT init Disabled -
++ D E MTRR -> PAT disable Disabled BIOS
++ D D MTRR -> PAT disable Disabled -
++ - np/E PAT -> PAT disable Disabled BIOS
++ - np/D PAT -> PAT disable Disabled -
++ E !P/E MTRR -> PAT init Disabled BIOS
++ D !P/E MTRR -> PAT disable Disabled BIOS
++ !M !P/E MTRR stub -> PAT disable Disabled BIOS
++
++ Legend
++ ------------------------------------------------
++ E Feature enabled in CPU
++ D Feature disabled/unsupported in CPU
++ np "nopat" boot option specified
++ !P CONFIG_X86_PAT option unset
++ !M CONFIG_MTRR option unset
++ Enabled PAT state set to enabled
++ Disabled PAT state set to disabled
++ OS PAT initializes PAT MSR with OS setting
++ BIOS PAT keeps PAT MSR with BIOS setting
++
+diff --git a/Makefile b/Makefile
+index bee1a1692fed..5fe9a7a9cd65 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 6
+-SUBLEVEL = 6
++SUBLEVEL = 7
+ EXTRAVERSION =
+ NAME = Charred Weasel
+
+diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
+index 087acb569b63..5f221acd21ae 100644
+--- a/arch/arm/kernel/sys_oabi-compat.c
++++ b/arch/arm/kernel/sys_oabi-compat.c
+@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
+ mm_segment_t fs;
+ long ret, err, i;
+
+- if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
++ if (maxevents <= 0 ||
++ maxevents > (INT_MAX/sizeof(*kbuf)) ||
++ maxevents > (INT_MAX/sizeof(*events)))
+ return -EINVAL;
++ if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
++ return -EFAULT;
+ kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,
+
+ if (nsops < 1 || nsops > SEMOPM)
+ return -EINVAL;
++ if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
++ return -EFAULT;
+ sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
+ if (!sops)
+ return -ENOMEM;
+diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
+index 71f99d5f7a06..6021318bfbb0 100644
+--- a/arch/mips/kernel/scall64-n32.S
++++ b/arch/mips/kernel/scall64-n32.S
+@@ -344,7 +344,7 @@ EXPORT(sysn32_call_table)
+ PTR sys_ni_syscall /* available, was setaltroot */
+ PTR sys_add_key
+ PTR sys_request_key
+- PTR sys_keyctl /* 6245 */
++ PTR compat_sys_keyctl /* 6245 */
+ PTR sys_set_thread_area
+ PTR sys_inotify_init
+ PTR sys_inotify_add_watch
+diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
+index 91b43eea2d5a..71fe3259a5e3 100644
+--- a/arch/mips/kernel/scall64-o32.S
++++ b/arch/mips/kernel/scall64-o32.S
+@@ -500,7 +500,7 @@ EXPORT(sys32_call_table)
+ PTR sys_ni_syscall /* available, was setaltroot */
+ PTR sys_add_key /* 4280 */
+ PTR sys_request_key
+- PTR sys_keyctl
++ PTR compat_sys_keyctl
+ PTR sys_set_thread_area
+ PTR sys_inotify_init
+ PTR sys_inotify_add_watch /* 4285 */
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
+index 7cd32c038286..4e1b060b8481 100644
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -126,7 +126,7 @@ config PPC
+ select IRQ_FORCED_THREADING
+ select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_SYSCALL_TRACEPOINTS
+- select HAVE_BPF_JIT
++ select HAVE_BPF_JIT if CPU_BIG_ENDIAN
+ select HAVE_ARCH_JUMP_LABEL
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_HAS_GCOV_PROFILE_ALL
+diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
+index 31e4c7e1a4b4..c42627645b54 100644
+--- a/arch/powerpc/kernel/eeh_driver.c
++++ b/arch/powerpc/kernel/eeh_driver.c
+@@ -648,7 +648,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ } else {
+- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ pci_lock_rescan_remove();
+ pcibios_remove_pci_devices(bus);
+ pci_unlock_rescan_remove();
+@@ -698,10 +697,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
+ */
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+ eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
+- if (pe->type & EEH_PE_VF)
++ if (pe->type & EEH_PE_VF) {
+ eeh_add_virt_device(edev, NULL);
+- else
++ } else {
++ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ pcibios_add_pci_devices(bus);
++ }
+ } else if (frozen_bus && rmv_data->removed) {
+ pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
+ ssleep(5);
+diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
+index 4cddd17153fb..f848572169ea 100644
+--- a/arch/x86/entry/syscalls/syscall_32.tbl
++++ b/arch/x86/entry/syscalls/syscall_32.tbl
+@@ -294,7 +294,7 @@
+ # 285 sys_setaltroot
+ 286 i386 add_key sys_add_key
+ 287 i386 request_key sys_request_key
+-288 i386 keyctl sys_keyctl
++288 i386 keyctl sys_keyctl compat_sys_keyctl
+ 289 i386 ioprio_set sys_ioprio_set
+ 290 i386 ioprio_get sys_ioprio_get
+ 291 i386 inotify_init sys_inotify_init
+diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
+index 9d3a96c4da78..01c2d14ec05f 100644
+--- a/arch/x86/include/asm/microcode.h
++++ b/arch/x86/include/asm/microcode.h
+@@ -133,13 +133,11 @@ static inline unsigned int x86_cpuid_family(void)
+ #ifdef CONFIG_MICROCODE
+ extern void __init load_ucode_bsp(void);
+ extern void load_ucode_ap(void);
+-extern int __init save_microcode_in_initrd(void);
+ void reload_early_microcode(void);
+ extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
+ #else
+ static inline void __init load_ucode_bsp(void) { }
+ static inline void load_ucode_ap(void) { }
+-static inline int __init save_microcode_in_initrd(void) { return 0; }
+ static inline void reload_early_microcode(void) { }
+ static inline bool
+ get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
+diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
+index b94f6f64e23d..dbff1456d215 100644
+--- a/arch/x86/include/asm/mtrr.h
++++ b/arch/x86/include/asm/mtrr.h
+@@ -24,6 +24,7 @@
+ #define _ASM_X86_MTRR_H
+
+ #include <uapi/asm/mtrr.h>
++#include <asm/pat.h>
+
+
+ /*
+@@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
+ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
+ {
+ }
++static inline void mtrr_bp_init(void)
++{
++ pat_disable("MTRRs disabled, skipping PAT initialization too.");
++}
+
+ #define mtrr_ap_init() do {} while (0)
+-#define mtrr_bp_init() do {} while (0)
+ #define set_mtrr_aps_delayed_init() do {} while (0)
+ #define mtrr_aps_init() do {} while (0)
+ #define mtrr_bp_restore() do {} while (0)
+diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
+index ca6c228d5e62..0b1ff4c1c14e 100644
+--- a/arch/x86/include/asm/pat.h
++++ b/arch/x86/include/asm/pat.h
+@@ -5,8 +5,8 @@
+ #include <asm/pgtable_types.h>
+
+ bool pat_enabled(void);
++void pat_disable(const char *reason);
+ extern void pat_init(void);
+-void pat_init_cache_modes(u64);
+
+ extern int reserve_memtype(u64 start, u64 end,
+ enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
+diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
+index ac360bfbbdb6..12823b6ebd6d 100644
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -175,7 +175,7 @@ void load_ucode_ap(void)
+ }
+ }
+
+-int __init save_microcode_in_initrd(void)
++static int __init save_microcode_in_initrd(void)
+ {
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+@@ -691,4 +691,5 @@ int __init microcode_init(void)
+ return error;
+
+ }
++fs_initcall(save_microcode_in_initrd);
+ late_initcall(microcode_init);
+diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
+index 19f57360dfd2..8d7a29ed9377 100644
+--- a/arch/x86/kernel/cpu/mtrr/generic.c
++++ b/arch/x86/kernel/cpu/mtrr/generic.c
+@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
+ pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
+ }
+
++/* PAT setup for BP. We need to go through sync steps here */
++void __init mtrr_bp_pat_init(void)
++{
++ unsigned long flags;
++
++ local_irq_save(flags);
++ prepare_set();
++
++ pat_init();
++
++ post_set();
++ local_irq_restore(flags);
++}
++
+ /* Grab all of the MTRR state for this CPU into *state */
+ bool __init get_mtrr_state(void)
+ {
+ struct mtrr_var_range *vrs;
+- unsigned long flags;
+ unsigned lo, dummy;
+ unsigned int i;
+
+@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)
+
+ mtrr_state_set = 1;
+
+- /* PAT setup for BP. We need to go through sync steps here */
+- local_irq_save(flags);
+- prepare_set();
+-
+- pat_init();
+-
+- post_set();
+- local_irq_restore(flags);
+-
+ return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
+ }
+
+diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
+index 10f8d4796240..7d393ecdeee6 100644
+--- a/arch/x86/kernel/cpu/mtrr/main.c
++++ b/arch/x86/kernel/cpu/mtrr/main.c
+@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
+ /* BIOS may override */
+ __mtrr_enabled = get_mtrr_state();
+
++ if (mtrr_enabled())
++ mtrr_bp_pat_init();
++
+ if (mtrr_cleanup(phys_addr)) {
+ changed_by_mtrr_cleanup = 1;
+ mtrr_if->set_all();
+@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
+ }
+ }
+
+- if (!mtrr_enabled())
++ if (!mtrr_enabled()) {
+ pr_info("MTRR: Disabled\n");
++
++ /*
++ * PAT initialization relies on MTRR's rendezvous handler.
++ * Skip PAT init until the handler can initialize both
++ * features independently.
++ */
++ pat_disable("MTRRs disabled, skipping PAT initialization too.");
++ }
+ }
+
+ void mtrr_ap_init(void)
+diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
+index 951884dcc433..6c7ced07d16d 100644
+--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
++++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
+@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
+ void fill_mtrr_var_range(unsigned int index,
+ u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
+ bool get_mtrr_state(void);
++void mtrr_bp_pat_init(void);
+
+ extern void set_mtrr_ops(const struct mtrr_ops *ops);
+
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 9d56f271d519..6df291c2987c 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -696,13 +696,6 @@ void free_initmem(void)
+ void __init free_initrd_mem(unsigned long start, unsigned long end)
+ {
+ /*
+- * Remember, initrd memory may contain microcode or other useful things.
+- * Before we lose initrd mem, we need to find a place to hold them
+- * now that normal virtual memory is enabled.
+- */
+- save_microcode_in_initrd();
+-
+- /*
+ * end could be not aligned, and We can not align that,
+ * decompresser could be confused by aligned initrd_end
+ * We already reserve the end partial page before in
+diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
+index faec01e7a17d..fb0604f11eec 100644
+--- a/arch/x86/mm/pat.c
++++ b/arch/x86/mm/pat.c
+@@ -40,11 +40,22 @@
+ static bool boot_cpu_done;
+
+ static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
++static void init_cache_modes(void);
+
+-static inline void pat_disable(const char *reason)
++void pat_disable(const char *reason)
+ {
++ if (!__pat_enabled)
++ return;
++
++ if (boot_cpu_done) {
++ WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
++ return;
++ }
++
+ __pat_enabled = 0;
+ pr_info("x86/PAT: %s\n", reason);
++
++ init_cache_modes();
+ }
+
+ static int __init nopat(char *str)
+@@ -181,7 +192,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
+ * configuration.
+ * Using lower indices is preferred, so we start with highest index.
+ */
+-void pat_init_cache_modes(u64 pat)
++static void __init_cache_modes(u64 pat)
+ {
+ enum page_cache_mode cache;
+ char pat_msg[33];
+@@ -202,14 +213,11 @@ static void pat_bsp_init(u64 pat)
+ {
+ u64 tmp_pat;
+
+- if (!cpu_has_pat) {
++ if (!boot_cpu_has(X86_FEATURE_PAT)) {
+ pat_disable("PAT not supported by CPU.");
+ return;
+ }
+
+- if (!pat_enabled())
+- goto done;
+-
+ rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
+ if (!tmp_pat) {
+ pat_disable("PAT MSR is 0, disabled.");
+@@ -218,16 +226,12 @@ static void pat_bsp_init(u64 pat)
+
+ wrmsrl(MSR_IA32_CR_PAT, pat);
+
+-done:
+- pat_init_cache_modes(pat);
++ __init_cache_modes(pat);
+ }
+
+ static void pat_ap_init(u64 pat)
+ {
+- if (!pat_enabled())
+- return;
+-
+- if (!cpu_has_pat) {
++ if (!boot_cpu_has(X86_FEATURE_PAT)) {
+ /*
+ * If this happens we are on a secondary CPU, but switched to
+ * PAT on the boot CPU. We have no way to undo PAT.
+@@ -238,18 +242,32 @@ static void pat_ap_init(u64 pat)
+ wrmsrl(MSR_IA32_CR_PAT, pat);
+ }
+
+-void pat_init(void)
++static void init_cache_modes(void)
+ {
+- u64 pat;
+- struct cpuinfo_x86 *c = &boot_cpu_data;
++ u64 pat = 0;
++ static int init_cm_done;
+
+- if (!pat_enabled()) {
++ if (init_cm_done)
++ return;
++
++ if (boot_cpu_has(X86_FEATURE_PAT)) {
++ /*
++ * CPU supports PAT. Set PAT table to be consistent with
++ * PAT MSR. This case supports "nopat" boot option, and
++ * virtual machine environments which support PAT without
++ * MTRRs. In specific, Xen has unique setup to PAT MSR.
++ *
++ * If PAT MSR returns 0, it is considered invalid and emulates
++ * as No PAT.
++ */
++ rdmsrl(MSR_IA32_CR_PAT, pat);
++ }
++
++ if (!pat) {
+ /*
+ * No PAT. Emulate the PAT table that corresponds to the two
+- * cache bits, PWT (Write Through) and PCD (Cache Disable). This
+- * setup is the same as the BIOS default setup when the system
+- * has PAT but the "nopat" boot option has been specified. This
+- * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
++ * cache bits, PWT (Write Through) and PCD (Cache Disable).
++ * This setup is also the same as the BIOS default setup.
+ *
+ * PTE encoding:
+ *
+@@ -266,10 +284,36 @@ void pat_init(void)
+ */
+ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
+ PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
++ }
++
++ __init_cache_modes(pat);
++
++ init_cm_done = 1;
++}
++
++/**
++ * pat_init - Initialize PAT MSR and PAT table
++ *
++ * This function initializes PAT MSR and PAT table with an OS-defined value
++ * to enable additional cache attributes, WC and WT.
++ *
++ * This function must be called on all CPUs using the specific sequence of
++ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
++ * procedure for PAT.
++ */
++void pat_init(void)
++{
++ u64 pat;
++ struct cpuinfo_x86 *c = &boot_cpu_data;
++
++ if (!pat_enabled()) {
++ init_cache_modes();
++ return;
++ }
+
+- } else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+- (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+- ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
++ if ((c->x86_vendor == X86_VENDOR_INTEL) &&
++ (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
++ ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+ /*
+ * PAT support with the lower four entries. Intel Pentium 2,
+ * 3, M, and 4 are affected by PAT errata, which makes the
+@@ -734,25 +778,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+ if (file->f_flags & O_DSYNC)
+ pcm = _PAGE_CACHE_MODE_UC_MINUS;
+
+-#ifdef CONFIG_X86_32
+- /*
+- * On the PPro and successors, the MTRRs are used to set
+- * memory types for physical addresses outside main memory,
+- * so blindly setting UC or PWT on those pages is wrong.
+- * For Pentiums and earlier, the surround logic should disable
+- * caching for the high addresses through the KEN pin, but
+- * we maintain the tradition of paranoia in this code.
+- */
+- if (!pat_enabled() &&
+- !(boot_cpu_has(X86_FEATURE_MTRR) ||
+- boot_cpu_has(X86_FEATURE_K6_MTRR) ||
+- boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
+- boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
+- (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
+- pcm = _PAGE_CACHE_MODE_UC;
+- }
+-#endif
+-
+ *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
+ cachemode2protval(pcm));
+ return 1;
+diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
+index 4400a43b9e28..f2a990285a5c 100644
+--- a/arch/x86/power/hibernate_asm_64.S
++++ b/arch/x86/power/hibernate_asm_64.S
+@@ -24,7 +24,6 @@
+ #include <asm/frame.h>
+
+ ENTRY(swsusp_arch_suspend)
+- FRAME_BEGIN
+ movq $saved_context, %rax
+ movq %rsp, pt_regs_sp(%rax)
+ movq %rbp, pt_regs_bp(%rax)
+@@ -51,6 +50,7 @@ ENTRY(swsusp_arch_suspend)
+ movq %cr3, %rax
+ movq %rax, restore_cr3(%rip)
+
++ FRAME_BEGIN
+ call swsusp_save
+ FRAME_END
+ ret
+@@ -111,7 +111,6 @@ ENTRY(core_restore_code)
+ */
+
+ ENTRY(restore_registers)
+- FRAME_BEGIN
+ /* go back to the original page tables */
+ movq %rbx, %cr3
+
+@@ -152,6 +151,5 @@ ENTRY(restore_registers)
+ /* tell the hibernation core that we've just restored the memory */
+ movq %rax, in_suspend(%rip)
+
+- FRAME_END
+ ret
+ ENDPROC(restore_registers)
+diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
+index 880862c7d9dd..d8cca75e3b3e 100644
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -75,7 +75,6 @@
+ #include <asm/mach_traps.h>
+ #include <asm/mwait.h>
+ #include <asm/pci_x86.h>
+-#include <asm/pat.h>
+ #include <asm/cpu.h>
+
+ #ifdef CONFIG_ACPI
+@@ -1511,7 +1510,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
+ {
+ struct physdev_set_iopl set_iopl;
+ unsigned long initrd_start = 0;
+- u64 pat;
+ int rc;
+
+ if (!xen_start_info)
+@@ -1618,13 +1616,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
+ xen_start_info->nr_pages);
+ xen_reserve_special_pages();
+
+- /*
+- * Modify the cache mode translation tables to match Xen's PAT
+- * configuration.
+- */
+- rdmsrl(MSR_IA32_CR_PAT, pat);
+- pat_init_cache_modes(pat);
+-
+ /* keep using Xen gdt for now; no urgent need to change it */
+
+ #ifdef CONFIG_X86_32
+diff --git a/block/genhd.c b/block/genhd.c
+index 9f42526b4d62..3eebd256b765 100644
+--- a/block/genhd.c
++++ b/block/genhd.c
+@@ -856,6 +856,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v)
+ if (iter) {
+ class_dev_iter_exit(iter);
+ kfree(iter);
++ seqf->private = NULL;
+ }
+ }
+
+diff --git a/crypto/gcm.c b/crypto/gcm.c
+index bec329b3de8d..d9ea5f9c0574 100644
+--- a/crypto/gcm.c
++++ b/crypto/gcm.c
+@@ -639,7 +639,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
+
+ ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
+ CRYPTO_ALG_TYPE_HASH,
+- CRYPTO_ALG_TYPE_AHASH_MASK);
++ CRYPTO_ALG_TYPE_AHASH_MASK |
++ crypto_requires_sync(algt->type,
++ algt->mask));
+ if (IS_ERR(ghash_alg))
+ return PTR_ERR(ghash_alg);
+
+diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
+index ea5815c5e128..bc769c448d4a 100644
+--- a/crypto/scatterwalk.c
++++ b/crypto/scatterwalk.c
+@@ -72,7 +72,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
+
+ void scatterwalk_done(struct scatter_walk *walk, int out, int more)
+ {
+- if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more)
++ if (!more || walk->offset >= walk->sg->offset + walk->sg->length ||
++ !(walk->offset & (PAGE_SIZE - 1)))
+ scatterwalk_pagedone(walk, out, more);
+ }
+ EXPORT_SYMBOL_GPL(scatterwalk_done);
+diff --git a/drivers/char/random.c b/drivers/char/random.c
+index b583e5336630..e511f34be177 100644
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -722,15 +722,18 @@ retry:
+ }
+ }
+
+-static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
++static int credit_entropy_bits_safe(struct entropy_store *r, int nbits)
+ {
+ const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1));
+
++ if (nbits < 0)
++ return -EINVAL;
++
+ /* Cap the value to avoid overflows */
+ nbits = min(nbits, nbits_max);
+- nbits = max(nbits, -nbits_max);
+
+ credit_entropy_bits(r, nbits);
++ return 0;
+ }
+
+ /*********************************************************************
+@@ -1542,8 +1545,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+ return -EPERM;
+ if (get_user(ent_count, p))
+ return -EFAULT;
+- credit_entropy_bits_safe(&input_pool, ent_count);
+- return 0;
++ return credit_entropy_bits_safe(&input_pool, ent_count);
+ case RNDADDENTROPY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+@@ -1557,8 +1559,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+ size);
+ if (retval < 0)
+ return retval;
+- credit_entropy_bits_safe(&input_pool, ent_count);
+- return 0;
++ return credit_entropy_bits_safe(&input_pool, ent_count);
+ case RNDZAPENTCNT:
+ case RNDCLEARPOOL:
+ /*
+diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
+index 808a320e9d5d..2a0d58959acf 100644
+--- a/drivers/cpufreq/pcc-cpufreq.c
++++ b/drivers/cpufreq/pcc-cpufreq.c
+@@ -555,8 +555,6 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
+ policy->min = policy->cpuinfo.min_freq =
+ ioread32(&pcch_hdr->minimum_frequency) * 1000;
+
+- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+-
+ pr_debug("init: policy->max is %d, policy->min is %d\n",
+ policy->max, policy->min);
+ out:
+diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
+index 585a3b7915bd..f1b0eafecd0d 100644
+--- a/drivers/i2c/busses/i2c-i801.c
++++ b/drivers/i2c/busses/i2c-i801.c
+@@ -244,6 +244,13 @@ struct i801_priv {
+ struct platform_device *mux_pdev;
+ #endif
+ struct platform_device *tco_pdev;
++
++ /*
++ * If set to true the host controller registers are reserved for
++ * ACPI AML use. Protected by acpi_lock.
++ */
++ bool acpi_reserved;
++ struct mutex acpi_lock;
+ };
+
+ #define FEATURE_SMBUS_PEC (1 << 0)
+@@ -714,9 +721,15 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
+ {
+ int hwpec;
+ int block = 0;
+- int ret, xact = 0;
++ int ret = 0, xact = 0;
+ struct i801_priv *priv = i2c_get_adapdata(adap);
+
++ mutex_lock(&priv->acpi_lock);
++ if (priv->acpi_reserved) {
++ mutex_unlock(&priv->acpi_lock);
++ return -EBUSY;
++ }
++
+ hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC)
+ && size != I2C_SMBUS_QUICK
+ && size != I2C_SMBUS_I2C_BLOCK_DATA;
+@@ -773,7 +786,8 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
+ default:
+ dev_err(&priv->pci_dev->dev, "Unsupported transaction %d\n",
+ size);
+- return -EOPNOTSUPP;
++ ret = -EOPNOTSUPP;
++ goto out;
+ }
+
+ if (hwpec) /* enable/disable hardware PEC */
+@@ -796,11 +810,11 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
+ ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv));
+
+ if (block)
+- return ret;
++ goto out;
+ if (ret)
+- return ret;
++ goto out;
+ if ((read_write == I2C_SMBUS_WRITE) || (xact == I801_QUICK))
+- return 0;
++ goto out;
+
+ switch (xact & 0x7f) {
+ case I801_BYTE: /* Result put in SMBHSTDAT0 */
+@@ -812,7 +826,10 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
+ (inb_p(SMBHSTDAT1(priv)) << 8);
+ break;
+ }
+- return 0;
++
++out:
++ mutex_unlock(&priv->acpi_lock);
++ return ret;
+ }
+
+
+@@ -1249,6 +1266,72 @@ static void i801_add_tco(struct i801_priv *priv)
+ priv->tco_pdev = pdev;
+ }
+
++#ifdef CONFIG_ACPI
++static acpi_status
++i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
++ u64 *value, void *handler_context, void *region_context)
++{
++ struct i801_priv *priv = handler_context;
++ struct pci_dev *pdev = priv->pci_dev;
++ acpi_status status;
++
++ /*
++ * Once BIOS AML code touches the OpRegion we warn and inhibit any
++ * further access from the driver itself. This device is now owned
++ * by the system firmware.
++ */
++ mutex_lock(&priv->acpi_lock);
++
++ if (!priv->acpi_reserved) {
++ priv->acpi_reserved = true;
++
++ dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n");
++ dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n");
++ }
++
++ if ((function & ACPI_IO_MASK) == ACPI_READ)
++ status = acpi_os_read_port(address, (u32 *)value, bits);
++ else
++ status = acpi_os_write_port(address, (u32)*value, bits);
++
++ mutex_unlock(&priv->acpi_lock);
++
++ return status;
++}
++
++static int i801_acpi_probe(struct i801_priv *priv)
++{
++ struct acpi_device *adev;
++ acpi_status status;
++
++ adev = ACPI_COMPANION(&priv->pci_dev->dev);
++ if (adev) {
++ status = acpi_install_address_space_handler(adev->handle,
++ ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler,
++ NULL, priv);
++ if (ACPI_SUCCESS(status))
++ return 0;
++ }
++
++ return acpi_check_resource_conflict(&priv->pci_dev->resource[SMBBAR]);
++}
++
++static void i801_acpi_remove(struct i801_priv *priv)
++{
++ struct acpi_device *adev;
++
++ adev = ACPI_COMPANION(&priv->pci_dev->dev);
++ if (!adev)
++ return;
++
++ acpi_remove_address_space_handler(adev->handle,
++ ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler);
++}
++#else
++static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; }
++static inline void i801_acpi_remove(struct i801_priv *priv) { }
++#endif
++
+ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ {
+ unsigned char temp;
+@@ -1266,6 +1349,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ priv->adapter.dev.parent = &dev->dev;
+ ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&dev->dev));
+ priv->adapter.retries = 3;
++ mutex_init(&priv->acpi_lock);
+
+ priv->pci_dev = dev;
+ switch (dev->device) {
+@@ -1328,10 +1412,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ return -ENODEV;
+ }
+
+- err = acpi_check_resource_conflict(&dev->resource[SMBBAR]);
+- if (err) {
++ if (i801_acpi_probe(priv))
+ return -ENODEV;
+- }
+
+ err = pcim_iomap_regions(dev, 1 << SMBBAR,
+ dev_driver_string(&dev->dev));
+@@ -1340,6 +1422,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ "Failed to request SMBus region 0x%lx-0x%Lx\n",
+ priv->smba,
+ (unsigned long long)pci_resource_end(dev, SMBBAR));
++ i801_acpi_remove(priv);
+ return err;
+ }
+
+@@ -1404,6 +1487,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
+ err = i2c_add_adapter(&priv->adapter);
+ if (err) {
+ dev_err(&dev->dev, "Failed to add SMBus adapter\n");
++ i801_acpi_remove(priv);
+ return err;
+ }
+
+@@ -1422,6 +1506,7 @@ static void i801_remove(struct pci_dev *dev)
+
+ i801_del_mux(priv);
+ i2c_del_adapter(&priv->adapter);
++ i801_acpi_remove(priv);
+ pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
+
+ platform_device_unregister(priv->tco_pdev);
+diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
+index db760e84119f..b8df0f5e8c25 100644
+--- a/drivers/net/bonding/bond_netlink.c
++++ b/drivers/net/bonding/bond_netlink.c
+@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_net, struct net_device *bond_dev,
+ if (err < 0)
+ return err;
+
+- return register_netdevice(bond_dev);
++ err = register_netdevice(bond_dev);
++
++ netif_carrier_off(bond_dev);
++
++ return err;
+ }
+
+ static size_t bond_get_size(const struct net_device *bond_dev)
+diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
+index 38db2e4d7d54..832401b41b98 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -231,7 +231,7 @@ err_dma:
+ dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+ DMA_TO_DEVICE);
+
+- while (i > 0) {
++ while (i-- > 0) {
+ int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+ struct bgmac_slot_info *slot = &ring->slots[index];
+ u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
+index 89469d5aae25..40e6f6c11f20 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
+@@ -791,13 +791,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
+ * in a bitmap and increasing the chain consumer only
+ * for the first successive completed entries.
+ */
+- bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE);
++ __set_bit(pos, p_spq->p_comp_bitmap);
+
+ while (test_bit(p_spq->comp_bitmap_idx,
+ p_spq->p_comp_bitmap)) {
+- bitmap_clear(p_spq->p_comp_bitmap,
+- p_spq->comp_bitmap_idx,
+- SPQ_RING_SIZE);
++ __clear_bit(p_spq->comp_bitmap_idx,
++ p_spq->p_comp_bitmap);
+ p_spq->comp_bitmap_idx++;
+ qed_chain_return_produced(&p_spq->chain);
+ }
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 8f3c55d03d5d..f58858b7972c 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -914,7 +914,6 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
+ }
+
+ macsec_skb_cb(skb)->req = req;
+- macsec_skb_cb(skb)->rx_sa = rx_sa;
+ skb->dev = dev;
+ aead_request_set_callback(req, 0, macsec_decrypt_done, skb);
+
+@@ -1141,6 +1140,8 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
+ }
+ }
+
++ macsec_skb_cb(skb)->rx_sa = rx_sa;
++
+ /* Disabled && !changed text => skip validation */
+ if (hdr->tci_an & MACSEC_TCI_C ||
+ secy->validate_frames != MACSEC_VALIDATE_DISABLED)
+diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
+index f572b31a2b20..9ab88e1ed394 100644
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -2404,8 +2404,6 @@ ppp_unregister_channel(struct ppp_channel *chan)
+ spin_lock_bh(&pn->all_channels_lock);
+ list_del(&pch->list);
+ spin_unlock_bh(&pn->all_channels_lock);
+- put_net(pch->chan_net);
+- pch->chan_net = NULL;
+
+ pch->file.dead = 1;
+ wake_up_interruptible(&pch->file.rwait);
+@@ -2999,6 +2997,9 @@ ppp_disconnect_channel(struct channel *pch)
+ */
+ static void ppp_destroy_channel(struct channel *pch)
+ {
++ put_net(pch->chan_net);
++ pch->chan_net = NULL;
++
+ atomic_dec(&channel_count);
+
+ if (!pch->file.dead) {
+diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
+index 8e343a3ca873..9d2704c83fa7 100644
+--- a/drivers/nvdimm/pfn.h
++++ b/drivers/nvdimm/pfn.h
+@@ -33,7 +33,9 @@ struct nd_pfn_sb {
+ /* minor-version-1 additions for section alignment */
+ __le32 start_pad;
+ __le32 end_trunc;
+- u8 padding[4004];
++ /* minor-version-2 record the base alignment of the mapping */
++ __le32 align;
++ u8 padding[4000];
+ __le64 checksum;
+ };
+
+diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
+index e071e214feba..84f2372dd0bb 100644
+--- a/drivers/nvdimm/pfn_devs.c
++++ b/drivers/nvdimm/pfn_devs.c
+@@ -329,6 +329,8 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
+ int nd_pfn_validate(struct nd_pfn *nd_pfn)
+ {
+ u64 checksum, offset;
++ unsigned long align;
++ enum nd_pfn_mode mode;
+ struct nd_namespace_io *nsio;
+ struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+@@ -360,6 +362,9 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
+ pfn_sb->end_trunc = 0;
+ }
+
++ if (__le16_to_cpu(pfn_sb->version_minor) < 2)
++ pfn_sb->align = 0;
++
+ switch (le32_to_cpu(pfn_sb->mode)) {
+ case PFN_MODE_RAM:
+ case PFN_MODE_PMEM:
+@@ -368,20 +373,50 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
+ return -ENXIO;
+ }
+
++ align = le32_to_cpu(pfn_sb->align);
++ offset = le64_to_cpu(pfn_sb->dataoff);
++ if (align == 0)
++ align = 1UL << ilog2(offset);
++ mode = le32_to_cpu(pfn_sb->mode);
++
+ if (!nd_pfn->uuid) {
+- /* from probe we allocate */
++ /*
++ * When probing a namepace via nd_pfn_probe() the uuid
++ * is NULL (see: nd_pfn_devinit()) we init settings from
++ * pfn_sb
++ */
+ nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
+ if (!nd_pfn->uuid)
+ return -ENOMEM;
++ nd_pfn->align = align;
++ nd_pfn->mode = mode;
+ } else {
+- /* from init we validate */
++ /*
++ * When probing a pfn / dax instance we validate the
++ * live settings against the pfn_sb
++ */
+ if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
+ return -ENODEV;
++
++ /*
++ * If the uuid validates, but other settings mismatch
++ * return EINVAL because userspace has managed to change
++ * the configuration without specifying new
++ * identification.
++ */
++ if (nd_pfn->align != align || nd_pfn->mode != mode) {
++ dev_err(&nd_pfn->dev,
++ "init failed, settings mismatch\n");
++ dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
++ nd_pfn->align, align, nd_pfn->mode,
++ mode);
++ return -EINVAL;
++ }
+ }
+
+- if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
++ if (align > nvdimm_namespace_capacity(ndns)) {
+ dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
+- nd_pfn->align, nvdimm_namespace_capacity(ndns));
++ align, nvdimm_namespace_capacity(ndns));
+ return -EINVAL;
+ }
+
+@@ -391,7 +426,6 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
+ * namespace has changed since the pfn superblock was
+ * established.
+ */
+- offset = le64_to_cpu(pfn_sb->dataoff);
+ nsio = to_nd_namespace_io(&ndns->dev);
+ if (offset >= resource_size(&nsio->res)) {
+ dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
+@@ -399,10 +433,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
+ return -EBUSY;
+ }
+
+- nd_pfn->align = 1UL << ilog2(offset);
+- if (!is_power_of_2(offset) || offset < PAGE_SIZE) {
+- dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
+- offset);
++ if ((align && !IS_ALIGNED(offset, align))
++ || !IS_ALIGNED(offset, PAGE_SIZE)) {
++ dev_err(&nd_pfn->dev,
++ "bad offset: %#llx dax disabled align: %#lx\n",
++ offset, align);
+ return -ENXIO;
+ }
+
+diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
+index 92f536596b24..368efac7a950 100644
+--- a/drivers/nvdimm/pmem.c
++++ b/drivers/nvdimm/pmem.c
+@@ -426,9 +426,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
+ memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
+ memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
+ pfn_sb->version_major = cpu_to_le16(1);
+- pfn_sb->version_minor = cpu_to_le16(1);
++ pfn_sb->version_minor = cpu_to_le16(2);
+ pfn_sb->start_pad = cpu_to_le32(start_pad);
+ pfn_sb->end_trunc = cpu_to_le32(end_trunc);
++ pfn_sb->align = cpu_to_le32(nd_pfn->align);
+ checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
+ pfn_sb->checksum = cpu_to_le64(checksum);
+
+@@ -501,7 +502,6 @@ static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn)
+ pmem = dev_get_drvdata(dev);
+ pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
+ pmem->pfn_pad = start_pad + end_trunc;
+- nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
+ if (nd_pfn->mode == PFN_MODE_RAM) {
+ if (pmem->data_offset < SZ_8K)
+ return -EINVAL;
+diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
+index 6c7fe4778793..7e156a0b65dd 100644
+--- a/drivers/regulator/qcom_smd-regulator.c
++++ b/drivers/regulator/qcom_smd-regulator.c
+@@ -140,7 +140,6 @@ static const struct regulator_ops rpm_smps_ldo_ops = {
+ .enable = rpm_reg_enable,
+ .disable = rpm_reg_disable,
+ .is_enabled = rpm_reg_is_enabled,
+- .list_voltage = regulator_list_voltage_linear_range,
+
+ .get_voltage = rpm_reg_get_voltage,
+ .set_voltage = rpm_reg_set_voltage,
+diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c
+index ae8a70f703eb..23e60feb81bb 100644
+--- a/drivers/staging/rdma/hfi1/ud.c
++++ b/drivers/staging/rdma/hfi1/ud.c
+@@ -678,8 +678,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
+ u32 tlen = packet->tlen;
+ struct rvt_qp *qp = packet->qp;
+ bool has_grh = rcv_flags & HFI1_HAS_GRH;
+- bool sc4_bit = has_sc4_bit(packet);
+- u8 sc;
++ u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+ u32 bth1;
+ int is_mcast;
+ struct ib_grh *grh = NULL;
+@@ -697,10 +696,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
+ */
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+- u8 sl, sc5;
++ u8 sl;
+
+- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
+- sc5 |= sc4_bit;
+ sl = ibp->sc_to_sl[sc5];
+
+ process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD);
+@@ -717,10 +714,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
+
+ if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) {
+ u16 slid = be16_to_cpu(hdr->lrh[3]);
+- u8 sc5;
+-
+- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
+- sc5 |= sc4_bit;
+
+ return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh);
+ }
+@@ -745,10 +738,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
+ if (qp->ibqp.qp_num > 1) {
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ u16 slid;
+- u8 sc5;
+-
+- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
+- sc5 |= sc4_bit;
+
+ slid = be16_to_cpu(hdr->lrh[3]);
+ if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
+@@ -790,10 +779,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
+ /* Received on QP0, and so by definition, this is an SMP */
+ struct opa_smp *smp = (struct opa_smp *)data;
+ u16 slid = be16_to_cpu(hdr->lrh[3]);
+- u8 sc5;
+-
+- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
+- sc5 |= sc4_bit;
+
+ if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp))
+ goto drop;
+@@ -890,9 +875,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
+ }
+
+ wc.slid = be16_to_cpu(hdr->lrh[3]);
+- sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
+- sc |= sc4_bit;
+- wc.sl = ibp->sc_to_sl[sc];
++ wc.sl = ibp->sc_to_sl[sc5];
+
+ /*
+ * Save the LMC lower bits if the destination LID is a unicast LID.
+diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/staging/rdma/hfi1/verbs_txreq.c
+index bc95c4112c61..d8fb056526f8 100644
+--- a/drivers/staging/rdma/hfi1/verbs_txreq.c
++++ b/drivers/staging/rdma/hfi1/verbs_txreq.c
+@@ -92,11 +92,10 @@ void hfi1_put_txreq(struct verbs_txreq *tx)
+
+ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+ struct rvt_qp *qp)
++ __must_hold(&qp->s_lock)
+ {
+ struct verbs_txreq *tx = ERR_PTR(-EBUSY);
+- unsigned long flags;
+
+- spin_lock_irqsave(&qp->s_lock, flags);
+ write_seqlock(&dev->iowait_lock);
+ if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+ struct hfi1_qp_priv *priv;
+@@ -116,7 +115,6 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+ }
+ out:
+ write_sequnlock(&dev->iowait_lock);
+- spin_unlock_irqrestore(&qp->s_lock, flags);
+ return tx;
+ }
+
+diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h
+index 1cf69b2fe4a5..a1d6e0807f97 100644
+--- a/drivers/staging/rdma/hfi1/verbs_txreq.h
++++ b/drivers/staging/rdma/hfi1/verbs_txreq.h
+@@ -73,6 +73,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+
+ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
+ struct rvt_qp *qp)
++ __must_hold(&qp->slock)
+ {
+ struct verbs_txreq *tx;
+ struct hfi1_qp_priv *priv = qp->priv;
+diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
+index 0ff27818bb87..25b9f178c8d3 100644
+--- a/drivers/tty/serial/mvebu-uart.c
++++ b/drivers/tty/serial/mvebu-uart.c
+@@ -299,6 +299,8 @@ static int mvebu_uart_startup(struct uart_port *port)
+ static void mvebu_uart_shutdown(struct uart_port *port)
+ {
+ writel(0, port->membase + UART_CTRL);
++
++ free_irq(port->irq, port);
+ }
+
+ static void mvebu_uart_set_termios(struct uart_port *port,
+diff --git a/fs/dcache.c b/fs/dcache.c
+index 44008e3fafc4..5612631b7b46 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -578,7 +578,6 @@ static struct dentry *dentry_kill(struct dentry *dentry)
+
+ failed:
+ spin_unlock(&dentry->d_lock);
+- cpu_relax();
+ return dentry; /* try again with same dentry */
+ }
+
+@@ -752,6 +751,8 @@ void dput(struct dentry *dentry)
+ return;
+
+ repeat:
++ might_sleep();
++
+ rcu_read_lock();
+ if (likely(fast_dput(dentry))) {
+ rcu_read_unlock();
+@@ -783,8 +784,10 @@ repeat:
+
+ kill_it:
+ dentry = dentry_kill(dentry);
+- if (dentry)
++ if (dentry) {
++ cond_resched();
+ goto repeat;
++ }
+ }
+ EXPORT_SYMBOL(dput);
+
+diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
+index fe1f50fe764f..f97110461c19 100644
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -208,6 +208,9 @@ static int ext4_init_block_bitmap(struct super_block *sb,
+ memset(bh->b_data, 0, sb->s_blocksize);
+
+ bit_max = ext4_num_base_meta_clusters(sb, block_group);
++ if ((bit_max >> 3) >= bh->b_size)
++ return -EFSCORRUPTED;
++
+ for (bit = 0; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index de692b91c166..8211698600c2 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -376,9 +376,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
+ ext4_fsblk_t block = ext4_ext_pblock(ext);
+ int len = ext4_ext_get_actual_len(ext);
+ ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+- ext4_lblk_t last = lblock + len - 1;
+
+- if (len == 0 || lblock > last)
++ /*
++ * We allow neither:
++ * - zero length
++ * - overflow/wrap-around
++ */
++ if (lblock + len <= lblock)
+ return 0;
+ return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
+ }
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 250c2df04a92..58197a7c3c2c 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -205,9 +205,9 @@ void ext4_evict_inode(struct inode *inode)
+ * Note that directories do not have this problem because they
+ * don't use page cache.
+ */
+- if (ext4_should_journal_data(inode) &&
+- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+- inode->i_ino != EXT4_JOURNAL_INO) {
++ if (inode->i_ino != EXT4_JOURNAL_INO &&
++ ext4_should_journal_data(inode) &&
++ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+ tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
+
+@@ -2741,13 +2741,36 @@ retry:
+ done = true;
+ }
+ }
+- ext4_journal_stop(handle);
++ /*
++ * Caution: If the handle is synchronous,
++ * ext4_journal_stop() can wait for transaction commit
++ * to finish which may depend on writeback of pages to
++ * complete or on page lock to be released. In that
++ * case, we have to wait until after after we have
++ * submitted all the IO, released page locks we hold,
++ * and dropped io_end reference (for extent conversion
++ * to be able to complete) before stopping the handle.
++ */
++ if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
++ ext4_journal_stop(handle);
++ handle = NULL;
++ }
+ /* Submit prepared bio */
+ ext4_io_submit(&mpd.io_submit);
+ /* Unlock pages we didn't use */
+ mpage_release_unused_pages(&mpd, give_up_on_write);
+- /* Drop our io_end reference we got from init */
+- ext4_put_io_end(mpd.io_submit.io_end);
++ /*
++ * Drop our io_end reference we got from init. We have
++ * to be careful and use deferred io_end finishing if
++ * we are still holding the transaction as we can
++ * release the last reference to io_end which may end
++ * up doing unwritten extent conversion.
++ */
++ if (handle) {
++ ext4_put_io_end_defer(mpd.io_submit.io_end);
++ ext4_journal_stop(handle);
++ } else
++ ext4_put_io_end(mpd.io_submit.io_end);
+
+ if (ret == -ENOSPC && sbi->s_journal) {
+ /*
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 9d26fa2188f6..5f7ae0898ef7 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2939,7 +2939,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+ ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
+ "fs metadata", block, block+len);
+ /* File system mounted not to panic on error
+- * Fix the bitmap and repeat the block allocation
++ * Fix the bitmap and return EFSCORRUPTED
+ * We leak some of the blocks here.
+ */
+ ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+@@ -2948,7 +2948,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+ ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+ if (!err)
+- err = -EAGAIN;
++ err = -EFSCORRUPTED;
+ goto out_err;
+ }
+
+@@ -4513,18 +4513,7 @@ repeat:
+ }
+ if (likely(ac->ac_status == AC_STATUS_FOUND)) {
+ *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
+- if (*errp == -EAGAIN) {
+- /*
+- * drop the reference that we took
+- * in ext4_mb_use_best_found
+- */
+- ext4_mb_release_context(ac);
+- ac->ac_b_ex.fe_group = 0;
+- ac->ac_b_ex.fe_start = 0;
+- ac->ac_b_ex.fe_len = 0;
+- ac->ac_status = AC_STATUS_CONTINUE;
+- goto repeat;
+- } else if (*errp) {
++ if (*errp) {
+ ext4_discard_allocated_blocks(ac);
+ goto errout;
+ } else {
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 304c712dbe12..7fca76b6cd61 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2277,6 +2277,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
+ while (es->s_last_orphan) {
+ struct inode *inode;
+
++ /*
++ * We may have encountered an error during cleanup; if
++ * so, skip the rest.
++ */
++ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
++ jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
++ es->s_last_orphan = 0;
++ break;
++ }
++
+ inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
+ if (IS_ERR(inode)) {
+ es->s_last_orphan = 0;
+@@ -3415,6 +3425,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ goto failed_mount;
+ }
+
++ if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
++ ext4_msg(sb, KERN_ERR,
++ "Number of reserved GDT blocks insanely large: %d",
++ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
++ goto failed_mount;
++ }
++
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+ if (blocksize != PAGE_SIZE) {
+ ext4_msg(sb, KERN_ERR,
+diff --git a/fs/fuse/file.c b/fs/fuse/file.c
+index dcad5e210525..3c7675fa664f 100644
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -417,6 +417,15 @@ static int fuse_flush(struct file *file, fl_owner_t id)
+ fuse_sync_writes(inode);
+ inode_unlock(inode);
+
++ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
++ err = -ENOSPC;
++ if (test_bit(AS_EIO, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
++ err = -EIO;
++ if (err)
++ return err;
++
+ req = fuse_get_req_nofail_nopages(fc, file);
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+@@ -462,6 +471,21 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
+ goto out;
+
+ fuse_sync_writes(inode);
++
++ /*
++ * Due to implementation of fuse writeback
++ * filemap_write_and_wait_range() does not catch errors.
++ * We have to do this directly after fuse_sync_writes()
++ */
++ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
++ err = -ENOSPC;
++ if (test_bit(AS_EIO, &file->f_mapping->flags) &&
++ test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
++ err = -EIO;
++ if (err)
++ goto out;
++
+ err = sync_inode_metadata(inode, 1);
+ if (err)
+ goto out;
+diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
+index 1ce67668a8e1..d302a5fff9ba 100644
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -926,7 +926,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
+ arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
+ FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
+ FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
+- FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
++ FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
+ FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
+ FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
+ req->in.h.opcode = FUSE_INIT;
+diff --git a/fs/inode.c b/fs/inode.c
+index 721fa18ead59..fd832eba4596 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -344,7 +344,7 @@ EXPORT_SYMBOL(inc_nlink);
+ void address_space_init_once(struct address_space *mapping)
+ {
+ memset(mapping, 0, sizeof(*mapping));
+- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
++ INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
+ spin_lock_init(&mapping->tree_lock);
+ init_rwsem(&mapping->i_mmap_rwsem);
+ INIT_LIST_HEAD(&mapping->private_list);
+diff --git a/fs/udf/super.c b/fs/udf/super.c
+index 36661acaf33b..5e2c8c814e1b 100644
+--- a/fs/udf/super.c
++++ b/fs/udf/super.c
+@@ -78,6 +78,15 @@
+ #define VSD_FIRST_SECTOR_OFFSET 32768
+ #define VSD_MAX_SECTOR_OFFSET 0x800000
+
++/*
++ * Maximum number of Terminating Descriptor / Logical Volume Integrity
++ * Descriptor redirections. The chosen numbers are arbitrary - just that we
++ * hopefully don't limit any real use of rewritten inode on write-once media
++ * but avoid looping for too long on corrupted media.
++ */
++#define UDF_MAX_TD_NESTING 64
++#define UDF_MAX_LVID_NESTING 1000
++
+ enum { UDF_MAX_LINKS = 0xffff };
+
+ /* These are the "meat" - everything else is stuffing */
+@@ -1541,42 +1550,52 @@ out_bh:
+ }
+
+ /*
+- * udf_load_logicalvolint
+- *
++ * Find the prevailing Logical Volume Integrity Descriptor.
+ */
+ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ad loc)
+ {
+- struct buffer_head *bh = NULL;
++ struct buffer_head *bh, *final_bh;
+ uint16_t ident;
+ struct udf_sb_info *sbi = UDF_SB(sb);
+ struct logicalVolIntegrityDesc *lvid;
++ int indirections = 0;
++
++ while (++indirections <= UDF_MAX_LVID_NESTING) {
++ final_bh = NULL;
++ while (loc.extLength > 0 &&
++ (bh = udf_read_tagged(sb, loc.extLocation,
++ loc.extLocation, &ident))) {
++ if (ident != TAG_IDENT_LVID) {
++ brelse(bh);
++ break;
++ }
++
++ brelse(final_bh);
++ final_bh = bh;
+
+- while (loc.extLength > 0 &&
+- (bh = udf_read_tagged(sb, loc.extLocation,
+- loc.extLocation, &ident)) &&
+- ident == TAG_IDENT_LVID) {
+- sbi->s_lvid_bh = bh;
+- lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
++ loc.extLength -= sb->s_blocksize;
++ loc.extLocation++;
++ }
+
+- if (lvid->nextIntegrityExt.extLength)
+- udf_load_logicalvolint(sb,
+- leea_to_cpu(lvid->nextIntegrityExt));
++ if (!final_bh)
++ return;
+
+- if (sbi->s_lvid_bh != bh)
+- brelse(bh);
+- loc.extLength -= sb->s_blocksize;
+- loc.extLocation++;
++ brelse(sbi->s_lvid_bh);
++ sbi->s_lvid_bh = final_bh;
++
++ lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data;
++ if (lvid->nextIntegrityExt.extLength == 0)
++ return;
++
++ loc = leea_to_cpu(lvid->nextIntegrityExt);
+ }
+- if (sbi->s_lvid_bh != bh)
+- brelse(bh);
++
++ udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n",
++ UDF_MAX_LVID_NESTING);
++ brelse(sbi->s_lvid_bh);
++ sbi->s_lvid_bh = NULL;
+ }
+
+-/*
+- * Maximum number of Terminating Descriptor redirections. The chosen number is
+- * arbitrary - just that we hopefully don't limit any real use of rewritten
+- * inode on write-once media but avoid looping for too long on corrupted media.
+- */
+-#define UDF_MAX_TD_NESTING 64
+
+ /*
+ * Process a main/reserve volume descriptor sequence.
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 78181a88903b..54355a7e46de 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -4156,6 +4156,13 @@ static inline void netif_keep_dst(struct net_device *dev)
+ dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
+ }
+
++/* return true if dev can't cope with mtu frames that need vlan tag insertion */
++static inline bool netif_reduces_vlan_mtu(struct net_device *dev)
++{
++ /* TODO: reserve and use an additional IFF bit, if we get more users */
++ return dev->priv_flags & IFF_MACSEC;
++}
++
+ extern struct pernet_operations __net_initdata loopback_net_ops;
+
+ /* Logging, debugging and troubleshooting/diagnostic helpers. */
+diff --git a/ipc/msg.c b/ipc/msg.c
+index 1471db9a7e61..c6521c205cb4 100644
+--- a/ipc/msg.c
++++ b/ipc/msg.c
+@@ -680,7 +680,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
+ rcu_read_lock();
+ ipc_lock_object(&msq->q_perm);
+
+- ipc_rcu_putref(msq, ipc_rcu_free);
++ ipc_rcu_putref(msq, msg_rcu_free);
+ /* raced with RMID? */
+ if (!ipc_valid_object(&msq->q_perm)) {
+ err = -EIDRM;
+diff --git a/ipc/sem.c b/ipc/sem.c
+index b3757ea0694b..5d2f875e8e2e 100644
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -449,7 +449,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
+ static inline void sem_lock_and_putref(struct sem_array *sma)
+ {
+ sem_lock(sma, NULL, -1);
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ }
+
+ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
+@@ -1392,7 +1392,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
+ rcu_read_unlock();
+ sem_io = ipc_alloc(sizeof(ushort)*nsems);
+ if (sem_io == NULL) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ return -ENOMEM;
+ }
+
+@@ -1426,20 +1426,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
+ if (nsems > SEMMSL_FAST) {
+ sem_io = ipc_alloc(sizeof(ushort)*nsems);
+ if (sem_io == NULL) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ return -ENOMEM;
+ }
+ }
+
+ if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ err = -EFAULT;
+ goto out_free;
+ }
+
+ for (i = 0; i < nsems; i++) {
+ if (sem_io[i] > SEMVMX) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ err = -ERANGE;
+ goto out_free;
+ }
+@@ -1731,7 +1731,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
+ /* step 2: allocate new undo structure */
+ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+ if (!new) {
+- ipc_rcu_putref(sma, ipc_rcu_free);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ return ERR_PTR(-ENOMEM);
+ }
+
+diff --git a/lib/radix-tree.c b/lib/radix-tree.c
+index 1624c4117961..9b9be3ffa1f6 100644
+--- a/lib/radix-tree.c
++++ b/lib/radix-tree.c
+@@ -228,10 +228,11 @@ radix_tree_node_alloc(struct radix_tree_root *root)
+
+ /*
+ * Even if the caller has preloaded, try to allocate from the
+- * cache first for the new node to get accounted.
++ * cache first for the new node to get accounted to the memory
++ * cgroup.
+ */
+ ret = kmem_cache_alloc(radix_tree_node_cachep,
+- gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN);
++ gfp_mask | __GFP_NOWARN);
+ if (ret)
+ goto out;
+
+@@ -254,8 +255,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
+ kmemleak_update_trace(ret);
+ goto out;
+ }
+- ret = kmem_cache_alloc(radix_tree_node_cachep,
+- gfp_mask | __GFP_ACCOUNT);
++ ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+ out:
+ BUG_ON(radix_tree_is_indirect_ptr(ret));
+ return ret;
+@@ -302,6 +302,12 @@ static int __radix_tree_preload(gfp_t gfp_mask)
+ struct radix_tree_node *node;
+ int ret = -ENOMEM;
+
++ /*
++ * Nodes preloaded by one cgroup can be be used by another cgroup, so
++ * they should never be accounted to any particular memory cgroup.
++ */
++ gfp_mask &= ~__GFP_ACCOUNT;
++
+ preempt_disable();
+ rtp = this_cpu_ptr(&radix_tree_preloads);
+ while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index bf860dbdd26e..eb2d761a83a2 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -4064,14 +4064,32 @@ static struct cftype mem_cgroup_legacy_files[] = {
+
+ static DEFINE_IDR(mem_cgroup_idr);
+
+-static void mem_cgroup_id_get(struct mem_cgroup *memcg)
++static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
+ {
+- atomic_inc(&memcg->id.ref);
++ atomic_add(n, &memcg->id.ref);
+ }
+
+-static void mem_cgroup_id_put(struct mem_cgroup *memcg)
++static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+ {
+- if (atomic_dec_and_test(&memcg->id.ref)) {
++ while (!atomic_inc_not_zero(&memcg->id.ref)) {
++ /*
++ * The root cgroup cannot be destroyed, so it's refcount must
++ * always be >= 1.
++ */
++ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
++ VM_BUG_ON(1);
++ break;
++ }
++ memcg = parent_mem_cgroup(memcg);
++ if (!memcg)
++ memcg = root_mem_cgroup;
++ }
++ return memcg;
++}
++
++static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
++{
++ if (atomic_sub_and_test(n, &memcg->id.ref)) {
+ idr_remove(&mem_cgroup_idr, memcg->id.id);
+ memcg->id.id = 0;
+
+@@ -4080,6 +4098,16 @@ static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+ }
+ }
+
++static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
++{
++ mem_cgroup_id_get_many(memcg, 1);
++}
++
++static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
++{
++ mem_cgroup_id_put_many(memcg, 1);
++}
++
+ /**
+ * mem_cgroup_from_id - look up a memcg from a memcg id
+ * @id: the memcg id to look up
+@@ -4716,6 +4744,8 @@ static void __mem_cgroup_clear_mc(void)
+ if (!mem_cgroup_is_root(mc.from))
+ page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
+
++ mem_cgroup_id_put_many(mc.from, mc.moved_swap);
++
+ /*
+ * we charged both to->memory and to->memsw, so we
+ * should uncharge to->memory.
+@@ -4723,9 +4753,9 @@ static void __mem_cgroup_clear_mc(void)
+ if (!mem_cgroup_is_root(mc.to))
+ page_counter_uncharge(&mc.to->memory, mc.moved_swap);
+
+- css_put_many(&mc.from->css, mc.moved_swap);
++ mem_cgroup_id_get_many(mc.to, mc.moved_swap);
++ css_put_many(&mc.to->css, mc.moved_swap);
+
+- /* we've already done css_get(mc.to) */
+ mc.moved_swap = 0;
+ }
+ memcg_oom_recover(from);
+@@ -5785,7 +5815,7 @@ subsys_initcall(mem_cgroup_init);
+ */
+ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ {
+- struct mem_cgroup *memcg;
++ struct mem_cgroup *memcg, *swap_memcg;
+ unsigned short oldid;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+@@ -5800,16 +5830,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ if (!memcg)
+ return;
+
+- mem_cgroup_id_get(memcg);
+- oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
++ /*
++ * In case the memcg owning these pages has been offlined and doesn't
++ * have an ID allocated to it anymore, charge the closest online
++ * ancestor for the swap instead and transfer the memory+swap charge.
++ */
++ swap_memcg = mem_cgroup_id_get_online(memcg);
++ oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
+ VM_BUG_ON_PAGE(oldid, page);
+- mem_cgroup_swap_statistics(memcg, true);
++ mem_cgroup_swap_statistics(swap_memcg, true);
+
+ page->mem_cgroup = NULL;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
+
++ if (memcg != swap_memcg) {
++ if (!mem_cgroup_is_root(swap_memcg))
++ page_counter_charge(&swap_memcg->memsw, 1);
++ page_counter_uncharge(&memcg->memsw, 1);
++ }
++
+ /*
+ * Interrupts should be disabled here because the caller holds the
+ * mapping->tree_lock lock which is taken with interrupts-off. It is
+@@ -5848,11 +5889,14 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
+ if (!memcg)
+ return 0;
+
++ memcg = mem_cgroup_id_get_online(memcg);
++
+ if (!mem_cgroup_is_root(memcg) &&
+- !page_counter_try_charge(&memcg->swap, 1, &counter))
++ !page_counter_try_charge(&memcg->swap, 1, &counter)) {
++ mem_cgroup_id_put(memcg);
+ return -ENOMEM;
++ }
+
+- mem_cgroup_id_get(memcg);
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
+diff --git a/mm/mempool.c b/mm/mempool.c
+index 9b7a14a791cc..fd3a393b9eea 100644
+--- a/mm/mempool.c
++++ b/mm/mempool.c
+@@ -310,7 +310,7 @@ EXPORT_SYMBOL(mempool_resize);
+ * returns NULL. Note that due to preallocation, this function
+ * *never* fails when called from process contexts. (it might
+ * fail if called from an IRQ context.)
+- * Note: neither __GFP_NOMEMALLOC nor __GFP_ZERO are supported.
++ * Note: using __GFP_ZERO is not supported.
+ */
+ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+ {
+@@ -319,27 +319,16 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+ wait_queue_t wait;
+ gfp_t gfp_temp;
+
+- /* If oom killed, memory reserves are essential to prevent livelock */
+- VM_WARN_ON_ONCE(gfp_mask & __GFP_NOMEMALLOC);
+- /* No element size to zero on allocation */
+ VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
+-
+ might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
+
++ gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
+ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
+ gfp_mask |= __GFP_NOWARN; /* failures are OK */
+
+ gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);
+
+ repeat_alloc:
+- if (likely(pool->curr_nr)) {
+- /*
+- * Don't allocate from emergency reserves if there are
+- * elements available. This check is racy, but it will
+- * be rechecked each loop.
+- */
+- gfp_temp |= __GFP_NOMEMALLOC;
+- }
+
+ element = pool->alloc(gfp_temp, pool->pool_data);
+ if (likely(element != NULL))
+@@ -363,12 +352,11 @@ repeat_alloc:
+ * We use gfp mask w/o direct reclaim or IO for the first round. If
+ * alloc failed with that and @pool was empty, retry immediately.
+ */
+- if ((gfp_temp & ~__GFP_NOMEMALLOC) != gfp_mask) {
++ if (gfp_temp != gfp_mask) {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ gfp_temp = gfp_mask;
+ goto repeat_alloc;
+ }
+- gfp_temp = gfp_mask;
+
+ /* We must not sleep if !__GFP_DIRECT_RECLAIM */
+ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
+diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
+index e7e62570bdb8..3a573a2dcee2 100644
+--- a/net/8021q/vlan_dev.c
++++ b/net/8021q/vlan_dev.c
+@@ -146,10 +146,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
+
+ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
+ {
+- /* TODO: gotta make sure the underlying layer can handle it,
+- * maybe an IFF_VLAN_CAPABLE flag for devices?
+- */
+- if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu)
++ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
++ unsigned int max_mtu = real_dev->mtu;
++
++ if (netif_reduces_vlan_mtu(real_dev))
++ max_mtu -= VLAN_HLEN;
++ if (max_mtu < new_mtu)
+ return -ERANGE;
+
+ dev->mtu = new_mtu;
+diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
+index c92b52f37d38..1270207f3d7c 100644
+--- a/net/8021q/vlan_netlink.c
++++ b/net/8021q/vlan_netlink.c
+@@ -118,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
+ {
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+ struct net_device *real_dev;
++ unsigned int max_mtu;
+ __be16 proto;
+ int err;
+
+@@ -144,9 +145,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
+ if (err < 0)
+ return err;
+
++ max_mtu = netif_reduces_vlan_mtu(real_dev) ? real_dev->mtu - VLAN_HLEN :
++ real_dev->mtu;
+ if (!tb[IFLA_MTU])
+- dev->mtu = real_dev->mtu;
+- else if (dev->mtu > real_dev->mtu)
++ dev->mtu = max_mtu;
++ else if (dev->mtu > max_mtu)
+ return -EINVAL;
+
+ err = vlan_changelink(dev, tb, data);
+diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
+index 160797722228..b32f5a4750bf 100644
+--- a/net/bridge/br_input.c
++++ b/net/bridge/br_input.c
+@@ -213,6 +213,16 @@ drop:
+ }
+ EXPORT_SYMBOL_GPL(br_handle_frame_finish);
+
++static void __br_handle_local_finish(struct sk_buff *skb)
++{
++ struct net_bridge_port *p = br_port_get_rcu(skb->dev);
++ u16 vid = 0;
++
++ /* check if vlan is allowed, to avoid spoofing */
++ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
++ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
++}
++
+ /* note: already called with rcu_read_lock */
+ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+@@ -279,6 +289,14 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
+ case 0x01: /* IEEE MAC (Pause) */
+ goto drop;
+
++ case 0x0E: /* 802.1AB LLDP */
++ fwd_mask |= p->br->group_fwd_mask;
++ if (fwd_mask & (1u << dest[5]))
++ goto forward;
++ *pskb = skb;
++ __br_handle_local_finish(skb);
++ return RX_HANDLER_PASS;
++
+ default:
+ /* Allow selective forwarding for most other protocols */
+ fwd_mask |= p->br->group_fwd_mask;
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index c124c3c12f7c..e2e78843301c 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -87,7 +87,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1;
+ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
+
+ /* rfc5961 challenge ack rate limiting */
+-int sysctl_tcp_challenge_ack_limit = 100;
++int sysctl_tcp_challenge_ack_limit = 1000;
+
+ int sysctl_tcp_stdurg __read_mostly;
+ int sysctl_tcp_rfc1337 __read_mostly;
+@@ -3423,6 +3423,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
+ return flag;
+ }
+
++static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
++ u32 *last_oow_ack_time)
++{
++ if (*last_oow_ack_time) {
++ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
++
++ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
++ NET_INC_STATS_BH(net, mib_idx);
++ return true; /* rate-limited: don't send yet! */
++ }
++ }
++
++ *last_oow_ack_time = tcp_time_stamp;
++
++ return false; /* not rate-limited: go ahead, send dupack now! */
++}
++
+ /* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+@@ -3436,21 +3453,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+ /* Data packets without SYNs are not likely part of an ACK loop. */
+ if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+ !tcp_hdr(skb)->syn)
+- goto not_rate_limited;
+-
+- if (*last_oow_ack_time) {
+- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+-
+- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+- NET_INC_STATS_BH(net, mib_idx);
+- return true; /* rate-limited: don't send yet! */
+- }
+- }
+-
+- *last_oow_ack_time = tcp_time_stamp;
++ return false;
+
+-not_rate_limited:
+- return false; /* not rate-limited: go ahead, send dupack now! */
++ return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
+ }
+
+ /* RFC 5961 7 [ACK Throttling] */
+@@ -3460,21 +3465,26 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
+ static u32 challenge_timestamp;
+ static unsigned int challenge_count;
+ struct tcp_sock *tp = tcp_sk(sk);
+- u32 now;
++ u32 count, now;
+
+ /* First check our per-socket dupack rate limit. */
+- if (tcp_oow_rate_limited(sock_net(sk), skb,
+- LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
+- &tp->last_oow_ack_time))
++ if (__tcp_oow_rate_limited(sock_net(sk),
++ LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
++ &tp->last_oow_ack_time))
+ return;
+
+- /* Then check the check host-wide RFC 5961 rate limit. */
++ /* Then check host-wide RFC 5961 rate limit. */
+ now = jiffies / HZ;
+ if (now != challenge_timestamp) {
++ u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
++
+ challenge_timestamp = now;
+- challenge_count = 0;
++ WRITE_ONCE(challenge_count, half +
++ prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+ }
+- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
++ count = READ_ONCE(challenge_count);
++ if (count > 0) {
++ WRITE_ONCE(challenge_count, count - 1);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+ tcp_send_ack(sk);
+ }
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 79a03b87a771..7b8e903b2a97 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -236,7 +236,8 @@ void tcp_select_initial_window(int __space, __u32 mss,
+ /* Set window scaling on max possible window
+ * See RFC1323 for an explanation of the limit to 14
+ */
+- space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
++ space = max_t(u32, space, sysctl_tcp_rmem[2]);
++ space = max_t(u32, space, sysctl_rmem_max);
+ space = min_t(u32, space, *window_clamp);
+ while (space > 65535 && (*rcv_wscale) < 14) {
+ space >>= 1;
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index 8ec4b3089e20..e3fc0cdf82a9 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -3563,6 +3563,10 @@ restart:
+ if (state != INET6_IFADDR_STATE_DEAD) {
+ __ipv6_ifa_notify(RTM_DELADDR, ifa);
+ inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
++ } else {
++ if (idev->cnf.forwarding)
++ addrconf_leave_anycast(ifa);
++ addrconf_leave_solict(ifa->idev, &ifa->addr);
+ }
+
+ write_lock_bh(&idev->lock);
+diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
+index 923abd6b3064..8d2f7c9b491d 100644
+--- a/net/irda/af_irda.c
++++ b/net/irda/af_irda.c
+@@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
+ }
+
+ /* Check if we have opened a local TSAP */
+- if (!self->tsap)
+- irda_open_tsap(self, LSAP_ANY, addr->sir_name);
++ if (!self->tsap) {
++ err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
++ if (err)
++ goto out;
++ }
+
+ /* Move to connecting socket, start sending Connect Requests */
+ sock->state = SS_CONNECTING;
+diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
+index ad4fa49ad1db..9068369f8a1b 100644
+--- a/security/apparmor/apparmorfs.c
++++ b/security/apparmor/apparmorfs.c
+@@ -331,6 +331,7 @@ static int aa_fs_seq_hash_show(struct seq_file *seq, void *v)
+ seq_printf(seq, "%.2x", profile->hash[i]);
+ seq_puts(seq, "\n");
+ }
++ aa_put_profile(profile);
+
+ return 0;
+ }
Thread overview: 12+ messages:
2016-07-01 0:31 [gentoo-commits] proj/linux-patches:4.6 commit in: / Mike Pagano
2016-08-16 23:53 Mike Pagano
2016-08-12 11:24 Mike Pagano
2016-08-10 12:57 Mike Pagano
2016-07-27 23:52 Mike Pagano
2016-07-27 19:23 Mike Pagano
2016-07-11 19:57 Mike Pagano
2016-07-02 15:27 Mike Pagano
2016-06-24 20:41 Mike Pagano
2016-06-08 10:09 Mike Pagano
2016-06-02 19:39 Mike Pagano
2016-04-25 12:19 Mike Pagano