diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index c941b214e0b7f5dad509da2ef35d61a91c308b3b..4150f74c521a8198ec68e5033fe2d7a0462fdff6 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -64,6 +64,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
   - arm64
   - arm32
   - ppc64
+  - ppc32
   - sparc64
   - mips64
   - s390x
@@ -73,7 +74,6 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
 And the older cBPF JIT supported on the following archs:
 
   - mips
-  - ppc
   - sparc
 
 eBPF JITs are a superset of cBPF JITs, meaning the kernel will
diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
index 7aff505af706d7b844307d418b5082bd23544542..fa83403b4aec091dd862571dcf382cce0140afe2 100644
--- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
+++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
@@ -21,7 +21,7 @@
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
-    |     powerpc: | TODO |
+    |     powerpc: |  ok  |
     |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: | TODO |
diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst
index 3d553e8a29374aa79f176356c7200efa40996187..fce8bc7936603208f960d8296ca71fe72c5c3ca5 100644
--- a/Documentation/powerpc/papr_hcalls.rst
+++ b/Documentation/powerpc/papr_hcalls.rst
@@ -275,6 +275,34 @@ Health Bitmap Flags:
 Given a DRC Index collect the performance statistics for NVDIMM and copy them
 to the resultBuffer.
 
+**H_SCM_FLUSH**
+
+| Input: *drcIndex, continue-token*
+| Out: *continue-token*
+| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
+
+Given a DRC Index, flush the data to the backend NVDIMM device.
+
+The hcall returns H_BUSY when the flush takes a long time and the hcall needs
+to be issued multiple times in order to be completely serviced. The
+*continue-token* from the output is to be passed in the argument list of
+subsequent hcalls to the hypervisor until the hcall is completely serviced,
+at which point H_SUCCESS or another error is returned by the hypervisor.
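+
+A minimal sketch of the retry loop this implies on the Linux side, assuming
+the generic plpar_hcall() wrapper and a *drc_index* identifying the target
+NVDIMM (an illustration only, not the actual driver code)::
+
+  long rc;
+  unsigned long ret_buf[PLPAR_HCALL_BUFSIZE];
+  uint64_t token = 0;
+
+  do {
+          /* on H_BUSY, resume the flush with the returned continue-token */
+          rc = plpar_hcall(H_SCM_FLUSH, ret_buf, drc_index, token);
+          token = ret_buf[0];
+  } while (rc == H_BUSY);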
+
 References
 ==========
 .. [1] "Power Architecture Platform Reference"
diff --git a/Documentation/powerpc/vas-api.rst b/Documentation/powerpc/vas-api.rst
index 90c50ed839f349f21c89e18209efd977fb0a9c77..bdb50fed903e03379b4cfd4494d23139a658ef05 100644
--- a/Documentation/powerpc/vas-api.rst
+++ b/Documentation/powerpc/vas-api.rst
@@ -254,7 +254,7 @@ using this window. the signal will be issued to the thread group leader
 signals.
 
 NX-GZIP User's Manual:
-https://github.com/libnxz/power-gzip/blob/master/power_nx_gzip_um.pdf
+https://github.com/libnxz/power-gzip/blob/master/doc/power_nx_gzip_um.pdf
 
 Simple example
 ==============
@@ -301,5 +301,5 @@ Simple example
 			close(fd) or window can be closed upon process exit
 		}
 
-	Refer https://github.com/abalib/power-gzip for tests or more
+	Refer https://github.com/libnxz/power-gzip for tests or more
 	use cases.
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index 7508b0ac1d21d51a3a715629a79af38fb2bd76f4..ecb6fd4c3c647cb6a7029b08112d2ae42e8b3f95 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -155,7 +155,8 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
 }
 
 #ifdef CONFIG_TIME_NS
-static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
 	const struct vdso_data *ret;
 
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 4b4c0dac0e1494298e821e859c2a32737989109a..4f7a629df81f7416e9797794406029dd3024a355 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -96,7 +96,7 @@ const struct vdso_data *__arch_get_vdso_data(void)
 
 #ifdef CONFIG_TIME_NS
 static __always_inline
-const struct vdso_data *__arch_get_timens_vdso_data(void)
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
 	return _timens_data;
 }
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3b34c44832e07ab4a50fcbdc8598e6b07158363c..c52b0a42062a062ac1a59f9a14556f8855b95dbd 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,6 +119,7 @@ config PPC
 	#
 	select ARCH_32BIT_OFF_T if PPC32
 	select ARCH_HAS_DEBUG_VIRTUAL
+	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
@@ -135,7 +136,7 @@ config PPC
 	select ARCH_HAS_MEMBARRIER_CALLBACKS
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
-	select ARCH_HAS_STRICT_KERNEL_RWX	if (PPC32 && !HIBERNATION)
+	select ARCH_HAS_STRICT_KERNEL_RWX	if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION)
 	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_UACCESS_FLUSHCACHE
 	select ARCH_HAS_COPY_MC			if PPC64
@@ -145,6 +146,7 @@ config PPC
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select ARCH_OPTIONAL_KERNEL_RWX		if ARCH_HAS_STRICT_KERNEL_RWX
+	select ARCH_STACKWALK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC32 || PPC_BOOK3S_64
 	select ARCH_USE_BUILTIN_BSWAP
@@ -171,6 +173,7 @@ config PPC
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_GETTIMEOFDAY
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
 	select GENERIC_PCI_IOMAP		if PCI
@@ -178,13 +181,15 @@ config PPC
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select GENERIC_TIME_VSYSCALL
-	select GENERIC_GETTIMEOFDAY
+	select GENERIC_VDSO_TIME_NS
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_HUGE_VMAP		if PPC_BOOK3S_64 && PPC_RADIX_MMU
 	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN			if PPC32 && PPC_PAGE_SHIFT <= 14
 	select HAVE_ARCH_KASAN_VMALLOC		if PPC32 && PPC_PAGE_SHIFT <= 14
 	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_KFENCE			if PPC32
 	select HAVE_ARCH_MMAP_RND_BITS
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if COMPAT
 	select HAVE_ARCH_NVRAM_OPS
@@ -192,7 +197,6 @@ config PPC
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ASM_MODVERSIONS
 	select HAVE_C_RECORDMCOUNT
-	select HAVE_CBPF_JIT			if !PPC64
 	select HAVE_STACKPROTECTOR		if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
 	select HAVE_STACKPROTECTOR		if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
 	select HAVE_CONTEXT_TRACKING		if PPC64
@@ -200,7 +204,7 @@ config PPC
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS	if MPROFILE_KERNEL
-	select HAVE_EBPF_JIT			if PPC64
+	select HAVE_EBPF_JIT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS	if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
 	select HAVE_FAST_GUP
 	select HAVE_FTRACE_MCOUNT_RECORD
@@ -224,8 +228,8 @@ config PPC
 	select HAVE_LIVEPATCH			if HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
-	select HAVE_HARDLOCKUP_DETECTOR_ARCH	if (PPC64 && PPC_BOOK3S)
-	select HAVE_OPTPROBES			if PPC64
+	select HAVE_HARDLOCKUP_DETECTOR_ARCH	if PPC64 && PPC_BOOK3S && SMP
+	select HAVE_OPTPROBES
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
 	select HAVE_HARDLOCKUP_DETECTOR_PERF	if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
@@ -234,7 +238,7 @@ config PPC
 	select MMU_GATHER_RCU_TABLE_FREE
 	select MMU_GATHER_PAGE_SIZE
 	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_RELIABLE_STACKTRACE		if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
+	select HAVE_RELIABLE_STACKTRACE
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
@@ -786,7 +790,7 @@ config THREAD_SHIFT
 config DATA_SHIFT_BOOL
 	bool "Set custom data alignment"
 	depends on ADVANCED_OPTIONS
-	depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC
+	depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE
 	depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX)
 	help
 	  This option allows you to set the kernel data alignment. When
@@ -798,13 +802,13 @@ config DATA_SHIFT_BOOL
 config DATA_SHIFT
 	int "Data shift" if DATA_SHIFT_BOOL
 	default 24 if STRICT_KERNEL_RWX && PPC64
-	range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32
-	range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx
+	range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
+	range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
 	default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
-	default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32
+	default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
 	default 23 if STRICT_KERNEL_RWX && PPC_8xx
-	default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA
-	default 19 if DEBUG_PAGEALLOC && PPC_8xx
+	default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA
+	default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
 	default PPC_PAGE_SHIFT
 	help
 	  On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
@@ -1217,7 +1221,7 @@ config TASK_SIZE_BOOL
 config TASK_SIZE
 	hex "Size of user task space" if TASK_SIZE_BOOL
 	default "0x80000000" if PPC_8xx
-	default "0xb0000000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX
+	default "0xb0000000" if PPC_BOOK3S_32
 	default "0xc0000000"
 endmenu
 
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index ae084357994e87b5f07294ab9543b8a80d7d94a0..6342f9da454551a3d72117af402010d8c552f99e 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -353,6 +353,7 @@ config PPC_EARLY_DEBUG_CPM_ADDR
 config FAIL_IOMMU
 	bool "Fault-injection capability for IOMMU"
 	depends on FAULT_INJECTION
+	depends on PCI || IBMVIO
 	help
 	  Provide fault-injection capability for IOMMU. Each device can
 	  be selectively enabled via the fail_iommu property.
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 5f8544cf724a4fff9b2aed3f0f487e047fd8ee44..3212d076ac6a4318e76a55bd98ecf230d7a520f6 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -181,12 +181,6 @@ CC_FLAGS_FTRACE := -pg
 ifdef CONFIG_MPROFILE_KERNEL
 CC_FLAGS_FTRACE += -mprofile-kernel
 endif
-# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828
-ifndef CONFIG_CC_IS_CLANG
-CC_FLAGS_FTRACE	+= $(call cc-ifversion, -lt, 0409, -mno-sched-epilog)
-endif
 endif
 
 CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU))
@@ -444,12 +438,15 @@ endif
 endif
 
 ifdef CONFIG_SMP
+ifdef CONFIG_PPC32
 prepare: task_cpu_prepare
 
 PHONY += task_cpu_prepare
 task_cpu_prepare: prepare0
 	$(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TASK_CPU") print $$3;}' include/generated/asm-offsets.h))
-endif
+
+endif # CONFIG_PPC32
+endif # CONFIG_SMP
 
 PHONY += checkbin
 # Check toolchain versions:
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 4f05a6652478d0b058576d52871992151e5b8b03..701811c91a6f3f3cfdec97501df31f4a4c3a24c5 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -50,6 +50,7 @@ CONFIG_PPC_TRANSACTIONAL_MEM=y
 CONFIG_KEXEC=y
 CONFIG_KEXEC_FILE=y
 CONFIG_CRASH_DUMP=y
+CONFIG_FA_DUMP=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_PPC_64K_PAGES=y
 CONFIG_SCHED_SMT=y
@@ -177,6 +178,7 @@ CONFIG_CHELSIO_T1=m
 CONFIG_BE2NET=m
 CONFIG_IBMVETH=m
 CONFIG_EHEA=m
+CONFIG_IBMVNIC=m
 CONFIG_E100=y
 CONFIG_E1000=y
 CONFIG_E1000E=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 777221775c8398e62a9ecdc027f0f81c59678296..50168dde4ea5987769f4c5365f036abcc69b8ef3 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -41,6 +41,7 @@ CONFIG_DTL=y
 CONFIG_SCANLOG=m
 CONFIG_PPC_SMLPAR=y
 CONFIG_IBMEBUS=y
+CONFIG_PAPR_SCM=m
 CONFIG_PPC_SVM=y
 # CONFIG_PPC_PMAC is not set
 CONFIG_RTAS_FLASH=m
@@ -159,6 +160,7 @@ CONFIG_BE2NET=m
 CONFIG_S2IO=m
 CONFIG_IBMVETH=y
 CONFIG_EHEA=y
+CONFIG_IBMVNIC=y
 CONFIG_E100=y
 CONFIG_E1000=y
 CONFIG_E1000E=y
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index e1f9b4ea1c537be14ea09ad77d84467ea4fa5802..bcf95ce0964f982da4c02bc77e5d0707e2b0609d 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
-generated-y += syscall_table_c32.h
 generated-y += syscall_table_spu.h
 generic-y += export.h
 generic-y += kvm_types.h
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 939f3c94c8f392adf3e58be533c8f46569d66ba5..1c7b75834e045c367985cb160da1fd1b428b487d 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -77,8 +77,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
 		      u32 len_high, u32 len_low);
 long sys_switch_endian(void);
-notrace unsigned int __check_irq_replay(void);
-void notrace restore_interrupts(void);
 
 /* prom_init (OpenFirmware) */
 unsigned long __init prom_init(unsigned long r3, unsigned long r4,
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index aecfde829d5da476bb7b1f3cc6f484ea1dd88ef9..7ae29cfb06c07fb006ba4dde6b45d76947116ba9 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -80,22 +80,6 @@ do {									\
 	___p1;								\
 })
 
-#ifdef CONFIG_PPC64
-#define smp_cond_load_relaxed(ptr, cond_expr) ({		\
-	typeof(ptr) __PTR = (ptr);				\
-	__unqual_scalar_typeof(*ptr) VAL;			\
-	VAL = READ_ONCE(*__PTR);				\
-	if (unlikely(!(cond_expr))) {				\
-		spin_begin();					\
-		do {						\
-			VAL = READ_ONCE(*__PTR);		\
-		} while (!(cond_expr));				\
-		spin_end();					\
-	}							\
-	(typeof(*ptr))VAL;					\
-})
-#endif
-
 #ifdef CONFIG_PPC_BOOK3S_64
 #define NOSPEC_BARRIER_SLOT   nop
 #elif defined(CONFIG_PPC_FSL_BOOK3E)
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 73bc5d2c431dfaa42ba1f9904087f99b652fff57..1670dfe9d4f10bfd346eb01afd936861f32db057 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -5,86 +5,7 @@
 #include <asm/bug.h>
 #include <asm/book3s/32/mmu-hash.h>
 
-#ifdef __ASSEMBLY__
-
-.macro kuep_update_sr	gpr1, gpr2		/* NEVER use r0 as gpr2 due to addis */
-101:	mtsrin	\gpr1, \gpr2
-	addi	\gpr1, \gpr1, 0x111		/* next VSID */
-	rlwinm	\gpr1, \gpr1, 0, 0xf0ffffff	/* clear VSID overflow */
-	addis	\gpr2, \gpr2, 0x1000		/* address of next segment */
-	bdnz	101b
-	isync
-.endm
-
-.macro kuep_lock	gpr1, gpr2
-#ifdef CONFIG_PPC_KUEP
-	li	\gpr1, NUM_USER_SEGMENTS
-	li	\gpr2, 0
-	mtctr	\gpr1
-	mfsrin	\gpr1, \gpr2
-	oris	\gpr1, \gpr1, SR_NX@h		/* set Nx */
-	kuep_update_sr \gpr1, \gpr2
-#endif
-.endm
-
-.macro kuep_unlock	gpr1, gpr2
-#ifdef CONFIG_PPC_KUEP
-	li	\gpr1, NUM_USER_SEGMENTS
-	li	\gpr2, 0
-	mtctr	\gpr1
-	mfsrin	\gpr1, \gpr2
-	rlwinm	\gpr1, \gpr1, 0, ~SR_NX		/* Clear Nx */
-	kuep_update_sr \gpr1, \gpr2
-#endif
-.endm
-
-#ifdef CONFIG_PPC_KUAP
-
-.macro kuap_update_sr	gpr1, gpr2, gpr3	/* NEVER use r0 as gpr2 due to addis */
-101:	mtsrin	\gpr1, \gpr2
-	addi	\gpr1, \gpr1, 0x111		/* next VSID */
-	rlwinm	\gpr1, \gpr1, 0, 0xf0ffffff	/* clear VSID overflow */
-	addis	\gpr2, \gpr2, 0x1000		/* address of next segment */
-	cmplw	\gpr2, \gpr3
-	blt-	101b
-	isync
-.endm
-
-.macro kuap_save_and_lock	sp, thread, gpr1, gpr2, gpr3
-	lwz	\gpr2, KUAP(\thread)
-	rlwinm.	\gpr3, \gpr2, 28, 0xf0000000
-	stw	\gpr2, STACK_REGS_KUAP(\sp)
-	beq+	102f
-	li	\gpr1, 0
-	stw	\gpr1, KUAP(\thread)
-	mfsrin	\gpr1, \gpr2
-	oris	\gpr1, \gpr1, SR_KS@h	/* set Ks */
-	kuap_update_sr	\gpr1, \gpr2, \gpr3
-102:
-.endm
-
-.macro kuap_restore	sp, current, gpr1, gpr2, gpr3
-	lwz	\gpr2, STACK_REGS_KUAP(\sp)
-	rlwinm.	\gpr3, \gpr2, 28, 0xf0000000
-	stw	\gpr2, THREAD + KUAP(\current)
-	beq+	102f
-	mfsrin	\gpr1, \gpr2
-	rlwinm	\gpr1, \gpr1, 0, ~SR_KS	/* Clear Ks */
-	kuap_update_sr	\gpr1, \gpr2, \gpr3
-102:
-.endm
-
-.macro kuap_check	current, gpr
-#ifdef CONFIG_PPC_KUAP_DEBUG
-	lwz	\gpr, THREAD + KUAP(\current)
-999:	twnei	\gpr, 0
-	EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
-#endif
-.endm
-
-#endif /* CONFIG_PPC_KUAP */
-
-#else /* !__ASSEMBLY__ */
+#ifndef __ASSEMBLY__
 
 #ifdef CONFIG_PPC_KUAP
 
@@ -103,6 +24,51 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
 	isync();	/* Context sync required after mtsr() */
 }
 
+static inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+	unsigned long kuap = current->thread.kuap;
+	u32 addr = kuap & 0xf0000000;
+	u32 end = kuap << 28;
+
+	regs->kuap = kuap;
+	if (unlikely(!kuap))
+		return;
+
+	current->thread.kuap = 0;
+	kuap_update_sr(mfsr(addr) | SR_KS, addr, end);	/* Set Ks */
+}
+
+static inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+	u32 addr = regs->kuap & 0xf0000000;
+	u32 end = regs->kuap << 28;
+
+	current->thread.kuap = regs->kuap;
+
+	if (unlikely(regs->kuap == kuap))
+		return;
+
+	kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end);	/* Clear Ks */
+}
+
+static inline unsigned long kuap_get_and_assert_locked(void)
+{
+	unsigned long kuap = current->thread.kuap;
+
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0);
+
+	return kuap;
+}
+
+static inline void kuap_assert_locked(void)
+{
+	kuap_get_and_assert_locked();
+}
+
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
 					      u32 size, unsigned long dir)
 {
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 415ae29fa73a2f6f9d81f7fcea07642d29aaa6dc..83c65845a1a9d6062d8c916b8bb17627b1d5452a 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -194,10 +194,8 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
 #define VMALLOC_END	ioremap_bot
 #endif
 
-#ifdef CONFIG_STRICT_KERNEL_RWX
 #define MODULES_END	ALIGN_DOWN(PAGE_OFFSET, SZ_256M)
 #define MODULES_VADDR	(MODULES_END - SZ_256M)
-#endif
 
 #ifndef __ASSEMBLY__
 #include <linux/sched.h>
diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
index d941c06d4f2eaa57e4960d4f291dd7cfdc192764..ba1743c52b56d4bd6b9a94631a117dfd784fb699 100644
--- a/arch/powerpc/include/asm/book3s/32/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -79,4 +79,4 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 	flush_tlb_mm(mm);
 }
 
-#endif /* _ASM_POWERPC_TLBFLUSH_H */
+#endif /* _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index 8bd90505089624294d010f39a27f89b185176ebf..9700da3a40933b083ab9302cad8d03cbf94ee10e 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -287,7 +287,7 @@ static inline void kuap_kernel_restore(struct pt_regs *regs,
 	 */
 }
 
-static inline unsigned long kuap_get_and_check_amr(void)
+static inline unsigned long kuap_get_and_assert_locked(void)
 {
 	if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
 		unsigned long amr = mfspr(SPRN_AMR);
@@ -298,27 +298,7 @@ static inline unsigned long kuap_get_and_check_amr(void)
 	return 0;
 }
 
-#else /* CONFIG_PPC_PKEY */
-
-static inline void kuap_user_restore(struct pt_regs *regs)
-{
-}
-
-static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
-{
-}
-
-static inline unsigned long kuap_get_and_check_amr(void)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PPC_PKEY */
-
-
-#ifdef CONFIG_PPC_KUAP
-
-static inline void kuap_check_amr(void)
+static inline void kuap_assert_locked(void)
 {
 	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
 		WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index f911bdb68d8bb52b1df8011654ca1b6d55a65c5b..3004f3323144d1f2909d049f379e23c42ff8e423 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -18,7 +18,6 @@
  * complete pgtable.h but only a portion of it.
  */
 #include <asm/book3s/64/pgtable.h>
-#include <asm/bug.h>
 #include <asm/task_size_64.h>
 #include <asm/cpu_has_feature.h>
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 058601efbc8a3bf9df2110d250857f74ac8898cf..a666d561b44d2825e079dd7efa7f37b7b9f4f136 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -7,6 +7,7 @@
 #ifndef __ASSEMBLY__
 #include <linux/mmdebug.h>
 #include <linux/bug.h>
+#include <linux/sizes.h>
 #endif
 
 /*
@@ -116,6 +117,7 @@
  */
 #define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
 #define _PAGE_KERNEL_RO		 (_PAGE_PRIVILEGED | _PAGE_READ)
+#define _PAGE_KERNEL_ROX	 (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC)
 #define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY |	\
 				 _PAGE_RW | _PAGE_EXEC)
 /*
@@ -323,7 +325,8 @@ extern unsigned long pci_io_base;
 #define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
 #define IOREMAP_BASE	(PHB_IO_END)
 #define IOREMAP_START	(ioremap_bot)
-#define IOREMAP_END	(KERN_IO_END)
+#define IOREMAP_END	(KERN_IO_END - FIXADDR_SIZE)
+#define FIXADDR_SIZE	SZ_32M
 
 /* Advertise special mapping type for AGP */
 #define HAVE_PAGE_AGP
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index c7813dc628fc93aa6fbf273c0d69cf8b5c3ae7b8..59cab558e2f057069507909482f28288ef15c579 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -222,8 +222,10 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * from ptesync, it should probably go into update_mmu_cache, rather
 	 * than set_pte_at (which is used to set ptes unrelated to faults).
 	 *
-	 * Spurious faults to vmalloc region are not tolerated, so there is
-	 * a ptesync in flush_cache_vmap.
+	 * Spurious faults from the kernel memory are not tolerated, so there
+	 * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows
+	 * the pte update sequence from ISA Book III 6.10 Translation Table
+	 * Update Synchronization Requirements.
 	 */
 }
 
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index d1635ffbb179e7e0f21bd49398780fd8f38e576e..0b2162890d8b65874948b51e8c877853c80e562e 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -111,11 +111,8 @@
 #ifndef __ASSEMBLY__
 
 struct pt_regs;
-long do_page_fault(struct pt_regs *);
-long hash__do_page_fault(struct pt_regs *);
+void hash__do_page_fault(struct pt_regs *);
 void bad_page_fault(struct pt_regs *, int);
-void __bad_page_fault(struct pt_regs *regs, int sig);
-void do_bad_page_fault_segv(struct pt_regs *regs);
 extern void _exception(int, struct pt_regs *, int, unsigned long);
 extern void _exception_pkey(struct pt_regs *, unsigned long, int);
 extern void die(const char *, struct pt_regs *, long);
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index f63495109f634216039b0333c93070064ee153b0..7564dd4fd12b7e7c0db780621f027caf26a96773 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -30,7 +30,19 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *page);
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean.  We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+static inline void flush_dcache_page(struct page *page)
+{
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_dcache_clean, &page->flags))
+		clear_bit(PG_dcache_clean, &page->flags);
+}
 
 void flush_icache_range(unsigned long start, unsigned long stop);
 #define flush_icache_range flush_icache_range
@@ -40,7 +52,6 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
 #define flush_icache_user_page flush_icache_user_page
 
 void flush_dcache_icache_page(struct page *page);
-void __flush_dcache_icache(void *page);
 
 /**
  * flush_dcache_range(): Write any modified data cache blocks out to memory and
diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h
index 2211b934ecb4eaee70fab1e16139f7abf0ebd48b..bda45788cfccaca2a5f45234f1e385841415d842 100644
--- a/arch/powerpc/include/asm/cpm2.h
+++ b/arch/powerpc/include/asm/cpm2.h
@@ -594,7 +594,7 @@ typedef struct fcc_enet {
 	uint	fen_p256c;	/* Total packets 256 < bytes <= 511 */
 	uint	fen_p512c;	/* Total packets 512 < bytes <= 1023 */
 	uint	fen_p1024c;	/* Total packets 1024 < bytes <= 1518 */
-	uint	fen_cambuf;	/* Internal CAM buffer poiner */
+	uint	fen_cambuf;	/* Internal CAM buffer pointer */
 	ushort	fen_rfthr;	/* Received frames threshold */
 	ushort	fen_rfcnt;	/* Received frames count */
 } fcc_enet_t;
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 8d03c16a3663548cf7fe9c80b629761803e89192..947b5b9c44241124ef3c00816041127d821a05e8 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -23,12 +23,17 @@
 #include <asm/kmap_size.h>
 #endif
 
+#ifdef CONFIG_PPC64
+#define FIXADDR_TOP	(IOREMAP_END + FIXADDR_SIZE)
+#else
+#define FIXADDR_SIZE	0
 #ifdef CONFIG_KASAN
 #include <asm/kasan.h>
 #define FIXADDR_TOP	(KASAN_SHADOW_START - PAGE_SIZE)
 #else
 #define FIXADDR_TOP	((unsigned long)(-PAGE_SIZE))
 #endif
+#endif
 
 /*
  * Here we define all the compile-time 'special' virtual
@@ -50,6 +55,7 @@
  */
 enum fixed_addresses {
 	FIX_HOLE,
+#ifdef CONFIG_PPC32
 	/* reserve the top 128K for early debugging purposes */
 	FIX_EARLY_DEBUG_TOP = FIX_HOLE,
 	FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1,
@@ -72,6 +78,7 @@ enum fixed_addresses {
 		       FIX_IMMR_SIZE,
 #endif
 	/* FIX_PCIE_MCFG, */
+#endif /* CONFIG_PPC32 */
 	__end_of_permanent_fixed_addresses,
 
 #define NR_FIX_BTMAPS		(SZ_256K / PAGE_SIZE)
@@ -98,6 +105,8 @@ enum fixed_addresses {
 static inline void __set_fixmap(enum fixed_addresses idx,
 				phys_addr_t phys, pgprot_t flags)
 {
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && __FIXADDR_SIZE > FIXADDR_SIZE);
+
 	if (__builtin_constant_p(idx))
 		BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
 	else if (WARN_ON(idx >= __end_of_fixed_addresses))
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index e93ee3202e4c174bad59e6c4559c312f87bcdf81..b3001f8b2c1e2028ed71537c4dfec72205d5a93d 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -33,9 +33,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 {
 	int oldval = 0, ret;
 
-	if (!access_ok(uaddr, sizeof(u32)))
+	if (!user_access_begin(uaddr, sizeof(u32)))
 		return -EFAULT;
-	allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -56,10 +55,10 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 	default:
 		ret = -ENOSYS;
 	}
+	user_access_end();
 
 	*oval = oldval;
 
-	prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
 	return ret;
 }
 
@@ -70,11 +69,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	int ret = 0;
 	u32 prev;
 
-	if (!access_ok(uaddr, sizeof(u32)))
+	if (!user_access_begin(uaddr, sizeof(u32)))
 		return -EFAULT;
 
-	allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
-
         __asm__ __volatile__ (
         PPC_ATOMIC_ENTRY_BARRIER
 "1:     lwarx   %1,0,%3         # futex_atomic_cmpxchg_inatomic\n\
@@ -93,8 +90,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
         : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)
         : "cc", "memory");
 
+	user_access_end();
+
 	*uval = prev;
-	prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
 
         return ret;
 }
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index ed6086d57b22e14a337139d92a421c1953934929..4430509060185f4757a4c603f33b3b1db8b0d3e9 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -315,7 +315,8 @@
 #define H_SCM_HEALTH            0x400
 #define H_SCM_PERFORMANCE_STATS 0x418
 #define H_RPT_INVALIDATE	0x448
-#define MAX_HCALL_OPCODE	H_RPT_INVALIDATE
+#define H_SCM_FLUSH		0x44C
+#define MAX_HCALL_OPCODE	H_SCM_FLUSH
 
 /* Scope args for H_SCM_UNBIND_ALL */
 #define H_UNBIND_SCOPE_ALL (0x1)
@@ -389,6 +390,7 @@
 #define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0
 #define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1
 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ull << 61) // IBM bit 2
+#define H_CPU_BEHAV_FAVOUR_SECURITY_H	(1ull << 60) // IBM bit 3
 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE	(1ull << 58) // IBM bit 5
 #define H_CPU_BEHAV_FLUSH_LINK_STACK	(1ull << 57) // IBM bit 6
 
diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h
index 999ed5ac9053163e999b831fc3e7cd29dcc6461c..ccb2034506f0f1e9971cd897b6860ba6a18330b8 100644
--- a/arch/powerpc/include/asm/hvconsole.h
+++ b/arch/powerpc/include/asm/hvconsole.h
@@ -24,5 +24,8 @@
 extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
 extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
 
+/* Provided by HVC VIO */
+void hvc_vio_init_early(void);
+
 #endif /* __KERNEL__ */
 #endif /* _PPC64_HVCONSOLE_H */
diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h
index ae02eb53d6ef82214fe01e41b4ab364283eb4804..d024447283a0cf3ca71980ab7881f9f7ee24e11e 100644
--- a/arch/powerpc/include/asm/hydra.h
+++ b/arch/powerpc/include/asm/hydra.h
@@ -94,8 +94,6 @@ extern volatile struct Hydra __iomem *Hydra;
 #define HYDRA_INT_EXT7		18	/* Power Off Request */
 #define HYDRA_INT_SPARE		19
 
-extern int hydra_init(void);
-
 #endif /* __KERNEL__ */
 
 #endif /* _ASMPPC_HYDRA_H */
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index cc73c12675721c3e699dcc4056db785a5a04d6eb..268d3bd073c8ac0b93221992fcc1377d79eccd1d 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -4,6 +4,40 @@
 
 #include <asm/ppc-opcode.h>
 
+#ifdef CONFIG_PPC64
+
+#define ___get_user_instr(gu_op, dest, ptr)				\
+({									\
+	long __gui_ret = 0;						\
+	unsigned long __gui_ptr = (unsigned long)ptr;			\
+	struct ppc_inst __gui_inst;					\
+	unsigned int __prefix, __suffix;				\
+	__gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr);	\
+	if (__gui_ret == 0) {						\
+		if ((__prefix >> 26) == OP_PREFIX) {			\
+			__gui_ret = gu_op(__suffix,			\
+				(unsigned int __user *)__gui_ptr + 1);	\
+			__gui_inst = ppc_inst_prefix(__prefix,		\
+						     __suffix);		\
+		} else {						\
+			__gui_inst = ppc_inst(__prefix);		\
+		}							\
+		if (__gui_ret == 0)					\
+			(dest) = __gui_inst;				\
+	}								\
+	__gui_ret;							\
+})
+#else /* !CONFIG_PPC64 */
+#define ___get_user_instr(gu_op, dest, ptr)				\
+	gu_op((dest).val, (u32 __user *)(ptr))
+#endif /* CONFIG_PPC64 */
+
+#define get_user_instr(x, ptr) \
+	___get_user_instr(get_user, x, ptr)
+
+#define __get_user_instr(x, ptr) \
+	___get_user_instr(__get_user, x, ptr)
+
 /*
  * Instruction data type for POWER
  */
@@ -68,6 +102,8 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
 
 #define ppc_inst(x) ((struct ppc_inst){ .val = x })
 
+#define ppc_inst_prefix(x, y) ppc_inst(x)
+
 static inline bool ppc_inst_prefixed(struct ppc_inst x)
 {
 	return false;
@@ -113,13 +149,14 @@ static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *va
 	return location + ppc_inst_len(tmp);
 }
 
-static inline u64 ppc_inst_as_u64(struct ppc_inst x)
+static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x)
 {
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-	return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x);
-#else
-	return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x);
-#endif
+	if (IS_ENABLED(CONFIG_PPC32))
+		return ppc_inst_val(x);
+	else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+		return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x);
+	else
+		return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x);
 }
 
 #define PPC_INST_STR_LEN sizeof("00000000 00000000")
@@ -141,10 +178,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins
 	__str;				\
 })
 
-int probe_user_read_inst(struct ppc_inst *inst,
-			 struct ppc_inst __user *nip);
-
-int probe_kernel_read_inst(struct ppc_inst *inst,
-			   struct ppc_inst *src);
+int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src);
 
 #endif /* _ASM_POWERPC_INST_H */
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index e8d09a841373b708c729e6ea4d4d63cf62654b48..44cde2e129b8833a6fe7dcd2272f3fc50f570756 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -2,6 +2,70 @@
 #ifndef _ASM_POWERPC_INTERRUPT_H
 #define _ASM_POWERPC_INTERRUPT_H
 
+/* BookE/4xx */
+#define INTERRUPT_CRITICAL_INPUT  0x100
+
+/* BookE */
+#define INTERRUPT_DEBUG           0xd00
+#ifdef CONFIG_BOOKE
+#define INTERRUPT_PERFMON         0x260
+#define INTERRUPT_DOORBELL        0x280
+#endif
+
+/* BookS/4xx/8xx */
+#define INTERRUPT_MACHINE_CHECK   0x200
+
+/* BookS/8xx */
+#define INTERRUPT_SYSTEM_RESET    0x100
+
+/* BookS */
+#define INTERRUPT_DATA_SEGMENT    0x380
+#define INTERRUPT_INST_SEGMENT    0x480
+#define INTERRUPT_TRACE           0xd00
+#define INTERRUPT_H_DATA_STORAGE  0xe00
+#define INTERRUPT_HMI			0xe60
+#define INTERRUPT_H_FAC_UNAVAIL   0xf80
+#ifdef CONFIG_PPC_BOOK3S
+#define INTERRUPT_DOORBELL        0xa00
+#define INTERRUPT_PERFMON         0xf00
+#define INTERRUPT_ALTIVEC_UNAVAIL	0xf20
+#endif
+
+/* BookE/BookS/4xx/8xx */
+#define INTERRUPT_DATA_STORAGE    0x300
+#define INTERRUPT_INST_STORAGE    0x400
+#define INTERRUPT_EXTERNAL		0x500
+#define INTERRUPT_ALIGNMENT       0x600
+#define INTERRUPT_PROGRAM         0x700
+#define INTERRUPT_SYSCALL         0xc00
+#define INTERRUPT_TRACE			0xd00
+
+/* BookE/BookS/44x */
+#define INTERRUPT_FP_UNAVAIL      0x800
+
+/* BookE/BookS/44x/8xx */
+#define INTERRUPT_DECREMENTER     0x900
+
+#ifndef INTERRUPT_PERFMON
+#define INTERRUPT_PERFMON         0x0
+#endif
+
+/* 8xx */
+#define INTERRUPT_SOFT_EMU_8xx		0x1000
+#define INTERRUPT_INST_TLB_MISS_8xx	0x1100
+#define INTERRUPT_DATA_TLB_MISS_8xx	0x1200
+#define INTERRUPT_INST_TLB_ERROR_8xx	0x1300
+#define INTERRUPT_DATA_TLB_ERROR_8xx	0x1400
+#define INTERRUPT_DATA_BREAKPOINT_8xx	0x1c00
+#define INTERRUPT_INST_BREAKPOINT_8xx	0x1d00
+
+/* 603 */
+#define INTERRUPT_INST_TLB_MISS_603		0x1000
+#define INTERRUPT_DATA_LOAD_TLB_MISS_603	0x1100
+#define INTERRUPT_DATA_STORE_TLB_MISS_603	0x1200
+
+#ifndef __ASSEMBLY__
+
 #include <linux/context_tracking.h>
 #include <linux/hardirq.h>
 #include <asm/cputime.h>
@@ -9,10 +73,18 @@
 #include <asm/kprobes.h>
 #include <asm/runlatch.h>
 
-struct interrupt_state {
-#ifdef CONFIG_PPC_BOOK3E_64
-	enum ctx_state ctx_state;
+static inline void nap_adjust_return(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_970_NAP
+	if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
+		/* Can avoid a test-and-clear because NMIs do not call this */
+		clear_thread_local_flags(_TLF_NAPPING);
+		regs->nip = (unsigned long)power4_idle_nap_return;
+	}
 #endif
+}
+
+struct interrupt_state {
 };
 
 static inline void booke_restore_dbcr0(void)
@@ -29,10 +101,19 @@ static inline void booke_restore_dbcr0(void)
 
 static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
-	/*
-	 * Book3E reconciles irq soft mask in asm
-	 */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC32
+	if (!arch_irq_disabled_regs(regs))
+		trace_hardirqs_off();
+
+	if (user_mode(regs)) {
+		kuep_lock();
+		account_cpu_user_entry();
+	} else {
+		kuap_save_and_lock(regs);
+	}
+#endif
+
+#ifdef CONFIG_PPC64
 	if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
 		trace_hardirqs_off();
 	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
@@ -48,16 +129,12 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
 		 * CT_WARN_ON comes here via program_check_exception,
 		 * so avoid recursion.
 		 */
-		if (TRAP(regs) != 0x700)
+		if (TRAP(regs) != INTERRUPT_PROGRAM)
 			CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
 	}
 #endif
 
-#ifdef CONFIG_PPC_BOOK3E_64
-	state->ctx_state = exception_enter();
-	if (user_mode(regs))
-		account_cpu_user_entry();
-#endif
+	booke_restore_dbcr0();
 }
 
 /*
@@ -76,23 +153,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
  */
 static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
-#ifdef CONFIG_PPC_BOOK3E_64
-	exception_exit(state->ctx_state);
-#endif
-
-	/*
-	 * Book3S exits to user via interrupt_exit_user_prepare(), which does
-	 * context tracking, which is a cleaner way to handle PREEMPT=y
-	 * and avoid context entry/exit in e.g., preempt_schedule_irq()),
-	 * which is likely to be where the core code wants to end up.
-	 *
-	 * The above comment explains why we can't do the
-	 *
-	 *     if (user_mode(regs))
-	 *         user_exit_irqoff();
-	 *
-	 * sequence here.
-	 */
+	if (user_mode(regs))
+		kuep_unlock();
 }
 
 static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
@@ -109,24 +171,46 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct in
 
 static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
+	/*
+	 * Adjust at exit so the main handler sees the true NIA. This must
+	 * come before irq_exit() because irq_exit can enable interrupts, and
+	 * if another interrupt is taken before nap_adjust_return has run
+	 * here, then that interrupt would return directly to idle nap return.
+	 */
+	nap_adjust_return(regs);
+
 	irq_exit();
 	interrupt_exit_prepare(regs, state);
 }
 
 struct interrupt_nmi_state {
 #ifdef CONFIG_PPC64
-#ifdef CONFIG_PPC_BOOK3S_64
 	u8 irq_soft_mask;
 	u8 irq_happened;
-#endif
 	u8 ftrace_enabled;
 #endif
 };
 
+static inline bool nmi_disables_ftrace(struct pt_regs *regs)
+{
+	/* Allow DEC and PMI to be traced when they are soft-NMI */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+		if (TRAP(regs) == INTERRUPT_DECREMENTER)
+		       return false;
+		if (TRAP(regs) == INTERRUPT_PERFMON)
+		       return false;
+	}
+	if (IS_ENABLED(CONFIG_PPC_BOOK3E)) {
+		if (TRAP(regs) == INTERRUPT_PERFMON)
+			return false;
+	}
+
+	return true;
+}
+
 static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
 {
 #ifdef CONFIG_PPC64
-#ifdef CONFIG_PPC_BOOK3S_64
 	state->irq_soft_mask = local_paca->irq_soft_mask;
 	state->irq_happened = local_paca->irq_happened;
 
@@ -139,9 +223,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
 	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
 	/* Don't do any per-CPU operations until interrupt state is fixed */
-#endif
-	/* Allow DEC and PMI to be traced when they are soft-NMI */
-	if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) {
+
+	if (nmi_disables_ftrace(regs)) {
 		state->ftrace_enabled = this_cpu_get_ftrace_enabled();
 		this_cpu_set_ftrace_enabled(0);
 	}
@@ -164,17 +247,20 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
 			radix_enabled() || (mfmsr() & MSR_DR))
 		nmi_exit();
 
+	/*
+	 * nmi does not call nap_adjust_return because nmi should not create
+	 * new work to do (must use irq_work for that).
+	 */
+
 #ifdef CONFIG_PPC64
-	if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260)
+	if (nmi_disables_ftrace(regs))
 		this_cpu_set_ftrace_enabled(state->ftrace_enabled);
 
-#ifdef CONFIG_PPC_BOOK3S_64
 	/* Check we didn't change the pending interrupt mask. */
 	WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
 	local_paca->irq_happened = state->irq_happened;
 	local_paca->irq_soft_mask = state->irq_soft_mask;
 #endif
-#endif
 }
 
 /*
@@ -387,6 +473,7 @@ DECLARE_INTERRUPT_HANDLER(SMIException);
 DECLARE_INTERRUPT_HANDLER(handle_hmi_exception);
 DECLARE_INTERRUPT_HANDLER(unknown_exception);
 DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception);
+DECLARE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception);
 DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception);
 DECLARE_INTERRUPT_HANDLER(RunModeException);
 DECLARE_INTERRUPT_HANDLER(single_step_exception);
@@ -410,7 +497,7 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
 DECLARE_INTERRUPT_HANDLER(CacheLockingException);
 DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException);
 DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException);
-DECLARE_INTERRUPT_HANDLER(WatchdogException);
+DECLARE_INTERRUPT_HANDLER_NMI(WatchdogException);
 DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
 
 /* slb.c */
@@ -421,7 +508,7 @@ DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault);
 DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault);
 
 /* fault.c */
-DECLARE_INTERRUPT_HANDLER_RET(do_page_fault);
+DECLARE_INTERRUPT_HANDLER(do_page_fault);
 DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv);
 
 /* process.c */
@@ -436,7 +523,7 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
 
 DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException);
 
-void unrecoverable_exception(struct pt_regs *regs);
+void __noreturn unrecoverable_exception(struct pt_regs *regs);
 
 void replay_system_reset(void);
 void replay_soft_interrupts(void);
@@ -447,4 +534,6 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
 		local_irq_enable();
 }
 
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ASM_POWERPC_INTERRUPT_H */
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index f3f264e441a79e9569ede340c8c5dd3a45cbd697..b2bd588304300fb706b85c896a6f473095ff4593 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -53,8 +53,6 @@ extern void *mcheckirq_ctx[NR_CPUS];
 extern void *hardirq_ctx[NR_CPUS];
 extern void *softirq_ctx[NR_CPUS];
 
-void call_do_softirq(void *sp);
-void call_do_irq(struct pt_regs *regs, void *sp);
 extern void do_IRQ(struct pt_regs *regs);
 extern void __init init_IRQ(void);
 extern void __do_irq(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 09297ec9fa52718c470e86088a0937dafb0a2707..2d5c6bec2b4f3368b9bae00358c2fce7a7cd5b88 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -20,7 +20,8 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
 	asm_volatile_goto("1:\n\t"
 		 "nop # arch_static_branch\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
-		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
+		 ".long 1b - ., %l[l_yes] - .\n\t"
+		 JUMP_ENTRY_TYPE "%c0 - .\n\t"
 		 ".popsection \n\t"
 		 : :  "i" (&((char *)key)[branch]) : : l_yes);
 
@@ -34,7 +35,8 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
 	asm_volatile_goto("1:\n\t"
 		 "b %l[l_yes] # arch_static_branch_jump\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
-		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
+		 ".long 1b - ., %l[l_yes] - .\n\t"
+		 JUMP_ENTRY_TYPE "%c0 - .\n\t"
 		 ".popsection \n\t"
 		 : :  "i" (&((char *)key)[branch]) : : l_yes);
 
@@ -43,23 +45,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
 	return true;
 }
 
-#ifdef CONFIG_PPC64
-typedef u64 jump_label_t;
-#else
-typedef u32 jump_label_t;
-#endif
-
-struct jump_entry {
-	jump_label_t code;
-	jump_label_t target;
-	jump_label_t key;
-};
-
 #else
 #define ARCH_STATIC_BRANCH(LABEL, KEY)		\
 1098:	nop;					\
 	.pushsection __jump_table, "aw";	\
-	FTR_ENTRY_LONG 1098b, LABEL, KEY;	\
+	.long 1098b - ., LABEL - .;		\
+	FTR_ENTRY_LONG KEY;			\
 	.popsection
 #endif
 
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 7355ed05e65edab1965a960ebecefb1513b77c8b..3c478e5ef24c2a4488bf6afaefb8099b05c445b4 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -19,7 +19,7 @@
 
 #define KASAN_SHADOW_SCALE_SHIFT	3
 
-#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX)
+#ifdef CONFIG_MODULES
 #define KASAN_KERN_START	ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
 #else
 #define KASAN_KERN_START	PAGE_OFFSET
diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
new file mode 100644
index 0000000000000000000000000000000000000000..a9846b68c6b9eab427578c342cb8f87590492435
--- /dev/null
+++ b/arch/powerpc/include/asm/kfence.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * powerpc KFENCE support.
+ *
+ * Copyright (C) 2020 CS GROUP France
+ */
+
+#ifndef __ASM_POWERPC_KFENCE_H
+#define __ASM_POWERPC_KFENCE_H
+
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+
+static inline bool arch_kfence_init_pool(void)
+{
+	return true;
+}
+
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+	pte_t *kpte = virt_to_kpte(addr);
+
+	if (protect) {
+		pte_update(&init_mm, addr, kpte, _PAGE_PRESENT, 0, 0);
+		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+	} else {
+		pte_update(&init_mm, addr, kpte, 0, _PAGE_PRESENT, 0);
+	}
+
+	return true;
+}
+
+#endif /* __ASM_POWERPC_KFENCE_H */
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 7ec21af49a45c5b3e0588bd291f1bc7192bcb123..ec96232529ac27f3d0e29a16a7df7b14d3c588a3 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -28,15 +28,6 @@
 
 #ifdef __ASSEMBLY__
 #ifndef CONFIG_PPC_KUAP
-.macro kuap_save_and_lock	sp, thread, gpr1, gpr2, gpr3
-.endm
-
-.macro kuap_restore	sp, current, gpr1, gpr2, gpr3
-.endm
-
-.macro kuap_check	current, gpr
-.endm
-
 .macro kuap_check_amr	gpr1, gpr2
 .endm
 
@@ -55,6 +46,14 @@ void setup_kuep(bool disabled);
 static inline void setup_kuep(bool disabled) { }
 #endif /* CONFIG_PPC_KUEP */
 
+#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
+void kuep_lock(void);
+void kuep_unlock(void);
+#else
+static inline void kuep_lock(void) { }
+static inline void kuep_unlock(void) { }
+#endif
+
 #ifdef CONFIG_PPC_KUAP
 void setup_kuap(bool disabled);
 #else
@@ -66,7 +65,15 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 	return false;
 }
 
-static inline void kuap_check_amr(void) { }
+static inline void kuap_assert_locked(void) { }
+static inline void kuap_save_and_lock(struct pt_regs *regs) { }
+static inline void kuap_user_restore(struct pt_regs *regs) { }
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
+
+static inline unsigned long kuap_get_and_assert_locked(void)
+{
+	return 0;
+}
 
 /*
  * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 2f5f919f6cd302646827509179d802461efa2a2b..c581215081570a1b73b88f072b895ed54b67e84e 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -258,6 +258,8 @@ extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
 extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
 			struct kvm_memory_slot *memslot,
 			unsigned long *map);
+extern unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm,
+			unsigned long lpcr);
 extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
 			unsigned long mask);
 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 8aacd76bb702b4b8a192f98e5a23abb909490d74..9531b1c1b1900084a23f9b64c02f48bb2dd5ae20 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -767,8 +767,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
                      unsigned long pte_index, unsigned long avpn);
 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu);
 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
-                      unsigned long pte_index, unsigned long avpn,
-                      unsigned long va);
+                      unsigned long pte_index, unsigned long avpn);
 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
                    unsigned long pte_index);
 long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 652ce85f9410a8840d842389d80e7b6b501f3cb7..4bc45d3ed8b0e8a8d0fb02e7342fecbf76d70ee4 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -263,7 +263,7 @@ extern void arch_exit_mmap(struct mm_struct *mm);
 static inline void arch_unmap(struct mm_struct *mm,
 			      unsigned long start, unsigned long end)
 {
-	unsigned long vdso_base = (unsigned long)mm->context.vdso - PAGE_SIZE;
+	unsigned long vdso_base = (unsigned long)mm->context.vdso;
 
 	if (start <= vdso_base && vdso_base < end)
 		mm->context.vdso = NULL;
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 17a4a616436ffa51a43bdd8a515546e33356707f..295ef563960932cd52e93a599c3c0d730af3dad3 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -7,33 +7,41 @@
 
 #ifdef CONFIG_PPC_KUAP
 
-#ifdef __ASSEMBLY__
-
-.macro kuap_save_and_lock	sp, thread, gpr1, gpr2, gpr3
-	lis	\gpr2, MD_APG_KUAP@h	/* only APG0 and APG1 are used */
-	mfspr	\gpr1, SPRN_MD_AP
-	mtspr	SPRN_MD_AP, \gpr2
-	stw	\gpr1, STACK_REGS_KUAP(\sp)
-.endm
-
-.macro kuap_restore	sp, current, gpr1, gpr2, gpr3
-	lwz	\gpr1, STACK_REGS_KUAP(\sp)
-	mtspr	SPRN_MD_AP, \gpr1
-.endm
-
-.macro kuap_check	current, gpr
-#ifdef CONFIG_PPC_KUAP_DEBUG
-	mfspr	\gpr, SPRN_MD_AP
-	rlwinm	\gpr, \gpr, 16, 0xffff
-999:	twnei	\gpr, MD_APG_KUAP@h
-	EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
-#endif
-.endm
-
-#else /* !__ASSEMBLY__ */
+#ifndef __ASSEMBLY__
 
 #include <asm/reg.h>
 
+static inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+	regs->kuap = mfspr(SPRN_MD_AP);
+	mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+
+static inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+	mtspr(SPRN_MD_AP, regs->kuap);
+}
+
+static inline unsigned long kuap_get_and_assert_locked(void)
+{
+	unsigned long kuap = mfspr(SPRN_MD_AP);
+
+	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+		WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16);
+
+	return kuap;
+}
+
+static inline void kuap_assert_locked(void)
+{
+	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+		kuap_get_and_assert_locked();
+}
+
 static inline void allow_user_access(void __user *to, const void __user *from,
 				     unsigned long size, unsigned long dir)
 {
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 478249959baae4a8d74426107e98c26a640e404d..6e4faa0a9b35b77a67c5a4ce0bbb023c6ccdac59 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -172,6 +172,9 @@
 
 #define mmu_linear_psize	MMU_PAGE_8M
 
+#define MODULES_VADDR	(PAGE_OFFSET - SZ_256M)
+#define MODULES_END	PAGE_OFFSET
+
 #ifndef __ASSEMBLY__
 
 #include <linux/mmdebug.h>
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 6cb8aa35719176a297218f94e6ef089351e5bd77..57cd3892bfe05237231e929fb1284612d9d13cbe 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -6,6 +6,8 @@
  * the ppc64 non-hashed page table.
  */
 
+#include <linux/sizes.h>
+
 #include <asm/nohash/64/pgtable-4k.h>
 #include <asm/barrier.h>
 #include <asm/asm-const.h>
@@ -54,7 +56,8 @@
 #define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
 #define IOREMAP_BASE	(PHB_IO_END)
 #define IOREMAP_START	(ioremap_bot)
-#define IOREMAP_END	(KERN_VIRT_START + KERN_VIRT_SIZE)
+#define IOREMAP_END	(KERN_VIRT_START + KERN_VIRT_SIZE - FIXADDR_SIZE)
+#define FIXADDR_SIZE	SZ_32M
 
 
 /*
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 9986ac34b8e224fd28bb893cc8303e2f584a3483..c76157237e22babbef8d9f9ba13e6260c636fde2 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -307,7 +307,7 @@ int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data,
 
 s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size);
 s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr);
-s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr);
+s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, __be64 *addr);
 
 s64 opal_signal_system_reset(s32 cpu);
 s64 opal_quiesce(u64 shutdown_type, s32 cpu);
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 00e7e671bb4ba9048652763db40aeefb966f222a..f4c3428e816bd46f6c265f7284c421034308bd81 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -43,7 +43,7 @@ struct power_pmu {
 				u64 alt[]);
 	void		(*get_mem_data_src)(union perf_mem_data_src *dsrc,
 				u32 flags, struct pt_regs *regs);
-	void		(*get_mem_weight)(u64 *weight);
+	void		(*get_mem_weight)(u64 *weight, u64 type);
 	unsigned long	group_constraint_mask;
 	unsigned long	group_constraint_val;
 	u64             (*bhrb_filter_map)(u64 branch_sample_type);
@@ -67,6 +67,12 @@ struct power_pmu {
 	 * the pmu supports extended perf regs capability
 	 */
 	int		capabilities;
+	/*
+	 * Function to check event code for values which are
+	 * reserved. Function takes struct perf_event as input,
+	 * since event code could be spread in attr.config*
+	 */
+	int		(*check_attr_config)(struct perf_event *ev);
 };
 
 /*
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 4eed82172e3348efa832c5ad7ffbcf04909e195a..c6a676714f04099b6fecd66cf0c1bf034d83daa2 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -41,8 +41,6 @@ struct mm_struct;
 
 #ifndef __ASSEMBLY__
 
-#include <asm/tlbflush.h>
-
 /* Keep these as a macros to avoid include dependency mess */
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ed161ef2b3ca68c3259f6e8e4f457b785c56cf76..ac41776661e96300f5f2d66c1094f9800db2eb63 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -265,6 +265,7 @@
 #define PPC_INST_ORI			0x60000000
 #define PPC_INST_ORIS			0x64000000
 #define PPC_INST_BRANCH			0x48000000
+#define PPC_INST_BL			0x48000001
 #define PPC_INST_BRANCH_COND		0x40800000
 
 /* Prefixes */
@@ -437,6 +438,9 @@
 #define PPC_RAW_STFDX(s, a, b)		(0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_LVX(t, a, b)		(0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_STVX(s, a, b)		(0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDE(t, a, b)		(0x7c000114 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDZE(t, a)		(0x7c000194 | ___PPC_RT(t) | ___PPC_RA(a))
+#define PPC_RAW_ADDME(t, a)		(0x7c0001d4 | ___PPC_RT(t) | ___PPC_RA(a))
 #define PPC_RAW_ADD(t, a, b)		(PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_ADD_DOT(t, a, b)	(PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
 #define PPC_RAW_ADDC(t, a, b)		(0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
@@ -445,11 +449,14 @@
 #define PPC_RAW_BLR()			(PPC_INST_BLR)
 #define PPC_RAW_BLRL()			(0x4e800021)
 #define PPC_RAW_MTLR(r)			(0x7c0803a6 | ___PPC_RT(r))
+#define PPC_RAW_MFLR(t)			(PPC_INST_MFLR | ___PPC_RT(t))
 #define PPC_RAW_BCTR()			(PPC_INST_BCTR)
 #define PPC_RAW_MTCTR(r)		(PPC_INST_MTCTR | ___PPC_RT(r))
 #define PPC_RAW_ADDI(d, a, i)		(PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
 #define PPC_RAW_LI(r, i)		PPC_RAW_ADDI(r, 0, i)
 #define PPC_RAW_ADDIS(d, a, i)		(PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIC(d, a, i)		(0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIC_DOT(d, a, i)	(0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
 #define PPC_RAW_LIS(r, i)		PPC_RAW_ADDIS(r, 0, i)
 #define PPC_RAW_STDX(r, base, b)	(0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b))
 #define PPC_RAW_STDU(r, base, i)	(0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc))
@@ -472,6 +479,10 @@
 #define PPC_RAW_CMPLW(a, b)		(0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_CMPLD(a, b)		(0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_SUB(d, a, b)		(0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b))
+#define PPC_RAW_SUBFC(d, a, b)		(0x7c000010 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUBFE(d, a, b)		(0x7c000110 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUBFIC(d, a, i)		(0x20000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_SUBFZE(d, a)		(0x7c000190 | ___PPC_RT(d) | ___PPC_RA(a))
 #define PPC_RAW_MULD(d, a, b)		(0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_MULW(d, a, b)		(0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_MULHWU(d, a, b)		(0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
@@ -484,11 +495,13 @@
 #define PPC_RAW_DIVDEU_DOT(t, a, b)	(0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
 #define PPC_RAW_AND(d, a, b)		(0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_ANDI(d, a, i)		(0x70000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ANDIS(d, a, i)		(0x74000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
 #define PPC_RAW_AND_DOT(d, a, b)	(0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_OR(d, a, b)		(0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_MR(d, a)		PPC_RAW_OR(d, a, a)
 #define PPC_RAW_ORI(d, a, i)		(PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
 #define PPC_RAW_ORIS(d, a, i)		(PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_NOR(d, a, b)		(0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_XOR(d, a, b)		(0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_XORI(d, a, i)		(0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
 #define PPC_RAW_XORIS(d, a, i)		(0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 3dceb64fc9af24855d76b6aa9df7a99cb78217c9..d6739d700f0a3bd75afc834fe42c0e7a7c30b677 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -15,36 +15,6 @@
 
 #define SZL			(BITS_PER_LONG/8)
 
-/*
- * Stuff for accurate CPU time accounting.
- * These macros handle transitions between user and system state
- * in exception entry and exit and accumulate time to the
- * user_time and system_time fields in the paca.
- */
-
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)
-#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb)
-#else
-#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)				\
-	MFTB(ra);			/* get timebase */		\
-	PPC_LL	rb, ACCOUNT_STARTTIME_USER(ptr);			\
-	PPC_STL	ra, ACCOUNT_STARTTIME(ptr);				\
-	subf	rb,rb,ra;		/* subtract start value */	\
-	PPC_LL	ra, ACCOUNT_USER_TIME(ptr);				\
-	add	ra,ra,rb;		/* add on to user time */	\
-	PPC_STL	ra, ACCOUNT_USER_TIME(ptr);				\
-
-#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb)				\
-	MFTB(ra);			/* get timebase */		\
-	PPC_LL	rb, ACCOUNT_STARTTIME(ptr);				\
-	PPC_STL	ra, ACCOUNT_STARTTIME_USER(ptr);			\
-	subf	rb,rb,ra;		/* subtract start value */	\
-	PPC_LL	ra, ACCOUNT_SYSTEM_TIME(ptr);				\
-	add	ra,ra,rb;		/* add on to system time */	\
-	PPC_STL	ra, ACCOUNT_SYSTEM_TIME(ptr)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-
 /*
  * Macros for storing registers into and loading registers from
  * exception frames.
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 8acc3590c9712b6f636d9d0c1d6ef68db962d2a3..7bf8a15af22469dca1ff15e340639b6816f83922 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -144,15 +144,12 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_PPC32
 	void		*pgdir;		/* root of page-table tree */
-	unsigned long	ksp_limit;	/* if ksp <= ksp_limit stack overflow */
 #ifdef CONFIG_PPC_RTAS
 	unsigned long	rtas_sp;	/* stack pointer for when in RTAS */
 #endif
-#endif
 #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
 	unsigned long	kuap;		/* opened segments for user access */
 #endif
-#ifdef CONFIG_VMAP_STACK
 	unsigned long	srr0;
 	unsigned long	srr1;
 	unsigned long	dar;
@@ -161,7 +158,7 @@ struct thread_struct {
 	unsigned long	r0, r3, r4, r5, r6, r8, r9, r11;
 	unsigned long	lr, ctr;
 #endif
-#endif
+#endif /* CONFIG_PPC32 */
 	/* Debug Registers */
 	struct debug_reg debug;
 #ifdef CONFIG_PPC_FPU_REGS
@@ -282,7 +279,6 @@ struct thread_struct {
 #ifdef CONFIG_PPC32
 #define INIT_THREAD { \
 	.ksp = INIT_SP, \
-	.ksp_limit = INIT_SP_LIMIT, \
 	.pgdir = swapper_pg_dir, \
 	.fpexc_mode = MSR_FE0 | MSR_FE1, \
 	SPEFSCR_INIT \
@@ -393,6 +389,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
 extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
 #ifdef CONFIG_PPC_970_NAP
 extern void power4_idle_nap(void);
+void power4_idle_nap_return(void);
 #endif
 
 extern unsigned long cpuidle_disable;
@@ -417,6 +414,8 @@ extern int fix_alignment(struct pt_regs *);
 #define NET_IP_ALIGN	0
 #endif
 
+int do_mathemu(struct pt_regs *regs);
+
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PROCESSOR_H */
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 1499e928ea6a62663b44d8116c2114a974e4b235..9c9ab27461683bbfa904fbabc06d775a56078df0 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -185,44 +185,27 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
 #define current_pt_regs() \
 	((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1)
 
+/*
+ * The 4 low bits (0xf) are available as flags to overload the trap word,
+ * because interrupt vectors have minimum alignment of 0x10. TRAP_FLAGS_MASK
+ * must cover the bits used as flags, including bit 0 which is used as the
+ * "norestart" bit.
+ */
 #ifdef __powerpc64__
-#ifdef CONFIG_PPC_BOOK3S
-#define TRAP_FLAGS_MASK		0x10
-#define TRAP(regs)		((regs)->trap & ~TRAP_FLAGS_MASK)
-#define FULL_REGS(regs)		true
-#define SET_FULL_REGS(regs)	do { } while (0)
-#else
-#define TRAP_FLAGS_MASK		0x11
-#define TRAP(regs)		((regs)->trap & ~TRAP_FLAGS_MASK)
-#define FULL_REGS(regs)		(((regs)->trap & 1) == 0)
-#define SET_FULL_REGS(regs)	((regs)->trap &= ~1)
-#endif
-#define CHECK_FULL_REGS(regs)	BUG_ON(!FULL_REGS(regs))
-#define NV_REG_POISON		0xdeadbeefdeadbeefUL
+#define TRAP_FLAGS_MASK		0x1
 #else
 /*
- * We use the least-significant bit of the trap field to indicate
- * whether we have saved the full set of registers, or only a
- * partial set.  A 1 there means the partial set.
- * On 4xx we use the next bit to indicate whether the exception
+ * On 4xx we use bit 1 in the trap word to indicate whether the exception
  * is a critical exception (1 means it is).
  */
-#define TRAP_FLAGS_MASK		0x1F
-#define TRAP(regs)		((regs)->trap & ~TRAP_FLAGS_MASK)
-#define FULL_REGS(regs)		(((regs)->trap & 1) == 0)
-#define SET_FULL_REGS(regs)	((regs)->trap &= ~1)
+#define TRAP_FLAGS_MASK		0xf
 #define IS_CRITICAL_EXC(regs)	(((regs)->trap & 2) != 0)
 #define IS_MCHECK_EXC(regs)	(((regs)->trap & 4) != 0)
 #define IS_DEBUG_EXC(regs)	(((regs)->trap & 8) != 0)
-#define NV_REG_POISON		0xdeadbeef
-#define CHECK_FULL_REGS(regs)						      \
-do {									      \
-	if ((regs)->trap & 1)						      \
-		printk(KERN_CRIT "%s: partial register set\n", __func__); \
-} while (0)
 #endif /* __powerpc64__ */
+#define TRAP(regs)		((regs)->trap & ~TRAP_FLAGS_MASK)
 
-static inline void set_trap(struct pt_regs *regs, unsigned long val)
+static __always_inline void set_trap(struct pt_regs *regs, unsigned long val)
 {
 	regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK);
 }
@@ -244,12 +227,12 @@ static inline bool trap_is_syscall(struct pt_regs *regs)
 
 static inline bool trap_norestart(struct pt_regs *regs)
 {
-	return regs->trap & 0x10;
+	return regs->trap & 0x1;
 }
 
-static inline void set_trap_norestart(struct pt_regs *regs)
+static __always_inline void set_trap_norestart(struct pt_regs *regs)
 {
-	regs->trap |= 0x10;
+	regs->trap |= 0x1;
 }
 
 #define arch_has_single_step()	(1)
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index b752d34517b39d37977746fb4f055629913112d9..07318bc63e3d03b32507d604daaae3d5e94bc065 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -44,20 +44,6 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
 }
 #define queued_spin_lock queued_spin_lock
 
-#define smp_mb__after_spinlock()   smp_mb()
-
-static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
-{
-	/*
-	 * This barrier was added to simple spinlocks by commit 51d7d5205d338,
-	 * but it should now be possible to remove it, asm arm64 has done with
-	 * commit c6f5d02b6a0f.
-	 */
-	smp_mb();
-	return atomic_read(&lock->val);
-}
-#define queued_spin_is_locked queued_spin_is_locked
-
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 #define SPIN_THRESHOLD (1<<15) /* not tuned */
 
@@ -86,6 +72,13 @@ static inline void pv_spinlocks_init(void)
 
 #endif
 
+/*
+ * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait,
+ * which was found to have performance problems if implemented with
+ * the preferred spin_begin()/spin_end() SMT priority pattern. Use the
+ * generic version instead.
+ */
+
 #include <asm-generic/qspinlock.h>
 
 #endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index da103e92c1126aa4e6e2410cfa3a510bd8c5624b..7c81d3e563b28e94b8d5e1e4e56fdef0f063b302 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -124,7 +124,7 @@
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
 #else
-#define MSR_TM_ACTIVE(x) 0
+#define MSR_TM_ACTIVE(x) ((void)(x), 0)
 #endif
 
 #if defined(CONFIG_PPC_BOOK3S_64)
@@ -441,6 +441,7 @@
 #define   LPCR_VRMA_LP1		ASM_CONST(0x0000800000000000)
 #define   LPCR_RMLS		0x1C000000	/* Implementation dependent RMO limit sel */
 #define   LPCR_RMLS_SH		26
+#define   LPCR_HAIL		ASM_CONST(0x0000000004000000)   /* HV AIL (ISAv3.1) */
 #define   LPCR_ILE		ASM_CONST(0x0000000002000000)   /* !HV irqs set MSR:LE */
 #define   LPCR_AIL		ASM_CONST(0x0000000001800000)	/* Alternate interrupt location */
 #define   LPCR_AIL_0		ASM_CONST(0x0000000000000000)	/* MMU off exception offset 0x0 */
@@ -1393,8 +1394,7 @@ static inline void mtmsr_isync(unsigned long val)
 				     : "r" ((unsigned long)(v)) \
 				     : "memory")
 #endif
-#define wrtspr(rn)	asm volatile("mtspr " __stringify(rn) ",0" : \
-				     : : "memory")
+#define wrtspr(rn)	asm volatile("mtspr " __stringify(rn) ",2" : : : "memory")
 
 static inline void wrtee(unsigned long val)
 {
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 658448ca5b8ae97e65914f9e73fd07e938c81fb4..9dc97d2f9d27e09d56917db552a89b72b8a0fe3b 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -19,8 +19,8 @@
 #define RTAS_UNKNOWN_SERVICE (-1)
 #define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */
 
-/* Buffer size for ppc_rtas system call. */
-#define RTAS_RMOBUF_MAX (64 * 1024)
+/* Memory set aside for sys_rtas to use with calls that need a work area. */
+#define RTAS_USER_REGION_SIZE (64 * 1024)
 
 /* RTAS return status codes */
 #define RTAS_BUSY		-2    /* RTAS Busy */
@@ -357,7 +357,7 @@ extern void rtas_take_timebase(void);
 static inline int page_is_rtas_user_buf(unsigned long pfn)
 {
 	unsigned long paddr = (pfn << PAGE_SHIFT);
-	if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_RMOBUF_MAX))
+	if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_USER_REGION_SIZE))
 		return 1;
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h
index 5b862de29dff6c4316a17823e3b66a758814f654..552f325412cc7e5eaf01b27c914bdba6bd6bab26 100644
--- a/arch/powerpc/include/asm/simple_spinlock.h
+++ b/arch/powerpc/include/asm/simple_spinlock.h
@@ -38,8 +38,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	smp_mb();
-	return !arch_spin_value_unlocked(*lock);
+	return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
 /*
@@ -282,7 +281,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 #define arch_read_relax(lock)	rw_yield(lock)
 #define arch_write_relax(lock)	rw_yield(lock)
 
-/* See include/linux/spinlock.h */
-#define smp_mb__after_spinlock()   smp_mb()
-
 #endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 7a13bc20f0a0ce3ef95a6eb9921b40a3a2ba2e69..03b3d010cbab667a48eef50c535859b468db5fae 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -31,6 +31,7 @@ extern u32 *cpu_to_phys_id;
 extern bool coregroup_enabled;
 
 extern int cpu_to_chip_id(int cpu);
+extern int *chip_id_lookup_table;
 
 #ifdef CONFIG_SMP
 
@@ -121,6 +122,11 @@ static inline struct cpumask *cpu_sibling_mask(int cpu)
 	return per_cpu(cpu_sibling_map, cpu);
 }
 
+static inline struct cpumask *cpu_core_mask(int cpu)
+{
+	return per_cpu(cpu_core_map, cpu);
+}
+
 static inline struct cpumask *cpu_l2_cache_mask(int cpu)
 {
 	return per_cpu(cpu_l2_cache_map, cpu);
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 6ec72282888dc2331e70f3dbdae6b3c76889f6f6..bd75872a6334a4ce21d6d8ce86df80ecd0bf2884 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -10,6 +10,9 @@
 #include <asm/simple_spinlock.h>
 #endif
 
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()	smp_mb()
+
 #ifndef CONFIG_PARAVIRT_SPINLOCKS
 static inline void pv_spinlocks_init(void) { }
 #endif
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 386d576673a1df56b6e18c73477d73e09ecf0eae..b4ec6c7dd72ee99375b6c7965107062415226975 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -38,7 +38,6 @@
 #ifndef __ASSEMBLY__
 #include <linux/cache.h>
 #include <asm/processor.h>
-#include <asm/page.h>
 #include <asm/accounting.h>
 
 #define SLB_PRELOAD_NR	16U
@@ -152,6 +151,12 @@ void arch_setup_new_exec(void);
 
 #ifndef __ASSEMBLY__
 
+static inline void clear_thread_local_flags(unsigned int flags)
+{
+	struct thread_info *ti = current_thread_info();
+	ti->local_flags &= ~flags;
+}
+
 static inline bool test_thread_local_flags(unsigned int flags)
 {
 	struct thread_info *ti = current_thread_info();
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 3beeb030cd78e9bbcababbf4de178c588fbe0eee..e4db64c0e1842eabf54da6d0860fc06b47cb044c 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -126,7 +126,7 @@ static inline int cpu_to_coregroup_id(int cpu)
 #define topology_physical_package_id(cpu)	(cpu_to_chip_id(cpu))
 
 #define topology_sibling_cpumask(cpu)	(per_cpu(cpu_sibling_map, cpu))
-#define topology_core_cpumask(cpu)	(cpu_cpu_mask(cpu))
+#define topology_core_cpumask(cpu)	(per_cpu(cpu_core_map, cpu))
 #define topology_core_id(cpu)		(cpu_to_core_id(cpu))
 
 #endif
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 78e2a3990eab30c8c7aeac12914f9ea7f3b52308..a09e4240c5b1670ee184d85c93d82d738251ebe5 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -43,129 +43,39 @@ static inline bool __access_ok(unsigned long addr, unsigned long size)
  * exception handling means that it's no longer "just"...)
  *
  */
-#define get_user(x, ptr) \
-	__get_user_check((x), (ptr), sizeof(*(ptr)))
-#define put_user(x, ptr) \
-	__put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-#define __get_user(x, ptr) \
-	__get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
-#define __put_user(x, ptr) \
-	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-#define __get_user_allowed(x, ptr) \
-	__get_user_nocheck((x), (ptr), sizeof(*(ptr)), false)
-
-#define __get_user_inatomic(x, ptr) \
-	__get_user_nosleep((x), (ptr), sizeof(*(ptr)))
-#define __put_user_inatomic(x, ptr) \
-	__put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-#ifdef CONFIG_PPC64
-
-#define ___get_user_instr(gu_op, dest, ptr)				\
-({									\
-	long __gui_ret = 0;						\
-	unsigned long __gui_ptr = (unsigned long)ptr;			\
-	struct ppc_inst __gui_inst;					\
-	unsigned int __prefix, __suffix;				\
-	__gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr);	\
-	if (__gui_ret == 0) {						\
-		if ((__prefix >> 26) == OP_PREFIX) {			\
-			__gui_ret = gu_op(__suffix,			\
-				(unsigned int __user *)__gui_ptr + 1);	\
-			__gui_inst = ppc_inst_prefix(__prefix,		\
-						     __suffix);		\
-		} else {						\
-			__gui_inst = ppc_inst(__prefix);		\
-		}							\
-		if (__gui_ret == 0)					\
-			(dest) = __gui_inst;				\
-	}								\
-	__gui_ret;							\
-})
-
-#define get_user_instr(x, ptr) \
-	___get_user_instr(get_user, x, ptr)
-
-#define __get_user_instr(x, ptr) \
-	___get_user_instr(__get_user, x, ptr)
-
-#define __get_user_instr_inatomic(x, ptr) \
-	___get_user_instr(__get_user_inatomic, x, ptr)
-
-#else /* !CONFIG_PPC64 */
-#define get_user_instr(x, ptr) \
-	get_user((x).val, (u32 __user *)(ptr))
-
-#define __get_user_instr(x, ptr) \
-	__get_user_nocheck((x).val, (u32 __user *)(ptr), sizeof(u32), true)
-
-#define __get_user_instr_inatomic(x, ptr) \
-	__get_user_nosleep((x).val, (u32 __user *)(ptr), sizeof(u32))
-
-#endif /* CONFIG_PPC64 */
-
-extern long __put_user_bad(void);
-
-#define __put_user_size(x, ptr, size, retval)			\
-do {								\
-	__label__ __pu_failed;					\
-								\
-	retval = 0;						\
-	allow_write_to_user(ptr, size);				\
-	__put_user_size_goto(x, ptr, size, __pu_failed);	\
-	prevent_write_to_user(ptr, size);			\
-	break;							\
-								\
-__pu_failed:							\
-	retval = -EFAULT;					\
-	prevent_write_to_user(ptr, size);			\
-} while (0)
-
-#define __put_user_nocheck(x, ptr, size)			\
+#define __put_user(x, ptr)					\
 ({								\
 	long __pu_err;						\
 	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
-	__typeof__(*(ptr)) __pu_val = (x);			\
-	__typeof__(size) __pu_size = (size);			\
+	__typeof__(*(ptr)) __pu_val = (__typeof__(*(ptr)))(x);	\
+	__typeof__(sizeof(*(ptr))) __pu_size = sizeof(*(ptr));	\
 								\
-	if (!is_kernel_addr((unsigned long)__pu_addr))		\
-		might_fault();					\
-	__chk_user_ptr(__pu_addr);				\
-	__put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err);	\
+	might_fault();						\
+	do {							\
+		__label__ __pu_failed;				\
+								\
+		allow_write_to_user(__pu_addr, __pu_size);	\
+		__put_user_size_goto(__pu_val, __pu_addr, __pu_size, __pu_failed);	\
+		prevent_write_to_user(__pu_addr, __pu_size);	\
+		__pu_err = 0;					\
+		break;						\
+								\
+__pu_failed:							\
+		prevent_write_to_user(__pu_addr, __pu_size);	\
+		__pu_err = -EFAULT;				\
+	} while (0);						\
 								\
 	__pu_err;						\
 })
 
-#define __put_user_check(x, ptr, size)					\
+#define put_user(x, ptr)						\
 ({									\
-	long __pu_err = -EFAULT;					\
-	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
-	__typeof__(*(ptr)) __pu_val = (x);				\
-	__typeof__(size) __pu_size = (size);				\
-									\
-	might_fault();							\
-	if (access_ok(__pu_addr, __pu_size))				\
-		__put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \
+	__typeof__(*(ptr)) __user *_pu_addr = (ptr);			\
 									\
-	__pu_err;							\
+	access_ok(_pu_addr, sizeof(*(ptr))) ?				\
+		  __put_user(x, _pu_addr) : -EFAULT;			\
 })
 
-#define __put_user_nosleep(x, ptr, size)			\
-({								\
-	long __pu_err;						\
-	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
-	__typeof__(*(ptr)) __pu_val = (x);			\
-	__typeof__(size) __pu_size = (size);			\
-								\
-	__chk_user_ptr(__pu_addr);				\
-	__put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \
-								\
-	__pu_err;						\
-})
-
-
 /*
  * We don't tell gcc that we are accessing memory, but this is OK
  * because we do not write to any memory gcc knows about, so there
@@ -198,25 +108,17 @@ __pu_failed:							\
 
 #define __put_user_size_goto(x, ptr, size, label)		\
 do {								\
+	__typeof__(*(ptr)) __user *__pus_addr = (ptr);		\
+								\
 	switch (size) {						\
-	case 1: __put_user_asm_goto(x, ptr, label, "stb"); break;	\
-	case 2: __put_user_asm_goto(x, ptr, label, "sth"); break;	\
-	case 4: __put_user_asm_goto(x, ptr, label, "stw"); break;	\
-	case 8: __put_user_asm2_goto(x, ptr, label); break;	\
-	default: __put_user_bad();				\
+	case 1: __put_user_asm_goto(x, __pus_addr, label, "stb"); break;	\
+	case 2: __put_user_asm_goto(x, __pus_addr, label, "sth"); break;	\
+	case 4: __put_user_asm_goto(x, __pus_addr, label, "stw"); break;	\
+	case 8: __put_user_asm2_goto(x, __pus_addr, label); break;		\
+	default: BUILD_BUG();					\
 	}							\
 } while (0)
 
-#define __unsafe_put_user_goto(x, ptr, size, label)		\
-do {								\
-	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
-	__chk_user_ptr(ptr);					\
-	__put_user_size_goto((x), __pu_addr, (size), label);	\
-} while (0)
-
-
-extern long __get_user_bad(void);
-
 /*
  * This does an atomic 128 byte aligned load from userspace.
  * Upto caller to do enable_kernel_vmx() before calling!
@@ -234,6 +136,59 @@ extern long __get_user_bad(void);
 		: "=r" (err)			\
 		: "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err))
 
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define __get_user_asm_goto(x, addr, label, op)			\
+	asm_volatile_goto(					\
+		"1:	"op"%U1%X1 %0, %1	# get_user\n"	\
+		EX_TABLE(1b, %l2)				\
+		: "=r" (x)					\
+		: "m"UPD_CONSTR (*addr)				\
+		:						\
+		: label)
+
+#ifdef __powerpc64__
+#define __get_user_asm2_goto(x, addr, label)			\
+	__get_user_asm_goto(x, addr, label, "ld")
+#else /* __powerpc64__ */
+#define __get_user_asm2_goto(x, addr, label)			\
+	asm_volatile_goto(					\
+		"1:	lwz%X1 %0, %1\n"			\
+		"2:	lwz%X1 %L0, %L1\n"			\
+		EX_TABLE(1b, %l2)				\
+		EX_TABLE(2b, %l2)				\
+		: "=r" (x)					\
+		: "m" (*addr)					\
+		:						\
+		: label)
+#endif /* __powerpc64__ */
+
+#define __get_user_size_goto(x, ptr, size, label)				\
+do {										\
+	BUILD_BUG_ON(size > sizeof(x));						\
+	switch (size) {								\
+	case 1: __get_user_asm_goto(x, (u8 __user *)ptr, label, "lbz"); break;	\
+	case 2: __get_user_asm_goto(x, (u16 __user *)ptr, label, "lhz"); break;	\
+	case 4: __get_user_asm_goto(x, (u32 __user *)ptr, label, "lwz"); break;	\
+	case 8: __get_user_asm2_goto(x, (u64 __user *)ptr, label);  break;	\
+	default: x = 0; BUILD_BUG();						\
+	}									\
+} while (0)
+
+#define __get_user_size_allowed(x, ptr, size, retval)			\
+do {									\
+		__label__ __gus_failed;					\
+									\
+		__get_user_size_goto(x, ptr, size, __gus_failed);	\
+		retval = 0;						\
+		break;							\
+__gus_failed:								\
+		x = 0;							\
+		retval = -EFAULT;					\
+} while (0)
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
 #define __get_user_asm(x, addr, err, op)		\
 	__asm__ __volatile__(				\
 		"1:	"op"%U2%X2 %1, %2	# get_user\n"	\
@@ -271,25 +226,27 @@ extern long __get_user_bad(void);
 #define __get_user_size_allowed(x, ptr, size, retval)		\
 do {								\
 	retval = 0;						\
-	__chk_user_ptr(ptr);					\
-	if (size > sizeof(x))					\
-		(x) = __get_user_bad();				\
+	BUILD_BUG_ON(size > sizeof(x));				\
 	switch (size) {						\
 	case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break;	\
 	case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break;	\
 	case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break;	\
 	case 8: __get_user_asm2(x, (u64 __user *)ptr, retval);  break;	\
-	default: (x) = __get_user_bad();			\
+	default: x = 0; BUILD_BUG();				\
 	}							\
 } while (0)
 
-#define __get_user_size(x, ptr, size, retval)			\
+#define __get_user_size_goto(x, ptr, size, label)		\
 do {								\
-	allow_read_from_user(ptr, size);			\
-	__get_user_size_allowed(x, ptr, size, retval);		\
-	prevent_read_from_user(ptr, size);			\
+	long __gus_retval;					\
+								\
+	__get_user_size_allowed(x, ptr, size, __gus_retval);	\
+	if (__gus_retval)					\
+		goto label;					\
 } while (0)
 
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
 /*
  * This is a type: either unsigned long, if the argument fits into
  * that type, or otherwise unsigned long long.
@@ -297,86 +254,36 @@ do {								\
 #define __long_type(x) \
 	__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 
-#define __get_user_nocheck(x, ptr, size, do_allow)			\
+#define __get_user(x, ptr)					\
 ({								\
 	long __gu_err;						\
 	__long_type(*(ptr)) __gu_val;				\
 	__typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
-	__typeof__(size) __gu_size = (size);			\
+	__typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr));	\
 								\
-	__chk_user_ptr(__gu_addr);				\
-	if (do_allow && !is_kernel_addr((unsigned long)__gu_addr)) \
-		might_fault();					\
-	if (do_allow)								\
-		__get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err);	\
-	else									\
-		__get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \
+	might_fault();					\
+	allow_read_from_user(__gu_addr, __gu_size);		\
+	__get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err);	\
+	prevent_read_from_user(__gu_addr, __gu_size);		\
 	(x) = (__typeof__(*(ptr)))__gu_val;			\
 								\
 	__gu_err;						\
 })
 
-#define __get_user_check(x, ptr, size)					\
+#define get_user(x, ptr)						\
 ({									\
-	long __gu_err = -EFAULT;					\
-	__long_type(*(ptr)) __gu_val = 0;				\
-	__typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
-	__typeof__(size) __gu_size = (size);				\
-									\
-	might_fault();							\
-	if (access_ok(__gu_addr, __gu_size))				\
-		__get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \
-	(x) = (__force __typeof__(*(ptr)))__gu_val;				\
+	__typeof__(*(ptr)) __user *_gu_addr = (ptr);			\
 									\
-	__gu_err;							\
+	access_ok(_gu_addr, sizeof(*(ptr))) ?				\
+		  __get_user(x, _gu_addr) :				\
+		  ((x) = (__force __typeof__(*(ptr)))0, -EFAULT);	\
 })
 
-#define __get_user_nosleep(x, ptr, size)			\
-({								\
-	long __gu_err;						\
-	__long_type(*(ptr)) __gu_val;				\
-	__typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
-	__typeof__(size) __gu_size = (size);			\
-								\
-	__chk_user_ptr(__gu_addr);				\
-	__get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \
-	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
-								\
-	__gu_err;						\
-})
-
-
 /* more complex routines */
 
 extern unsigned long __copy_tofrom_user(void __user *to,
 		const void __user *from, unsigned long size);
 
-#ifdef CONFIG_ARCH_HAS_COPY_MC
-unsigned long __must_check
-copy_mc_generic(void *to, const void *from, unsigned long size);
-
-static inline unsigned long __must_check
-copy_mc_to_kernel(void *to, const void *from, unsigned long size)
-{
-	return copy_mc_generic(to, from, size);
-}
-#define copy_mc_to_kernel copy_mc_to_kernel
-
-static inline unsigned long __must_check
-copy_mc_to_user(void __user *to, const void *from, unsigned long n)
-{
-	if (likely(check_copy_size(from, n, true))) {
-		if (access_ok(to, n)) {
-			allow_write_to_user(to, n);
-			n = copy_mc_generic((void *)to, from, n);
-			prevent_write_to_user(to, n);
-		}
-	}
-
-	return n;
-}
-#endif
-
 #ifdef __powerpc64__
 static inline unsigned long
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
@@ -414,26 +321,51 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 
 unsigned long __arch_clear_user(void __user *addr, unsigned long size);
 
-static inline unsigned long clear_user(void __user *addr, unsigned long size)
+static inline unsigned long __clear_user(void __user *addr, unsigned long size)
 {
-	unsigned long ret = size;
+	unsigned long ret;
+
 	might_fault();
-	if (likely(access_ok(addr, size))) {
-		allow_write_to_user(addr, size);
-		ret = __arch_clear_user(addr, size);
-		prevent_write_to_user(addr, size);
-	}
+	allow_write_to_user(addr, size);
+	ret = __arch_clear_user(addr, size);
+	prevent_write_to_user(addr, size);
 	return ret;
 }
 
-static inline unsigned long __clear_user(void __user *addr, unsigned long size)
+static inline unsigned long clear_user(void __user *addr, unsigned long size)
 {
-	return clear_user(addr, size);
+	return likely(access_ok(addr, size)) ? __clear_user(addr, size) : size;
 }
 
 extern long strncpy_from_user(char *dst, const char __user *src, long count);
 extern __must_check long strnlen_user(const char __user *str, long n);
 
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_generic(void *to, const void *from, unsigned long size);
+
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned long size)
+{
+	return copy_mc_generic(to, from, size);
+}
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+static inline unsigned long __must_check
+copy_mc_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if (likely(check_copy_size(from, n, true))) {
+		if (access_ok(to, n)) {
+			allow_write_to_user(to, n);
+			n = copy_mc_generic((void *)to, from, n);
+			prevent_write_to_user(to, n);
+		}
+	}
+
+	return n;
+}
+#endif
+
 extern long __copy_from_user_flushcache(void *dst, const void __user *src,
 		unsigned size);
 extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
@@ -482,10 +414,37 @@ user_write_access_begin(const void __user *ptr, size_t len)
 #define user_write_access_begin	user_write_access_begin
 #define user_write_access_end		prevent_current_write_to_user
 
-#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
-#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
+#define unsafe_get_user(x, p, e) do {					\
+	__long_type(*(p)) __gu_val;				\
+	__typeof__(*(p)) __user *__gu_addr = (p);		\
+								\
+	__get_user_size_goto(__gu_val, __gu_addr, sizeof(*(p)), e); \
+	(x) = (__typeof__(*(p)))__gu_val;			\
+} while (0)
+
 #define unsafe_put_user(x, p, e) \
-	__unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e)
+	__put_user_size_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e)
+
+#define unsafe_copy_from_user(d, s, l, e) \
+do {											\
+	u8 *_dst = (u8 *)(d);								\
+	const u8 __user *_src = (const u8 __user *)(s);					\
+	size_t _len = (l);								\
+	int _i;										\
+											\
+	for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64))		\
+		unsafe_get_user(*(u64 *)(_dst + _i), (u64 __user *)(_src + _i), e);	\
+	if (_len & 4) {									\
+		unsafe_get_user(*(u32 *)(_dst + _i), (u32 __user *)(_src + _i), e);	\
+		_i += 4;								\
+	}										\
+	if (_len & 2) {									\
+		unsafe_get_user(*(u16 *)(_dst + _i), (u16 __user *)(_src + _i), e);	\
+		_i += 2;								\
+	}										\
+	if (_len & 1)									\
+		unsafe_get_user(*(u8 *)(_dst + _i), (u8 __user *)(_src + _i), e);	\
+} while (0)
 
 #define unsafe_copy_to_user(d, s, l, e) \
 do {									\
@@ -494,9 +453,9 @@ do {									\
 	size_t _len = (l);						\
 	int _i;								\
 									\
-	for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long))		\
-		unsafe_put_user(*(long*)(_src + _i), (long __user *)(_dst + _i), e); \
-	if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) {			\
+	for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64))	\
+		unsafe_put_user(*(u64 *)(_src + _i), (u64 __user *)(_dst + _i), e); \
+	if (_len & 4) {							\
 		unsafe_put_user(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \
 		_i += 4;						\
 	}								\
@@ -511,14 +470,8 @@ do {									\
 #define HAVE_GET_KERNEL_NOFAULT
 
 #define __get_kernel_nofault(dst, src, type, err_label)			\
-do {									\
-	int __kr_err;							\
-									\
-	__get_user_size_allowed(*((type *)(dst)), (__force type __user *)(src),\
-			sizeof(type), __kr_err);			\
-	if (unlikely(__kr_err))						\
-		goto err_label;						\
-} while (0)
+	__get_user_size_goto(*((type *)(dst)),				\
+		(__force type __user *)(src), sizeof(type), err_label)
 
 #define __put_kernel_nofault(dst, src, type, err_label)			\
 	__put_user_size_goto(*((type *)(src)),				\
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 700fcdac2e3cc7b423b53499354ba2aaa06ce8a6..b541c690a31c2c25410cdd3b7b305f95813dacd0 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -40,6 +40,7 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #ifdef CONFIG_PPC32
 #define __ARCH_WANT_OLD_STAT
+#define __ARCH_WANT_SYS_OLD_SELECT
 #endif
 #ifdef CONFIG_PPC64
 #define __ARCH_WANT_SYS_TIME
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
index 77c635c2c90d43f2979006e280ba8e1071ea5b41..1faff0be1111bdaca599a6a9ead90f5d3ffcf130 100644
--- a/arch/powerpc/include/asm/vdso/gettimeofday.h
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H
 #define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H
 
+#include <asm/page.h>
+
 #ifdef __ASSEMBLY__
 
 #include <asm/ppc_asm.h>
@@ -154,6 +156,14 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
 
 const struct vdso_data *__arch_get_vdso_data(void);
 
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+	return (void *)vd + PAGE_SIZE;
+}
+#endif
+
 static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
 {
 	return true;
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index 3f958ecf2beb563e4cfeac0e9ff9e7c851184054..a585c8e538ff0fe1e422c117ca44abb1d6b7aa3b 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -107,9 +107,7 @@ extern struct vdso_arch_data *vdso_data;
 	bcl	20, 31, .+4
 999:
 	mflr	\ptr
-#if CONFIG_PPC_PAGE_SHIFT > 14
 	addis	\ptr, \ptr, (_vdso_datapage - 999b)@ha
-#endif
 	addi	\ptr, \ptr, (_vdso_datapage - 999b)@l
 .endm
 
diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h
index 721c0d6715ac8f7599740355b312d397adc28894..e7479a4abf969eab08c2bb7c7338ae82550b69f8 100644
--- a/arch/powerpc/include/asm/vio.h
+++ b/arch/powerpc/include/asm/vio.h
@@ -114,6 +114,7 @@ struct vio_driver {
 	const struct vio_device_id *id_table;
 	int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
 	void (*remove)(struct vio_dev *dev);
+	void (*shutdown)(struct vio_dev *dev);
 	/* A driver must have a get_desired_dma() function to
 	 * be loaded in a CMO environment if it uses DMA.
 	 */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 9a312b975ca826e2782b2eb3b3b89dcd0412b67d..aa094a8655b0360cd85eec08147d0f2c1664b007 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -102,6 +102,7 @@ void xive_flush_interrupt(void);
 /* xmon hook */
 void xmon_xive_do_dump(int cpu);
 int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d);
+void xmon_xive_get_irq_all(void);
 
 /* APIs used by KVM */
 u32 xive_native_default_eq_shift(void);
diff --git a/arch/powerpc/include/uapi/asm/errno.h b/arch/powerpc/include/uapi/asm/errno.h
index cc79856896a192a0ef6e4b22f9c433f4c2f11103..4ba87de32be0026a8843e72ac36ae4e723011eee 100644
--- a/arch/powerpc/include/uapi/asm/errno.h
+++ b/arch/powerpc/include/uapi/asm/errno.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_POWERPC_ERRNO_H
 #define _ASM_POWERPC_ERRNO_H
 
+#undef	EDEADLOCK
 #include <asm-generic/errno.h>
 
 #undef	EDEADLOCK
diff --git a/arch/powerpc/include/uapi/asm/posix_types.h b/arch/powerpc/include/uapi/asm/posix_types.h
index f698400e4bb017d4eaa53a56d5958075914612b6..9c03423125446c3edf08dbeea193aec4e396d72a 100644
--- a/arch/powerpc/include/uapi/asm/posix_types.h
+++ b/arch/powerpc/include/uapi/asm/posix_types.h
@@ -12,11 +12,6 @@
 typedef unsigned long	__kernel_old_dev_t;
 #define __kernel_old_dev_t __kernel_old_dev_t
 #else
-typedef unsigned int	__kernel_size_t;
-typedef int		__kernel_ssize_t;
-typedef long		__kernel_ptrdiff_t;
-#define __kernel_size_t __kernel_size_t
-
 typedef short		__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 #endif
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index c7797eb958c73e94eb6e6bb0c1417de797632505..bbb4181621ddcae8a630857b953d8436c398e47d 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -107,7 +107,6 @@ static struct aligninfo spe_aligninfo[32] = {
 static int emulate_spe(struct pt_regs *regs, unsigned int reg,
 		       struct ppc_inst ppc_instr)
 {
-	int ret;
 	union {
 		u64 ll;
 		u32 w[2];
@@ -127,11 +126,6 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
 	nb = spe_aligninfo[instr].len;
 	flags = spe_aligninfo[instr].flags;
 
-	/* Verify the address of the operand */
-	if (unlikely(user_mode(regs) &&
-		     !access_ok(addr, nb)))
-		return -EFAULT;
-
 	/* userland only */
 	if (unlikely(!user_mode(regs)))
 		return 0;
@@ -169,26 +163,27 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
 		}
 	} else {
 		temp.ll = data.ll = 0;
-		ret = 0;
 		p = addr;
 
+		if (!user_read_access_begin(addr, nb))
+			return -EFAULT;
+
 		switch (nb) {
 		case 8:
-			ret |= __get_user_inatomic(temp.v[0], p++);
-			ret |= __get_user_inatomic(temp.v[1], p++);
-			ret |= __get_user_inatomic(temp.v[2], p++);
-			ret |= __get_user_inatomic(temp.v[3], p++);
+			unsafe_get_user(temp.v[0], p++, Efault_read);
+			unsafe_get_user(temp.v[1], p++, Efault_read);
+			unsafe_get_user(temp.v[2], p++, Efault_read);
+			unsafe_get_user(temp.v[3], p++, Efault_read);
 			fallthrough;
 		case 4:
-			ret |= __get_user_inatomic(temp.v[4], p++);
-			ret |= __get_user_inatomic(temp.v[5], p++);
+			unsafe_get_user(temp.v[4], p++, Efault_read);
+			unsafe_get_user(temp.v[5], p++, Efault_read);
 			fallthrough;
 		case 2:
-			ret |= __get_user_inatomic(temp.v[6], p++);
-			ret |= __get_user_inatomic(temp.v[7], p++);
-			if (unlikely(ret))
-				return -EFAULT;
+			unsafe_get_user(temp.v[6], p++, Efault_read);
+			unsafe_get_user(temp.v[7], p++, Efault_read);
 		}
+		user_read_access_end();
 
 		switch (instr) {
 		case EVLDD:
@@ -255,31 +250,41 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
 
 	/* Store result to memory or update registers */
 	if (flags & ST) {
-		ret = 0;
 		p = addr;
+
+		if (!user_write_access_begin(addr, nb))
+			return -EFAULT;
+
 		switch (nb) {
 		case 8:
-			ret |= __put_user_inatomic(data.v[0], p++);
-			ret |= __put_user_inatomic(data.v[1], p++);
-			ret |= __put_user_inatomic(data.v[2], p++);
-			ret |= __put_user_inatomic(data.v[3], p++);
+			unsafe_put_user(data.v[0], p++, Efault_write);
+			unsafe_put_user(data.v[1], p++, Efault_write);
+			unsafe_put_user(data.v[2], p++, Efault_write);
+			unsafe_put_user(data.v[3], p++, Efault_write);
 			fallthrough;
 		case 4:
-			ret |= __put_user_inatomic(data.v[4], p++);
-			ret |= __put_user_inatomic(data.v[5], p++);
+			unsafe_put_user(data.v[4], p++, Efault_write);
+			unsafe_put_user(data.v[5], p++, Efault_write);
 			fallthrough;
 		case 2:
-			ret |= __put_user_inatomic(data.v[6], p++);
-			ret |= __put_user_inatomic(data.v[7], p++);
+			unsafe_put_user(data.v[6], p++, Efault_write);
+			unsafe_put_user(data.v[7], p++, Efault_write);
 		}
-		if (unlikely(ret))
-			return -EFAULT;
+		user_write_access_end();
 	} else {
 		*evr = data.w[0];
 		regs->gpr[reg] = data.w[1];
 	}
 
 	return 1;
+
+Efault_read:
+	user_read_access_end();
+	return -EFAULT;
+
+Efault_write:
+	user_write_access_end();
+	return -EFAULT;
 }
 #endif /* CONFIG_SPE */
 
@@ -299,13 +304,12 @@ int fix_alignment(struct pt_regs *regs)
 	struct instruction_op op;
 	int r, type;
 
-	/*
-	 * We require a complete register set, if not, then our assembly
-	 * is broken
-	 */
-	CHECK_FULL_REGS(regs);
+	if (is_kernel_addr(regs->nip))
+		r = copy_inst_from_kernel_nofault(&instr, (void *)regs->nip);
+	else
+		r = __get_user_instr(instr, (void __user *)regs->nip);
 
-	if (unlikely(__get_user_instr(instr, (void __user *)regs->nip)))
+	if (unlikely(r))
 		return -EFAULT;
 	if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
 		/* We don't handle PPC little-endian any more... */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f3a662201a9fbdaa36aa615c7d58215f21d1fe64..28af4efb458701ea041741d9065b86eb55bcd005 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -91,7 +91,6 @@ int main(void)
 	DEFINE(SIGSEGV, SIGSEGV);
 	DEFINE(NMI_MASK, NMI_MASK);
 #else
-	OFFSET(KSP_LIMIT, thread_struct, ksp_limit);
 #ifdef CONFIG_PPC_RTAS
 	OFFSET(RTAS_SP, thread_struct, rtas_sp);
 #endif
@@ -132,7 +131,6 @@ int main(void)
 	OFFSET(KSP_VSID, thread_struct, ksp_vsid);
 #else /* CONFIG_PPC64 */
 	OFFSET(PGDIR, thread_struct, pgdir);
-#ifdef CONFIG_VMAP_STACK
 	OFFSET(SRR0, thread_struct, srr0);
 	OFFSET(SRR1, thread_struct, srr1);
 	OFFSET(DAR, thread_struct, dar);
@@ -149,7 +147,6 @@ int main(void)
 	OFFSET(THLR, thread_struct, lr);
 	OFFSET(THCTR, thread_struct, ctr);
 #endif
-#endif
 #ifdef CONFIG_SPE
 	OFFSET(THREAD_EVR0, thread_struct, evr[0]);
 	OFFSET(THREAD_ACC, thread_struct, acc);
@@ -285,21 +282,11 @@ int main(void)
 	OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
 	OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
 	OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default);
-	OFFSET(ACCOUNT_STARTTIME, paca_struct, accounting.starttime);
-	OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user);
-	OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
-	OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
 #ifdef CONFIG_PPC_BOOK3E
 	OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
 #endif
 	OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
 #else /* CONFIG_PPC64 */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-	OFFSET(ACCOUNT_STARTTIME, thread_info, accounting.starttime);
-	OFFSET(ACCOUNT_STARTTIME_USER, thread_info, accounting.starttime_user);
-	OFFSET(ACCOUNT_USER_TIME, thread_info, accounting.utime);
-	OFFSET(ACCOUNT_SYSTEM_TIME, thread_info, accounting.stime);
-#endif
 #endif /* CONFIG_PPC64 */
 
 	/* RTAS */
@@ -323,9 +310,6 @@ int main(void)
 	STACK_PT_REGS_OFFSET(GPR11, gpr[11]);
 	STACK_PT_REGS_OFFSET(GPR12, gpr[12]);
 	STACK_PT_REGS_OFFSET(GPR13, gpr[13]);
-#ifndef CONFIG_PPC64
-	STACK_PT_REGS_OFFSET(GPR14, gpr[14]);
-#endif /* CONFIG_PPC64 */
 	/*
 	 * Note: these symbols include _ because they overlap with special
 	 * register names
@@ -381,7 +365,6 @@ int main(void)
 	DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
 	DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
 	DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
-	DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
 #endif
 #endif
 
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index cd60bc1c8701138f8e2e3682e6f3aaafc88ff62f..f24cd53ff26e2e5ff4806e7fb70e45cc161c5c3b 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -362,14 +362,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 	pa = pte_pfn(*ptep);
 
 	/* On radix we can do hugepage mappings for io, so handle that */
-	if (hugepage_shift) {
-		pa <<= hugepage_shift;
-		pa |= token & ((1ul << hugepage_shift) - 1);
-	} else {
-		pa <<= PAGE_SHIFT;
-		pa |= token & (PAGE_SIZE - 1);
-	}
+	if (!hugepage_shift)
+		hugepage_shift = PAGE_SHIFT;
 
+	pa <<= PAGE_SHIFT;
+	pa |= token & ((1ul << hugepage_shift) - 1);
 	return pa;
 }
 
@@ -779,7 +776,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 	default:
 		eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
 		return -EINVAL;
-	};
+	}
 
 	return 0;
 }
@@ -1568,6 +1565,7 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
 }
 EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
 
+#ifdef CONFIG_PROC_FS
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
 	if (!eeh_enabled()) {
@@ -1594,6 +1592,7 @@ static int proc_eeh_show(struct seq_file *m, void *v)
 
 	return 0;
 }
+#endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 78c430b7f9d943316c8c538ee8ab164c3f32d995..9160285cb2f444dd67e05672f295d8f270de073f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -48,195 +48,16 @@
  */
 	.align	12
 
-#ifdef CONFIG_BOOKE
-	.globl	mcheck_transfer_to_handler
-mcheck_transfer_to_handler:
-	mfspr	r0,SPRN_DSRR0
-	stw	r0,_DSRR0(r11)
-	mfspr	r0,SPRN_DSRR1
-	stw	r0,_DSRR1(r11)
-	/* fall through */
-_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler)
-
-	.globl	debug_transfer_to_handler
-debug_transfer_to_handler:
-	mfspr	r0,SPRN_CSRR0
-	stw	r0,_CSRR0(r11)
-	mfspr	r0,SPRN_CSRR1
-	stw	r0,_CSRR1(r11)
-	/* fall through */
-_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler)
-
-	.globl	crit_transfer_to_handler
-crit_transfer_to_handler:
-#ifdef CONFIG_PPC_BOOK3E_MMU
-	mfspr	r0,SPRN_MAS0
-	stw	r0,MAS0(r11)
-	mfspr	r0,SPRN_MAS1
-	stw	r0,MAS1(r11)
-	mfspr	r0,SPRN_MAS2
-	stw	r0,MAS2(r11)
-	mfspr	r0,SPRN_MAS3
-	stw	r0,MAS3(r11)
-	mfspr	r0,SPRN_MAS6
-	stw	r0,MAS6(r11)
-#ifdef CONFIG_PHYS_64BIT
-	mfspr	r0,SPRN_MAS7
-	stw	r0,MAS7(r11)
-#endif /* CONFIG_PHYS_64BIT */
-#endif /* CONFIG_PPC_BOOK3E_MMU */
-#ifdef CONFIG_44x
-	mfspr	r0,SPRN_MMUCR
-	stw	r0,MMUCR(r11)
-#endif
-	mfspr	r0,SPRN_SRR0
-	stw	r0,_SRR0(r11)
-	mfspr	r0,SPRN_SRR1
-	stw	r0,_SRR1(r11)
-
-	/* set the stack limit to the current stack */
-	mfspr	r8,SPRN_SPRG_THREAD
-	lwz	r0,KSP_LIMIT(r8)
-	stw	r0,SAVED_KSP_LIMIT(r11)
-	rlwinm	r0,r1,0,0,(31 - THREAD_SHIFT)
-	stw	r0,KSP_LIMIT(r8)
-	/* fall through */
-_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
-#endif
-
-#ifdef CONFIG_40x
-	.globl	crit_transfer_to_handler
-crit_transfer_to_handler:
-	lwz	r0,crit_r10@l(0)
-	stw	r0,GPR10(r11)
-	lwz	r0,crit_r11@l(0)
-	stw	r0,GPR11(r11)
-	mfspr	r0,SPRN_SRR0
-	stw	r0,crit_srr0@l(0)
-	mfspr	r0,SPRN_SRR1
-	stw	r0,crit_srr1@l(0)
-
-	/* set the stack limit to the current stack */
-	mfspr	r8,SPRN_SPRG_THREAD
-	lwz	r0,KSP_LIMIT(r8)
-	stw	r0,saved_ksp_limit@l(0)
-	rlwinm	r0,r1,0,0,(31 - THREAD_SHIFT)
-	stw	r0,KSP_LIMIT(r8)
-	/* fall through */
-_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
-#endif
-
-/*
- * This code finishes saving the registers to the exception frame
- * and jumps to the appropriate handler for the exception, turning
- * on address translation.
- * Note that we rely on the caller having set cr0.eq iff the exception
- * occurred in kernel mode (i.e. MSR:PR = 0).
- */
-	.globl	transfer_to_handler_full
-transfer_to_handler_full:
-	SAVE_NVGPRS(r11)
-_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full)
-	/* fall through */
-
-	.globl	transfer_to_handler
-transfer_to_handler:
-	stw	r2,GPR2(r11)
-	stw	r12,_NIP(r11)
-	stw	r9,_MSR(r11)
-	andi.	r2,r9,MSR_PR
-	mfctr	r12
-	mfspr	r2,SPRN_XER
-	stw	r12,_CTR(r11)
-	stw	r2,_XER(r11)
-	mfspr	r12,SPRN_SPRG_THREAD
-	tovirt_vmstack r12, r12
-	beq	2f			/* if from user, fix up THREAD.regs */
-	addi	r2, r12, -THREAD
-	addi	r11,r1,STACK_FRAME_OVERHEAD
-	stw	r11,PT_REGS(r12)
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-	/* Check to see if the dbcr0 register is set up to debug.  Use the
-	   internal debug mode bit to do this. */
-	lwz	r12,THREAD_DBCR0(r12)
-	andis.	r12,r12,DBCR0_IDM@h
-#endif
-	ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
-#ifdef CONFIG_PPC_BOOK3S_32
-	kuep_lock r11, r12
-#endif
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-	beq+	3f
-	/* From user and task is ptraced - load up global dbcr0 */
-	li	r12,-1			/* clear all pending debug events */
-	mtspr	SPRN_DBSR,r12
-	lis	r11,global_dbcr0@ha
-	tophys(r11,r11)
-	addi	r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
-	lwz	r9,TASK_CPU(r2)
-	slwi	r9,r9,2
-	add	r11,r11,r9
-#endif
-	lwz	r12,0(r11)
-	mtspr	SPRN_DBCR0,r12
-#endif
-
-	b	3f
-
-2:	/* if from kernel, check interrupted DOZE/NAP mode and
-         * check for stack overflow
-         */
-	kuap_save_and_lock r11, r12, r9, r2, r6
-	addi	r2, r12, -THREAD
-#ifndef CONFIG_VMAP_STACK
-	lwz	r9,KSP_LIMIT(r12)
-	cmplw	r1,r9			/* if r1 <= ksp_limit */
-	ble-	stack_ovf		/* then the kernel stack overflowed */
-#endif
-5:
 #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
+	.globl	prepare_transfer_to_handler
+prepare_transfer_to_handler:
+	/* if from kernel, check interrupted DOZE/NAP mode */
 	lwz	r12,TI_LOCAL_FLAGS(r2)
 	mtcrf	0x01,r12
 	bt-	31-TLF_NAPPING,4f
 	bt-	31-TLF_SLEEPING,7f
-#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
-	.globl transfer_to_handler_cont
-transfer_to_handler_cont:
-3:
-	mflr	r9
-	tovirt_novmstack r2, r2 	/* set r2 to current */
-	tovirt_vmstack r9, r9
-	lwz	r11,0(r9)		/* virtual address of handler */
-	lwz	r9,4(r9)		/* where to go when done */
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
-	mtspr	SPRN_NRI, r0
-#endif
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/*
-	 * When tracing IRQ state (lockdep) we enable the MMU before we call
-	 * the IRQ tracing functions as they might access vmalloc space or
-	 * perform IOs for console output.
-	 *
-	 * To speed up the syscall path where interrupts stay on, let's check
-	 * first if we are changing the MSR value at all.
-	 */
-	tophys_novmstack r12, r1
-	lwz	r12,_MSR(r12)
-	andi.	r12,r12,MSR_EE
-	bne	1f
-
-	/* MSR isn't changing, just transition directly */
-#endif
-	mtspr	SPRN_SRR0,r11
-	mtspr	SPRN_SRR1,r10
-	mtlr	r9
-	rfi				/* jump to handler, enable MMU */
-#ifdef CONFIG_40x
-	b .	/* Prevent prefetch past rfi */
-#endif
+	blr
 
-#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
 4:	rlwinm	r12,r12,0,~_TLF_NAPPING
 	stw	r12,TI_LOCAL_FLAGS(r2)
 	b	power_save_ppc32_restore
@@ -246,97 +67,18 @@ transfer_to_handler_cont:
 	lwz	r9,_MSR(r11)		/* if sleeping, clear MSR.EE */
 	rlwinm	r9,r9,0,~MSR_EE
 	lwz	r12,_LINK(r11)		/* and return to address in LR */
-	kuap_restore r11, r2, r3, r4, r5
 	lwz	r2, GPR2(r11)
 	b	fast_exception_return
-#endif
-_ASM_NOKPROBE_SYMBOL(transfer_to_handler)
-_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-1:	/* MSR is changing, re-enable MMU so we can notify lockdep. We need to
-	 * keep interrupts disabled at this point otherwise we might risk
-	 * taking an interrupt before we tell lockdep they are enabled.
-	 */
-	lis	r12,reenable_mmu@h
-	ori	r12,r12,reenable_mmu@l
-	LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
-	mtspr	SPRN_SRR0,r12
-	mtspr	SPRN_SRR1,r0
-	rfi
-#ifdef CONFIG_40x
-	b .	/* Prevent prefetch past rfi */
-#endif
-
-reenable_mmu:
-	/*
-	 * We save a bunch of GPRs,
-	 * r3 can be different from GPR3(r1) at this point, r9 and r11
-	 * contains the old MSR and handler address respectively,
-	 * r0, r4-r8, r12, CCR, CTR, XER etc... are left
-	 * clobbered as they aren't useful past this point.
-	 */
-
-	stwu	r1,-32(r1)
-	stw	r9,8(r1)
-	stw	r11,12(r1)
-	stw	r3,16(r1)
-
-	/* If we are disabling interrupts (normal case), simply log it with
-	 * lockdep
-	 */
-1:	bl	trace_hardirqs_off
-	lwz	r3,16(r1)
-	lwz	r11,12(r1)
-	lwz	r9,8(r1)
-	addi	r1,r1,32
-	mtctr	r11
-	mtlr	r9
-	bctr				/* jump to handler */
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
-#ifndef CONFIG_VMAP_STACK
-/*
- * On kernel stack overflow, load up an initial stack pointer
- * and call StackOverflow(regs), which should not return.
- */
-stack_ovf:
-	/* sometimes we use a statically-allocated stack, which is OK. */
-	lis	r12,_end@h
-	ori	r12,r12,_end@l
-	cmplw	r1,r12
-	ble	5b			/* r1 <= &_end is OK */
-	SAVE_NVGPRS(r11)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	lis	r1,init_thread_union@ha
-	addi	r1,r1,init_thread_union@l
-	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
-	lis	r9,StackOverflow@ha
-	addi	r9,r9,StackOverflow@l
-	LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
-	mtspr	SPRN_NRI, r0
-#endif
-	mtspr	SPRN_SRR0,r9
-	mtspr	SPRN_SRR1,r10
-	rfi
-#ifdef CONFIG_40x
-	b .	/* Prevent prefetch past rfi */
-#endif
-_ASM_NOKPROBE_SYMBOL(stack_ovf)
-#endif
+_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
+#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
 
 	.globl	transfer_to_syscall
 transfer_to_syscall:
 	SAVE_NVGPRS(r1)
-#ifdef CONFIG_PPC_BOOK3S_32
-	kuep_lock r11, r12
-#endif
 
 	/* Calling convention has r9 = orig r0, r10 = regs */
 	addi	r10,r1,STACK_FRAME_OVERHEAD
 	mr	r9,r0
-	stw	r10,THREAD+PT_REGS(r2)
 	bl	system_call_exception
 
 ret_from_syscall:
@@ -349,10 +91,6 @@ ret_from_syscall:
 	cmplwi	cr0,r5,0
 	bne-	2f
 #endif /* CONFIG_PPC_47x */
-#ifdef CONFIG_PPC_BOOK3S_32
-	kuep_unlock r5, r7
-#endif
-	kuap_check r2, r4
 	lwz	r4,_LINK(r1)
 	lwz	r5,_CCR(r1)
 	mtlr	r4
@@ -411,27 +149,6 @@ ret_from_kernel_thread:
 	li	r3,0
 	b	ret_from_syscall
 
-/*
- * Top-level page fault handling.
- * This is in assembler because if do_page_fault tells us that
- * it is a bad kernel page fault, we want to save the non-volatile
- * registers before calling bad_page_fault.
- */
-	.globl	handle_page_fault
-handle_page_fault:
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	do_page_fault
-	cmpwi	r3,0
-	beq+	ret_from_except
-	SAVE_NVGPRS(r1)
-	lwz	r0,_TRAP(r1)
-	clrrwi	r0,r0,1
-	stw	r0,_TRAP(r1)
-	mr	r4,r3		/* err arg for bad_page_fault */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	__bad_page_fault
-	b	ret_from_except_full
-
 /*
  * This routine switches between two different tasks.  The process
  * state of one is saved on its kernel stack.  Then the state
@@ -485,7 +202,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
 	stw	r10,_CCR(r1)
 	stw	r1,KSP(r3)	/* Set old stack pointer */
 
-	kuap_check r2, r0
 #ifdef CONFIG_SMP
 	/* We need a sync somewhere here to make sure that if the
 	 * previous task gets rescheduled on another CPU, it sees all
@@ -529,12 +245,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
 fast_exception_return:
 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
 	andi.	r10,r9,MSR_RI		/* check for recoverable interrupt */
-	beq	1f			/* if not, we've got problems */
+	beq	3f			/* if not, we've got problems */
 #endif
 
 2:	REST_4GPRS(3, r11)
 	lwz	r10,_CCR(r11)
-	REST_GPR(1, r11)
+	REST_2GPRS(1, r11)
 	mtcr	r10
 	lwz	r10,_LINK(r11)
 	mtlr	r10
@@ -556,257 +272,147 @@ fast_exception_return:
 #endif
 _ASM_NOKPROBE_SYMBOL(fast_exception_return)
 
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-/* check if the exception happened in a restartable section */
-1:	lis	r3,exc_exit_restart_end@ha
-	addi	r3,r3,exc_exit_restart_end@l
-	cmplw	r12,r3
-	bge	3f
-	lis	r4,exc_exit_restart@ha
-	addi	r4,r4,exc_exit_restart@l
-	cmplw	r12,r4
-	blt	3f
-	lis	r3,fee_restarts@ha
-	tophys(r3,r3)
-	lwz	r5,fee_restarts@l(r3)
-	addi	r5,r5,1
-	stw	r5,fee_restarts@l(r3)
-	mr	r12,r4		/* restart at exc_exit_restart */
-	b	2b
-
-	.section .bss
-	.align	2
-fee_restarts:
-	.space	4
-	.previous
-
 /* aargh, a nonrecoverable interrupt, panic */
 /* aargh, we don't know which trap this is */
 3:
 	li	r10,-1
 	stw	r10,_TRAP(r11)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	lis	r10,MSR_KERNEL@h
-	ori	r10,r10,MSR_KERNEL@l
-	bl	transfer_to_handler_full
-	.long	unrecoverable_exception
-	.long	ret_from_except
-#endif
-
-	.globl	ret_from_except_full
-ret_from_except_full:
-	REST_NVGPRS(r1)
-	/* fall through */
-
-	.globl	ret_from_except
-ret_from_except:
-	/* Hard-disable interrupts so that current_thread_info()->flags
-	 * can't change between when we test it and when we return
-	 * from the interrupt. */
-	/* Note: We don't bother telling lockdep about it */
-	LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-	mtmsr	r10		/* disable interrupts */
-
-	lwz	r3,_MSR(r1)	/* Returning to user mode? */
-	andi.	r0,r3,MSR_PR
-	beq	resume_kernel
-
-user_exc_return:		/* r10 contains MSR_KERNEL here */
-	/* Check current_thread_info()->flags */
-	lwz	r9,TI_FLAGS(r2)
-	andi.	r0,r9,_TIF_USER_WORK_MASK
-	bne	do_work
+	prepare_transfer_to_handler
+	bl	unrecoverable_exception
+	trap	/* should not get here */
 
-restore_user:
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-	/* Check whether this process has its own DBCR0 value.  The internal
-	   debug mode bit tells us that dbcr0 should be loaded. */
-	lwz	r0,THREAD+THREAD_DBCR0(r2)
-	andis.	r10,r0,DBCR0_IDM@h
-	bnel-	load_dbcr0
-#endif
-	ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
-#ifdef CONFIG_PPC_BOOK3S_32
-	kuep_unlock	r10, r11
-#endif
+	.globl interrupt_return
+interrupt_return:
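+	/* Select the user or kernel return path based on the saved MSR_PR bit */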
+	lwz	r4,_MSR(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	andi.	r0,r4,MSR_PR
+	beq	.Lkernel_interrupt_return
+	bl	interrupt_exit_user_prepare
+	cmpwi	r3,0
+	bne-	.Lrestore_nvgprs
 
-	b	restore
+.Lfast_user_interrupt_return:
+	lwz	r11,_NIP(r1)
+	lwz	r12,_MSR(r1)
+	mtspr	SPRN_SRR0,r11
+	mtspr	SPRN_SRR1,r12
 
-/* N.B. the only way to get here is from the beq following ret_from_except. */
-resume_kernel:
-	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
-	lwz	r8,TI_FLAGS(r2)
-	andis.	r0,r8,_TIF_EMULATE_STACK_STORE@h
-	beq+	1f
+BEGIN_FTR_SECTION
+	stwcx.	r0,0,r1		/* to clear the reservation */
+FTR_SECTION_ELSE
+	lwarx	r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
-	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
+	lwz	r3,_CCR(r1)
+	lwz	r4,_LINK(r1)
+	lwz	r5,_CTR(r1)
+	lwz	r6,_XER(r1)
+	li	r0,0
 
-	lwz	r3,GPR1(r1)
-	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
-	mr	r4,r1			/* src:  current exception frame */
-	mr	r1,r3			/* Reroute the trampoline frame to r1 */
+	/*
+	 * Leaving a stale exception_marker on the stack can confuse
+	 * the reliable stack unwinder later on. Clear it.
+	 */
+	stw	r0,8(r1)
+	REST_4GPRS(7, r1)
+	REST_2GPRS(11, r1)
 
-	/* Copy from the original to the trampoline. */
-	li	r5,INT_FRAME_SIZE/4	/* size: INT_FRAME_SIZE */
-	li	r6,0			/* start offset: 0 */
+	mtcr	r3
+	mtlr	r4
 	mtctr	r5
-2:	lwzx	r0,r6,r4
-	stwx	r0,r6,r3
-	addi	r6,r6,4
-	bdnz	2b
-
-	/* Do real store operation to complete stwu */
-	lwz	r5,GPR1(r1)
-	stw	r8,0(r5)
+	mtspr	SPRN_XER,r6
 
-	/* Clear _TIF_EMULATE_STACK_STORE flag */
-	lis	r11,_TIF_EMULATE_STACK_STORE@h
-	addi	r5,r2,TI_FLAGS
-0:	lwarx	r8,0,r5
-	andc	r8,r8,r11
-	stwcx.	r8,0,r5
-	bne-	0b
-1:
-
-#ifdef CONFIG_PREEMPTION
-	/* check current_thread_info->preempt_count */
-	lwz	r0,TI_PREEMPT(r2)
-	cmpwi	0,r0,0		/* if non-zero, just restore regs and return */
-	bne	restore_kuap
-	andi.	r8,r8,_TIF_NEED_RESCHED
-	beq+	restore_kuap
-	lwz	r3,_MSR(r1)
-	andi.	r0,r3,MSR_EE	/* interrupts off? */
-	beq	restore_kuap	/* don't schedule if so */
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* Lockdep thinks irqs are enabled, we need to call
-	 * preempt_schedule_irq with IRQs off, so we inform lockdep
-	 * now that we -did- turn them off already
-	 */
-	bl	trace_hardirqs_off
-#endif
-	bl	preempt_schedule_irq
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* And now, to properly rebalance the above, we tell lockdep they
-	 * are being turned back on, which will happen when we return
-	 */
-	bl	trace_hardirqs_on
+	REST_4GPRS(2, r1)
+	REST_GPR(6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	rfi
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
 #endif
-#endif /* CONFIG_PREEMPTION */
-restore_kuap:
-	kuap_restore r1, r2, r9, r10, r0
-
-	/* interrupts are hard-disabled at this point */
-restore:
-#if defined(CONFIG_44x) && !defined(CONFIG_PPC_47x)
-	lis	r4,icache_44x_need_flush@ha
-	lwz	r5,icache_44x_need_flush@l(r4)
-	cmplwi	cr0,r5,0
-	beq+	1f
-	li	r6,0
-	iccci	r0,r0
-	stw	r6,icache_44x_need_flush@l(r4)
-1:
-#endif  /* CONFIG_44x */
 
-	lwz	r9,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* Lockdep doesn't know about the fact that IRQs are temporarily turned
-	 * off in this assembly code while peeking at TI_FLAGS() and such. However
-	 * we need to inform it if the exception turned interrupts off, and we
-	 * are about to trun them back on.
-	 */
-	andi.	r10,r9,MSR_EE
-	beq	1f
-	stwu	r1,-32(r1)
-	mflr	r0
-	stw	r0,4(r1)
-	bl	trace_hardirqs_on
-	addi	r1, r1, 32
-	lwz	r9,_MSR(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
+.Lrestore_nvgprs:
+	REST_NVGPRS(r1)
+	b	.Lfast_user_interrupt_return
 
-	lwz	r0,GPR0(r1)
-	lwz	r2,GPR2(r1)
-	REST_4GPRS(3, r1)
-	REST_2GPRS(7, r1)
+.Lkernel_interrupt_return:
+	bl	interrupt_exit_kernel_prepare
 
-	lwz	r10,_XER(r1)
-	lwz	r11,_CTR(r1)
-	mtspr	SPRN_XER,r10
-	mtctr	r11
+.Lfast_kernel_interrupt_return:
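+	/*
+	 * A non-zero return means a stack store must be emulated (handled at
+	 * 1: below); keep the result in cr1 across the register restore.
+	 */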
+	cmpwi	cr1,r3,0
+	lwz	r11,_NIP(r1)
+	lwz	r12,_MSR(r1)
+	mtspr	SPRN_SRR0,r11
+	mtspr	SPRN_SRR1,r12
 
 BEGIN_FTR_SECTION
-	lwarx	r11,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
-	stwcx.	r0,0,r1			/* to clear the reservation */
+	stwcx.	r0,0,r1		/* to clear the reservation */
+FTR_SECTION_ELSE
+	lwarx	r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-	andi.	r10,r9,MSR_RI		/* check if this exception occurred */
-	beql	nonrecoverable		/* at a bad place (MSR:RI = 0) */
+	lwz	r3,_LINK(r1)
+	lwz	r4,_CTR(r1)
+	lwz	r5,_XER(r1)
+	lwz	r6,_CCR(r1)
+	li	r0,0
+
+	REST_4GPRS(7, r1)
+	REST_2GPRS(11, r1)
 
-	lwz	r10,_CCR(r1)
-	lwz	r11,_LINK(r1)
-	mtcrf	0xFF,r10
-	mtlr	r11
+	mtlr	r3
+	mtctr	r4
+	mtspr	SPRN_XER,r5
 
-	/* Clear the exception_marker on the stack to avoid confusing stacktrace */
-	li	r10, 0
-	stw	r10, 8(r1)
 	/*
-	 * Once we put values in SRR0 and SRR1, we are in a state
-	 * where exceptions are not recoverable, since taking an
-	 * exception will trash SRR0 and SRR1.  Therefore we clear the
-	 * MSR:RI bit to indicate this.  If we do take an exception,
-	 * we can't return to the point of the exception but we
-	 * can restart the exception exit path at the label
-	 * exc_exit_restart below.  -- paulus
+	 * Leaving a stale exception_marker on the stack can confuse
+	 * the reliable stack unwinder later on. Clear it.
 	 */
-	LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
-	mtmsr	r10		/* clear the RI bit */
-	.globl exc_exit_restart
-exc_exit_restart:
-	lwz	r12,_NIP(r1)
-	mtspr	SPRN_SRR0,r12
-	mtspr	SPRN_SRR1,r9
-	REST_4GPRS(9, r1)
-	lwz	r1,GPR1(r1)
-	.globl exc_exit_restart_end
-exc_exit_restart_end:
+	stw	r0,8(r1)
+
+	REST_4GPRS(2, r1)
+
+	bne-	cr1,1f /* emulate stack store */
+	mtcr	r6
+	REST_GPR(6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
 	rfi
-_ASM_NOKPROBE_SYMBOL(exc_exit_restart)
-_ASM_NOKPROBE_SYMBOL(exc_exit_restart_end)
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
 
-#else /* !(CONFIG_4xx || CONFIG_BOOKE) */
-	/*
-	 * This is a bit different on 4xx/Book-E because it doesn't have
-	 * the RI bit in the MSR.
-	 * The TLB miss handler checks if we have interrupted
-	 * the exception exit path and restarts it if so
-	 * (well maybe one day it will... :).
+1:	/*
+	 * Emulate stack store with update. New r1 value was already calculated
+	 * and updated in our interrupt regs by emulate_loadstore, but we can't
+	 * store the previous value of r1 to the stack before re-loading our
+	 * registers from it, otherwise they could be clobbered.  Use
+	 * SPRG Scratch0 as temporary storage to hold the store
+	 * data, as interrupts are disabled here so it won't be clobbered.
 	 */
-	lwz	r11,_LINK(r1)
-	mtlr	r11
-	lwz	r10,_CCR(r1)
-	mtcrf	0xff,r10
-	/* Clear the exception_marker on the stack to avoid confusing stacktrace */
-	li	r10, 0
-	stw	r10, 8(r1)
-	REST_2GPRS(9, r1)
-	.globl exc_exit_restart
-exc_exit_restart:
-	lwz	r11,_NIP(r1)
-	lwz	r12,_MSR(r1)
-	mtspr	SPRN_SRR0,r11
-	mtspr	SPRN_SRR1,r12
-	REST_2GPRS(11, r1)
-	lwz	r1,GPR1(r1)
-	.globl exc_exit_restart_end
-exc_exit_restart_end:
+	mtcr	r6
+#ifdef CONFIG_BOOKE
+	mtspr	SPRN_SPRG_WSCRATCH0, r9
+#else
+	mtspr	SPRN_SPRG_SCRATCH0, r9
+#endif
+	addi	r9,r1,INT_FRAME_SIZE /* get original r1 */
+	REST_GPR(6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	stw	r9,0(r1) /* perform store component of stwu */
+#ifdef CONFIG_BOOKE
+	mfspr	r9, SPRN_SPRG_RSCRATCH0
+#else
+	mfspr	r9, SPRN_SPRG_SCRATCH0
+#endif
 	rfi
-	b	.			/* prevent prefetch past rfi */
-_ASM_NOKPROBE_SYMBOL(exc_exit_restart)
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
+_ASM_NOKPROBE_SYMBOL(interrupt_return)
+
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
 
 /*
  * Returning from a critical interrupt in user mode doesn't need
@@ -837,8 +443,7 @@ _ASM_NOKPROBE_SYMBOL(exc_exit_restart)
 	REST_NVGPRS(r1);						\
 	lwz	r3,_MSR(r1);						\
 	andi.	r3,r3,MSR_PR;						\
-	LOAD_REG_IMMEDIATE(r10,MSR_KERNEL);				\
-	bne	user_exc_return;					\
+	bne	interrupt_return;					\
 	lwz	r0,GPR0(r1);						\
 	lwz	r2,GPR2(r1);						\
 	REST_4GPRS(3, r1);						\
@@ -906,11 +511,6 @@ _ASM_NOKPROBE_SYMBOL(exc_exit_restart)
 #ifdef CONFIG_40x
 	.globl	ret_from_crit_exc
 ret_from_crit_exc:
-	mfspr	r9,SPRN_SPRG_THREAD
-	lis	r10,saved_ksp_limit@ha;
-	lwz	r10,saved_ksp_limit@l(r10);
-	tovirt(r9,r9);
-	stw	r10,KSP_LIMIT(r9)
 	lis	r9,crit_srr0@ha;
 	lwz	r9,crit_srr0@l(r9);
 	lis	r10,crit_srr1@ha;
@@ -924,9 +524,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
 #ifdef CONFIG_BOOKE
 	.globl	ret_from_crit_exc
 ret_from_crit_exc:
-	mfspr	r9,SPRN_SPRG_THREAD
-	lwz	r10,SAVED_KSP_LIMIT(r1)
-	stw	r10,KSP_LIMIT(r9)
 	RESTORE_xSRR(SRR0,SRR1);
 	RESTORE_MMU_REGS;
 	RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
@@ -934,9 +531,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
 
 	.globl	ret_from_debug_exc
 ret_from_debug_exc:
-	mfspr	r9,SPRN_SPRG_THREAD
-	lwz	r10,SAVED_KSP_LIMIT(r1)
-	stw	r10,KSP_LIMIT(r9)
 	RESTORE_xSRR(SRR0,SRR1);
 	RESTORE_xSRR(CSRR0,CSRR1);
 	RESTORE_MMU_REGS;
@@ -945,9 +539,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_debug_exc)
 
 	.globl	ret_from_mcheck_exc
 ret_from_mcheck_exc:
-	mfspr	r9,SPRN_SPRG_THREAD
-	lwz	r10,SAVED_KSP_LIMIT(r1)
-	stw	r10,KSP_LIMIT(r9)
 	RESTORE_xSRR(SRR0,SRR1);
 	RESTORE_xSRR(CSRR0,CSRR1);
 	RESTORE_xSRR(DSRR0,DSRR1);
@@ -955,121 +546,8 @@ ret_from_mcheck_exc:
 	RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI)
 _ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc)
 #endif /* CONFIG_BOOKE */
-
-/*
- * Load the DBCR0 value for a task that is being ptraced,
- * having first saved away the global DBCR0.  Note that r0
- * has the dbcr0 value to set upon entry to this.
- */
-load_dbcr0:
-	mfmsr	r10		/* first disable debug exceptions */
-	rlwinm	r10,r10,0,~MSR_DE
-	mtmsr	r10
-	isync
-	mfspr	r10,SPRN_DBCR0
-	lis	r11,global_dbcr0@ha
-	addi	r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
-	lwz	r9,TASK_CPU(r2)
-	slwi	r9,r9,2
-	add	r11,r11,r9
-#endif
-	stw	r10,0(r11)
-	mtspr	SPRN_DBCR0,r0
-	li	r11,-1
-	mtspr	SPRN_DBSR,r11	/* clear all pending debug events */
-	blr
-
-	.section .bss
-	.align	4
-	.global global_dbcr0
-global_dbcr0:
-	.space	4*NR_CPUS
-	.previous
 #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
 
-do_work:			/* r10 contains MSR_KERNEL here */
-	andi.	r0,r9,_TIF_NEED_RESCHED
-	beq	do_user_signal
-
-do_resched:			/* r10 contains MSR_KERNEL here */
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_on
-	mfmsr	r10
-#endif
-	ori	r10,r10,MSR_EE
-	mtmsr	r10		/* hard-enable interrupts */
-	bl	schedule
-recheck:
-	/* Note: And we don't tell it we are disabling them again
-	 * neither. Those disable/enable cycles used to peek at
-	 * TI_FLAGS aren't advertised.
-	 */
-	LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-	mtmsr	r10		/* disable interrupts */
-	lwz	r9,TI_FLAGS(r2)
-	andi.	r0,r9,_TIF_NEED_RESCHED
-	bne-	do_resched
-	andi.	r0,r9,_TIF_USER_WORK_MASK
-	beq	restore_user
-do_user_signal:			/* r10 contains MSR_KERNEL here */
-	ori	r10,r10,MSR_EE
-	mtmsr	r10		/* hard-enable interrupts */
-	/* save r13-r31 in the exception frame, if not already done */
-	lwz	r3,_TRAP(r1)
-	andi.	r0,r3,1
-	beq	2f
-	SAVE_NVGPRS(r1)
-	rlwinm	r3,r3,0,0,30
-	stw	r3,_TRAP(r1)
-2:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	mr	r4,r9
-	bl	do_notify_resume
-	REST_NVGPRS(r1)
-	b	recheck
-
-/*
- * We come here when we are at the end of handling an exception
- * that occurred at a place where taking an exception will lose
- * state information, such as the contents of SRR0 and SRR1.
- */
-nonrecoverable:
-	lis	r10,exc_exit_restart_end@ha
-	addi	r10,r10,exc_exit_restart_end@l
-	cmplw	r12,r10
-	bge	3f
-	lis	r11,exc_exit_restart@ha
-	addi	r11,r11,exc_exit_restart@l
-	cmplw	r12,r11
-	blt	3f
-	lis	r10,ee_restarts@ha
-	lwz	r12,ee_restarts@l(r10)
-	addi	r12,r12,1
-	stw	r12,ee_restarts@l(r10)
-	mr	r12,r11		/* restart at exc_exit_restart */
-	blr
-3:	/* OK, we can't recover, kill this process */
-	lwz	r3,_TRAP(r1)
-	andi.	r0,r3,1
-	beq	5f
-	SAVE_NVGPRS(r1)
-	rlwinm	r3,r3,0,0,30
-	stw	r3,_TRAP(r1)
-5:	mfspr	r2,SPRN_SPRG_THREAD
-	addi	r2,r2,-THREAD
-	tovirt(r2,r2)			/* set back r2 to current */
-4:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	unrecoverable_exception
-	/* shouldn't return */
-	b	4b
-_ASM_NOKPROBE_SYMBOL(nonrecoverable)
-
-	.section .bss
-	.align	2
-ee_restarts:
-	.space	4
-	.previous
-
 /*
  * PROM code for specific machines follows.  Put it
  * here so it's easy to add arch-specific sections later.
@@ -1088,7 +566,6 @@ _GLOBAL(enter_rtas)
 	lis	r6,1f@ha	/* physical return address for rtas */
 	addi	r6,r6,1f@l
 	tophys(r6,r6)
-	tophys_novmstack r7, r1
 	lwz	r8,RTASENTRY(r4)
 	lwz	r4,RTASBASE(r4)
 	mfmsr	r9
@@ -1097,24 +574,25 @@ _GLOBAL(enter_rtas)
 	mtmsr	r0	/* disable interrupts so SRR0/1 don't get trashed */
 	li	r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
 	mtlr	r6
-	stw	r7, THREAD + RTAS_SP(r2)
+	stw	r1, THREAD + RTAS_SP(r2)
 	mtspr	SPRN_SRR0,r8
 	mtspr	SPRN_SRR1,r9
 	rfi
-1:	tophys_novmstack r9, r1
-#ifdef CONFIG_VMAP_STACK
-	li	r0, MSR_KERNEL & ~MSR_IR	/* can take DTLB miss */
-	mtmsr	r0
-	isync
-#endif
-	lwz	r8,INT_FRAME_SIZE+4(r9)	/* get return address */
-	lwz	r9,8(r9)	/* original msr value */
-	addi	r1,r1,INT_FRAME_SIZE
-	li	r0,0
-	tophys_novmstack r7, r2
-	stw	r0, THREAD + RTAS_SP(r7)
+1:
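+	/* RTAS returns here with the MMU off; rfi back to virtual mode first */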
+	lis	r8, 1f@h
+	ori	r8, r8, 1f@l
+	LOAD_REG_IMMEDIATE(r9,MSR_KERNEL)
 	mtspr	SPRN_SRR0,r8
 	mtspr	SPRN_SRR1,r9
-	rfi			/* return to caller */
+	rfi			/* Reactivate MMU translation */
+1:
+	lwz	r8,INT_FRAME_SIZE+4(r1)	/* get return address */
+	lwz	r9,8(r1)	/* original msr value */
+	addi	r1,r1,INT_FRAME_SIZE
+	li	r0,0
+	stw	r0, THREAD + RTAS_SP(r2)
+	mtlr	r8
+	mtmsr	r9
+	blr			/* return to caller */
 _ASM_NOKPROBE_SYMBOL(enter_rtas)
 #endif /* CONFIG_PPC_RTAS */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6c4d9e276c4d5b8835a150e3b8d775eb384ecfc3..03727308d8cc4596429274d7143ac65bb1bc5e55 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -117,13 +117,12 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	/*
-	 * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
-	 * would clobber syscall parameters. Also we always enter with IRQs
-	 * enabled and nothing pending. system_call_exception() will call
-	 * trace_hardirqs_off().
-	 *
-	 * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The
-	 * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED.
+	 * scv enters with MSR[EE]=1 and is immediately considered soft-masked.
+	 * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
+	 * and interrupts may be masked and pending already.
+	 * system_call_exception() will call trace_hardirqs_off(), which means
+	 * interrupts may already have been blocked before lockdep is told they
+	 * are off, but this is the best we can do.
 	 */
 
 	/* Calling convention has r9 = orig r0, r10 = regs */
@@ -288,9 +287,8 @@ END_BTB_FLUSH_SECTION
 	std	r11,-16(r10)		/* "regshere" marker */
 
 	/*
-	 * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
-	 * would clobber syscall parameters. Also we always enter with IRQs
-	 * enabled and nothing pending. system_call_exception() will call
+	 * We always enter kernel from userspace with irq soft-mask enabled and
+	 * nothing pending. system_call_exception() will call
 	 * trace_hardirqs_off().
 	 */
 	li	r11,IRQS_ALL_DISABLED
@@ -417,19 +415,6 @@ _GLOBAL(ret_from_kernel_thread)
 	li	r3,0
 	b	.Lsyscall_exit
 
-#ifdef CONFIG_PPC_BOOK3E
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
-	ld	r11,_TRAP(r1)
-	andi.	r0,r11,1
-	beqlr-
-	SAVE_NVGPRS(r1)
-	clrrdi	r0,r11,1
-	std	r0,_TRAP(r1)
-	blr
-_ASM_NOKPROBE_SYMBOL(save_nvgprs);
-#endif
-
 #ifdef CONFIG_PPC_BOOK3S_64
 
 #define FLUSH_COUNT_CACHE	\
@@ -645,7 +630,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	addi	r1,r1,SWITCH_FRAME_SIZE
 	blr
 
-#ifdef CONFIG_PPC_BOOK3S
 	/*
 	 * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
 	 * touched, no exit work created, then this can be used.
@@ -657,6 +641,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
 	kuap_check_amr r3, r4
 	ld	r5,_MSR(r1)
 	andi.	r0,r5,MSR_PR
+#ifdef CONFIG_PPC_BOOK3S
 	bne	.Lfast_user_interrupt_return_amr
 	kuap_kernel_restore r3, r4
 	andi.	r0,r5,MSR_RI
@@ -665,6 +650,10 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	unrecoverable_exception
 	b	. /* should not get here */
+#else
+	bne	.Lfast_user_interrupt_return
+	b	.Lfast_kernel_interrupt_return
+#endif
 
 	.balign IFETCH_ALIGN_BYTES
 	.globl interrupt_return
@@ -678,8 +667,10 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return)
 	cmpdi	r3,0
 	bne-	.Lrestore_nvgprs
 
+#ifdef CONFIG_PPC_BOOK3S
 .Lfast_user_interrupt_return_amr:
 	kuap_user_restore r3, r4
+#endif
 .Lfast_user_interrupt_return:
 	ld	r11,_NIP(r1)
 	ld	r12,_MSR(r1)
@@ -788,7 +779,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
 	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
-#endif /* CONFIG_PPC_BOOK3S */
 
 #ifdef CONFIG_PPC_RTAS
 /*
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index e8eb9992a27038d078256a745c0342287c89f163..7c3654b0d0f47b3f8a42436e59f3d3de0ca0232f 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -63,9 +63,6 @@
 	ld	reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
 
 special_reg_save:
-	lbz	r9,PACAIRQHAPPENED(r13)
-	RECONCILE_IRQ_STATE(r3,r4)
-
 	/*
 	 * We only need (or have stack space) to save this stuff if
 	 * we interrupted the kernel.
@@ -119,15 +116,11 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_MAS5,r10
 	mtspr	SPRN_MAS8,r10
 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
-	SPECIAL_EXC_STORE(r9,IRQHAPPENED)
-
 	mfspr	r10,SPRN_DEAR
 	SPECIAL_EXC_STORE(r10,DEAR)
 	mfspr	r10,SPRN_ESR
 	SPECIAL_EXC_STORE(r10,ESR)
 
-	lbz	r10,PACAIRQSOFTMASK(r13)
-	SPECIAL_EXC_STORE(r10,SOFTE)
 	ld	r10,_NIP(r1)
 	SPECIAL_EXC_STORE(r10,CSRR0)
 	ld	r10,_MSR(r1)
@@ -139,7 +132,8 @@ ret_from_level_except:
 	ld	r3,_MSR(r1)
 	andi.	r3,r3,MSR_PR
 	beq	1f
-	b	ret_from_except
+	REST_NVGPRS(r1)
+	b	interrupt_return
 1:
 
 	LOAD_REG_ADDR(r11,extlb_level_exc)
@@ -193,27 +187,6 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_MAS8,r10
 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
 
-	lbz	r6,PACAIRQSOFTMASK(r13)
-	ld	r5,SOFTE(r1)
-
-	/* Interrupts had better not already be enabled... */
-	tweqi	r6,IRQS_ENABLED
-
-	andi.	r6,r5,IRQS_DISABLED
-	bne	1f
-
-	TRACE_ENABLE_INTS
-	stb	r5,PACAIRQSOFTMASK(r13)
-1:
-	/*
-	 * Restore PACAIRQHAPPENED rather than setting it based on
-	 * the return MSR[EE], since we could have interrupted
-	 * __check_irq_replay() or other inconsistent transitory
-	 * states that must remain that way.
-	 */
-	SPECIAL_EXC_LOAD(r10,IRQHAPPENED)
-	stb	r10,PACAIRQHAPPENED(r13)
-
 	SPECIAL_EXC_LOAD(r10,DEAR)
 	mtspr	SPRN_DEAR,r10
 	SPECIAL_EXC_LOAD(r10,ESR)
@@ -417,14 +390,15 @@ exc_##n##_common:							    \
 	std	r6,_LINK(r1);						    \
 	std	r7,_CTR(r1);						    \
 	std	r8,_XER(r1);						    \
-	li	r3,(n)+1;		/* indicate partial regs in trap */ \
+	li	r3,(n);			/* regs.trap vector */		    \
 	std	r9,0(r1);		/* store stack frame back link */   \
 	std	r10,_CCR(r1);		/* store orig CR in stackframe */   \
 	std	r9,GPR1(r1);		/* store stack frame back link */   \
 	std	r11,SOFTE(r1);		/* and save it to stackframe */     \
 	std	r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */	    \
 	std	r3,_TRAP(r1);		/* set trap number		*/  \
-	std	r0,RESULT(r1);		/* clear regs->result */
+	std	r0,RESULT(r1);		/* clear regs->result */	    \
+	SAVE_NVGPRS(r1);
 
 #define EXCEPTION_COMMON(n) \
 	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
@@ -435,28 +409,6 @@ exc_##n##_common:							    \
 #define EXCEPTION_COMMON_DBG(n) \
 	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG)
 
-/*
- * This is meant for exceptions that don't immediately hard-enable.  We
- * set a bit in paca->irq_happened to ensure that a subsequent call to
- * arch_local_irq_restore() will properly hard-enable and avoid the
- * fast-path, and then reconcile irq state.
- */
-#define INTS_DISABLE	RECONCILE_IRQ_STATE(r3,r4)
-
-/*
- * This is called by exceptions that don't use INTS_DISABLE (that did not
- * touch irq indicators in the PACA).  This will restore MSR:EE to it's
- * previous value
- *
- * XXX In the long run, we may want to open-code it in order to separate the
- *     load from the wrtee, thus limiting the latency caused by the dependency
- *     but at this point, I'll favor code clarity until we have a near to final
- *     implementation
- */
-#define INTS_RESTORE_HARD						    \
-	ld	r11,_MSR(r1);						    \
-	wrtee	r11;
-
 /* XXX FIXME: Restore r14/r15 when necessary */
 #define BAD_STACK_TRAMPOLINE(n)						    \
 exc_##n##_bad_stack:							    \
@@ -505,12 +457,11 @@ exc_##n##_bad_stack:							    \
 	START_EXCEPTION(label);						\
 	NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
 	EXCEPTION_COMMON(trapnum)					\
-	INTS_DISABLE;							\
 	ack(r8);							\
 	CHECK_NAPPING();						\
 	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
 	bl	hdlr;							\
-	b	ret_from_except_lite;
+	b	interrupt_return
 
 /* This value is used to mark exception frames on the stack. */
 	.section	".toc","aw"
@@ -561,11 +512,10 @@ __end_interrupts:
 	CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
 			      PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON_CRIT(0x100)
-	bl	save_nvgprs
 	bl	special_reg_save
 	CHECK_NAPPING();
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	unknown_exception
+	bl	unknown_nmi_exception
 	b	ret_from_crit_except
 
 /* Machine Check Interrupt */
@@ -573,7 +523,6 @@ __end_interrupts:
 	MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK,
 			    PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON_MC(0x000)
-	bl	save_nvgprs
 	bl	special_reg_save
 	CHECK_NAPPING();
 	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -587,7 +536,6 @@ __end_interrupts:
 	mfspr	r14,SPRN_DEAR
 	mfspr	r15,SPRN_ESR
 	EXCEPTION_COMMON(0x300)
-	INTS_DISABLE
 	b	storage_fault_common
 
 /* Instruction Storage Interrupt */
@@ -597,7 +545,6 @@ __end_interrupts:
 	li	r15,0
 	mr	r14,r10
 	EXCEPTION_COMMON(0x400)
-	INTS_DISABLE
 	b	storage_fault_common
 
 /* External Input Interrupt */
@@ -619,13 +566,12 @@ __end_interrupts:
 				PROLOG_ADDITION_1REG)
 	mfspr	r14,SPRN_ESR
 	EXCEPTION_COMMON(0x700)
-	INTS_DISABLE
 	std	r14,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXGEN+EX_R14(r13)
-	bl	save_nvgprs
 	bl	program_check_exception
-	b	ret_from_except
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
 /* Floating Point Unavailable Interrupt */
 	START_EXCEPTION(fp_unavailable);
@@ -637,12 +583,10 @@ __end_interrupts:
 	andi.	r0,r12,MSR_PR;
 	beq-	1f
 	bl	load_up_fpu
-	b	fast_exception_return
-1:	INTS_DISABLE
-	bl	save_nvgprs
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	b	fast_interrupt_return
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	kernel_fp_unavailable_exception
-	b	ret_from_except
+	b	interrupt_return
 
 /* Altivec Unavailable Interrupt */
 	START_EXCEPTION(altivec_unavailable);
@@ -656,15 +600,13 @@ BEGIN_FTR_SECTION
 	andi.	r0,r12,MSR_PR;
 	beq-	1f
 	bl	load_up_altivec
-	b	fast_exception_return
+	b	fast_interrupt_return
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
-	INTS_DISABLE
-	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	altivec_unavailable_exception
-	b	ret_from_except
+	b	interrupt_return
 
 /* AltiVec Assist */
 	START_EXCEPTION(altivec_assist);
@@ -672,17 +614,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 				BOOKE_INTERRUPT_ALTIVEC_ASSIST,
 				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x220)
-	INTS_DISABLE
-	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
 	bl	altivec_assist_exception
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+	REST_NVGPRS(r1)
 #else
 	bl	unknown_exception
 #endif
-	b	ret_from_except
+	b	interrupt_return
 
 
 /* Decrementer Interrupt */
@@ -698,14 +639,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
 			      PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON_CRIT(0x9f0)
-	bl	save_nvgprs
 	bl	special_reg_save
 	CHECK_NAPPING();
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_BOOKE_WDT
 	bl	WatchdogException
 #else
-	bl	unknown_exception
+	bl	unknown_nmi_exception
 #endif
 	b	ret_from_crit_except
 
@@ -722,11 +662,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
 				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0xf20)
-	INTS_DISABLE
-	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	unknown_exception
-	b	ret_from_except
+	b	interrupt_return
 
 /* Debug exception as a critical interrupt*/
 	START_EXCEPTION(debug_crit);
@@ -792,9 +730,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXCRIT+EX_R14(r13)
 	ld	r15,PACA_EXCRIT+EX_R15(r13)
-	bl	save_nvgprs
 	bl	DebugException
-	b	ret_from_except
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
 kernel_dbg_exc:
 	b	.	/* NYI */
@@ -859,24 +797,22 @@ kernel_dbg_exc:
 	 */
 	mfspr	r14,SPRN_DBSR
 	EXCEPTION_COMMON_DBG(0xd08)
-	INTS_DISABLE
 	std	r14,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXDBG+EX_R14(r13)
 	ld	r15,PACA_EXDBG+EX_R15(r13)
-	bl	save_nvgprs
 	bl	DebugException
-	b	ret_from_except
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
 	START_EXCEPTION(perfmon);
 	NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
 				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x260)
-	INTS_DISABLE
 	CHECK_NAPPING()
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	performance_monitor_exception
-	b	ret_from_except_lite
+	b	interrupt_return
 
 /* Doorbell interrupt */
 	MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
@@ -887,11 +823,10 @@ kernel_dbg_exc:
 	CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
 			      PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON_CRIT(0x2a0)
-	bl	save_nvgprs
 	bl	special_reg_save
 	CHECK_NAPPING();
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	unknown_exception
+	bl	unknown_nmi_exception
 	b	ret_from_crit_except
 
 /*
@@ -903,21 +838,18 @@ kernel_dbg_exc:
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x2c0)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	save_nvgprs
-	INTS_RESTORE_HARD
 	bl	unknown_exception
-	b	ret_from_except
+	b	interrupt_return
 
 /* Guest Doorbell critical Interrupt */
 	START_EXCEPTION(guest_doorbell_crit);
 	CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
 			      PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON_CRIT(0x2e0)
-	bl	save_nvgprs
 	bl	special_reg_save
 	CHECK_NAPPING();
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	unknown_exception
+	bl	unknown_nmi_exception
 	b	ret_from_crit_except
 
 /* Hypervisor call */
@@ -926,10 +858,8 @@ kernel_dbg_exc:
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x310)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	save_nvgprs
-	INTS_RESTORE_HARD
 	bl	unknown_exception
-	b	ret_from_except
+	b	interrupt_return
 
 /* Embedded Hypervisor privileged */
 	START_EXCEPTION(ehpriv);
@@ -937,10 +867,8 @@ kernel_dbg_exc:
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x320)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	save_nvgprs
-	INTS_RESTORE_HARD
 	bl	unknown_exception
-	b	ret_from_except
+	b	interrupt_return
 
 /* LRAT Error interrupt */
 	START_EXCEPTION(lrat_error);
@@ -948,10 +876,8 @@ kernel_dbg_exc:
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x340)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	save_nvgprs
-	INTS_RESTORE_HARD
 	bl	unknown_exception
-	b	ret_from_except
+	b	interrupt_return
 
 /*
  * An interrupt came in while soft-disabled; We mark paca->irq_happened
@@ -1011,14 +937,7 @@ storage_fault_common:
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	ld	r15,PACA_EXGEN+EX_R15(r13)
 	bl	do_page_fault
-	cmpdi	r3,0
-	bne-	1f
-	b	ret_from_except_lite
-1:	bl	save_nvgprs
-	mr	r4,r3
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	__bad_page_fault
-	b	ret_from_except
+	b	interrupt_return
 
 /*
  * Alignment exception doesn't fit entirely in the 0x100 bytes so it
@@ -1030,291 +949,9 @@ alignment_more:
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	ld	r15,PACA_EXGEN+EX_R15(r13)
-	bl	save_nvgprs
-	INTS_RESTORE_HARD
 	bl	alignment_exception
-	b	ret_from_except
-
-	.align	7
-_GLOBAL(ret_from_except)
-	ld	r11,_TRAP(r1)
-	andi.	r0,r11,1
-	bne	ret_from_except_lite
 	REST_NVGPRS(r1)
-
-_GLOBAL(ret_from_except_lite)
-	/*
-	 * Disable interrupts so that current_thread_info()->flags
-	 * can't change between when we test it and when we return
-	 * from the interrupt.
-	 */
-	wrteei	0
-
-	ld	r9, PACA_THREAD_INFO(r13)
-	ld	r3,_MSR(r1)
-	ld	r10,PACACURRENT(r13)
-	ld	r4,TI_FLAGS(r9)
-	andi.	r3,r3,MSR_PR
-	beq	resume_kernel
-	lwz	r3,(THREAD+THREAD_DBCR0)(r10)
-
-	/* Check current_thread_info()->flags */
-	andi.	r0,r4,_TIF_USER_WORK_MASK
-	bne	1f
-	/*
-	 * Check to see if the dbcr0 register is set up to debug.
-	 * Use the internal debug mode bit to do this.
-	 */
-	andis.	r0,r3,DBCR0_IDM@h
-	beq	restore
-	mfmsr	r0
-	rlwinm	r0,r0,0,~MSR_DE	/* Clear MSR.DE */
-	mtmsr	r0
-	mtspr	SPRN_DBCR0,r3
-	li	r10, -1
-	mtspr	SPRN_DBSR,r10
-	b	restore
-1:	andi.	r0,r4,_TIF_NEED_RESCHED
-	beq	2f
-	bl	restore_interrupts
-	SCHEDULE_USER
-	b	ret_from_except_lite
-2:
-	bl	save_nvgprs
-	/*
-	 * Use a non volatile GPR to save and restore our thread_info flags
-	 * across the call to restore_interrupts.
-	 */
-	mr	r30,r4
-	bl	restore_interrupts
-	mr	r4,r30
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	do_notify_resume
-	b	ret_from_except
-
-resume_kernel:
-	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
-	andis.	r8,r4,_TIF_EMULATE_STACK_STORE@h
-	beq+	1f
-
-	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
-
-	ld	r3,GPR1(r1)
-	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
-	mr	r4,r1			/* src:  current exception frame */
-	mr	r1,r3			/* Reroute the trampoline frame to r1 */
-
-	/* Copy from the original to the trampoline. */
-	li	r5,INT_FRAME_SIZE/8	/* size: INT_FRAME_SIZE */
-	li	r6,0			/* start offset: 0 */
-	mtctr	r5
-2:	ldx	r0,r6,r4
-	stdx	r0,r6,r3
-	addi	r6,r6,8
-	bdnz	2b
-
-	/* Do real store operation to complete stdu */
-	ld	r5,GPR1(r1)
-	std	r8,0(r5)
-
-	/* Clear _TIF_EMULATE_STACK_STORE flag */
-	lis	r11,_TIF_EMULATE_STACK_STORE@h
-	addi	r5,r9,TI_FLAGS
-0:	ldarx	r4,0,r5
-	andc	r4,r4,r11
-	stdcx.	r4,0,r5
-	bne-	0b
-1:
-
-#ifdef CONFIG_PREEMPT
-	/* Check if we need to preempt */
-	andi.	r0,r4,_TIF_NEED_RESCHED
-	beq+	restore
-	/* Check that preempt_count() == 0 and interrupts are enabled */
-	lwz	r8,TI_PREEMPT(r9)
-	cmpwi	cr0,r8,0
-	bne	restore
-	ld	r0,SOFTE(r1)
-	andi.	r0,r0,IRQS_DISABLED
-	bne	restore
-
-	/*
-	 * Here we are preempting the current task. We want to make
-	 * sure we are soft-disabled first and reconcile irq state.
-	 */
-	RECONCILE_IRQ_STATE(r3,r4)
-	bl	preempt_schedule_irq
-
-	/*
-	 * arch_local_irq_restore() from preempt_schedule_irq above may
-	 * enable hard interrupt but we really should disable interrupts
-	 * when we return from the interrupt, and so that we don't get
-	 * interrupted after loading SRR0/1.
-	 */
-	wrteei	0
-#endif /* CONFIG_PREEMPT */
-
-restore:
-	/*
-	 * This is the main kernel exit path. First we check if we
-	 * are about to re-enable interrupts
-	 */
-	ld	r5,SOFTE(r1)
-	lbz	r6,PACAIRQSOFTMASK(r13)
-	andi.	r5,r5,IRQS_DISABLED
-	bne	.Lrestore_irq_off
-
-	/* We are enabling, were we already enabled ? Yes, just return */
-	andi.	r6,r6,IRQS_DISABLED
-	beq	cr0,fast_exception_return
-
-	/*
-	 * We are about to soft-enable interrupts (we are hard disabled
-	 * at this point). We check if there's anything that needs to
-	 * be replayed first.
-	 */
-	lbz	r0,PACAIRQHAPPENED(r13)
-	cmpwi	cr0,r0,0
-	bne-	.Lrestore_check_irq_replay
-
-	/*
-	 * Get here when nothing happened while soft-disabled, just
-	 * soft-enable and move-on. We will hard-enable as a side
-	 * effect of rfi
-	 */
-.Lrestore_no_replay:
-	TRACE_ENABLE_INTS
-	li	r0,IRQS_ENABLED
-	stb	r0,PACAIRQSOFTMASK(r13);
-
-/* This is the return from load_up_fpu fast path which could do with
- * less GPR restores in fact, but for now we have a single return path
- */
-fast_exception_return:
-	wrteei	0
-1:	mr	r0,r13
-	ld	r10,_MSR(r1)
-	REST_4GPRS(2, r1)
-	andi.	r6,r10,MSR_PR
-	REST_2GPRS(6, r1)
-	beq	1f
-	ACCOUNT_CPU_USER_EXIT(r13, r10, r11)
-	ld	r0,GPR13(r1)
-
-1:	stdcx.	r0,0,r1		/* to clear the reservation */
-
-	ld	r8,_CCR(r1)
-	ld	r9,_LINK(r1)
-	ld	r10,_CTR(r1)
-	ld	r11,_XER(r1)
-	mtcr	r8
-	mtlr	r9
-	mtctr	r10
-	mtxer	r11
-	REST_2GPRS(8, r1)
-	ld	r10,GPR10(r1)
-	ld	r11,GPR11(r1)
-	ld	r12,GPR12(r1)
-	mtspr	SPRN_SPRG_GEN_SCRATCH,r0
-
-	std	r10,PACA_EXGEN+EX_R10(r13);
-	std	r11,PACA_EXGEN+EX_R11(r13);
-	ld	r10,_NIP(r1)
-	ld	r11,_MSR(r1)
-	ld	r0,GPR0(r1)
-	ld	r1,GPR1(r1)
-	mtspr	SPRN_SRR0,r10
-	mtspr	SPRN_SRR1,r11
-	ld	r10,PACA_EXGEN+EX_R10(r13)
-	ld	r11,PACA_EXGEN+EX_R11(r13)
-	mfspr	r13,SPRN_SPRG_GEN_SCRATCH
-	rfi
-
-	/*
-	 * We are returning to a context with interrupts soft disabled.
-	 *
-	 * However, we may also about to hard enable, so we need to
-	 * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
-	 * or that bit can get out of sync and bad things will happen
-	 */
-.Lrestore_irq_off:
-	ld	r3,_MSR(r1)
-	lbz	r7,PACAIRQHAPPENED(r13)
-	andi.	r0,r3,MSR_EE
-	beq	1f
-	rlwinm	r7,r7,0,~PACA_IRQ_HARD_DIS
-	stb	r7,PACAIRQHAPPENED(r13)
-1:
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
-	/* The interrupt should not have soft enabled. */
-	lbz	r7,PACAIRQSOFTMASK(r13)
-1:	tdeqi	r7,IRQS_ENABLED
-	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-	b	fast_exception_return
-
-	/*
-	 * Something did happen, check if a re-emit is needed
-	 * (this also clears paca->irq_happened)
-	 */
-.Lrestore_check_irq_replay:
-	/* XXX: We could implement a fast path here where we check
-	 * for irq_happened being just 0x01, in which case we can
-	 * clear it and return. That means that we would potentially
-	 * miss a decrementer having wrapped all the way around.
-	 *
-	 * Still, this might be useful for things like hash_page
-	 */
-	bl	__check_irq_replay
-	cmpwi	cr0,r3,0
-	beq	.Lrestore_no_replay
-
-	/*
-	 * We need to re-emit an interrupt. We do so by re-using our
-	 * existing exception frame. We first change the trap value,
-	 * but we need to ensure we preserve the low nibble of it
-	 */
-	ld	r4,_TRAP(r1)
-	clrldi	r4,r4,60
-	or	r4,r4,r3
-	std	r4,_TRAP(r1)
-
-	/*
-	 * PACA_IRQ_HARD_DIS won't always be set here, so set it now
-	 * to reconcile the IRQ state. Tracing is already accounted for.
-	 */
-	lbz	r4,PACAIRQHAPPENED(r13)
-	ori	r4,r4,PACA_IRQ_HARD_DIS
-	stb	r4,PACAIRQHAPPENED(r13)
-
-	/*
-	 * Then find the right handler and call it. Interrupts are
-	 * still soft-disabled and we keep them that way.
-	*/
-	cmpwi	cr0,r3,0x500
-	bne	1f
-	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	bl	do_IRQ
-	b	ret_from_except
-1:	cmpwi	cr0,r3,0x900
-	bne	1f
-	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	bl	timer_interrupt
-	b	ret_from_except
-#ifdef CONFIG_PPC_DOORBELL
-1:
-	cmpwi	cr0,r3,0x280
-	bne	1f
-	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	bl	doorbell_exception
-#endif /* CONFIG_PPC_DOORBELL */
-1:	b	ret_from_except /* What else to do here ? */
-
-_ASM_NOKPROBE_SYMBOL(ret_from_except);
-_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
-_ASM_NOKPROBE_SYMBOL(resume_kernel);
-_ASM_NOKPROBE_SYMBOL(restore);
-_ASM_NOKPROBE_SYMBOL(fast_exception_return);
+	b	interrupt_return
 
 /*
  * Trampolines used when spotting a bad kernel stack pointer in
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 8082b690e87468b92b33238549ea50426560202c..fa8e52a0239ee0e530da6f96e6ddd06c11924c4b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -692,25 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld	r1,GPR1(r1)
 .endm
 
-/*
- * When the idle code in power4_idle puts the CPU into NAP mode,
- * it has to do so in a loop, and relies on the external interrupt
- * and decrementer interrupt entry code to get it out of the loop.
- * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
- * to signal that it is in the loop and needs help to get out.
- */
-#ifdef CONFIG_PPC_970_NAP
-#define FINISH_NAP				\
-BEGIN_FTR_SECTION				\
-	ld	r11, PACA_THREAD_INFO(r13);	\
-	ld	r9,TI_LOCAL_FLAGS(r11);		\
-	andi.	r10,r9,_TLF_NAPPING;		\
-	bnel	power4_fixup_nap;		\
-END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-#else
-#define FINISH_NAP
-#endif
-
 /*
  * There are a few constraints to be concerned with.
  * - Real mode exceptions code/data must be located at their physical location.
@@ -1248,7 +1229,6 @@ EXC_COMMON_BEGIN(machine_check_common)
 	 */
 	GEN_COMMON machine_check
 
-	FINISH_NAP
 	/* Enable MSR_RI when finished with PACA_EXMC */
 	li	r10,MSR_RI
 	mtmsrd 	r10,1
@@ -1571,7 +1551,6 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
 EXC_COMMON_BEGIN(hardware_interrupt_common)
 	GEN_COMMON hardware_interrupt
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_IRQ
 	b	interrupt_return
@@ -1801,7 +1780,6 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
 EXC_VIRT_END(decrementer, 0x4900, 0x80)
 EXC_COMMON_BEGIN(decrementer_common)
 	GEN_COMMON decrementer
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	timer_interrupt
 	b	interrupt_return
@@ -1886,7 +1864,6 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
 EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
 EXC_COMMON_BEGIN(doorbell_super_common)
 	GEN_COMMON doorbell_super
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_DOORBELL
 	bl	doorbell_exception
@@ -2237,7 +2214,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
 
 EXC_COMMON_BEGIN(hmi_exception_common)
 	GEN_COMMON hmi_exception
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	handle_hmi_exception
 	b	interrupt_return
@@ -2266,7 +2242,6 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
 EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
 EXC_COMMON_BEGIN(h_doorbell_common)
 	GEN_COMMON h_doorbell
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_DOORBELL
 	bl	doorbell_exception
@@ -2299,7 +2274,6 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
 EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
 EXC_COMMON_BEGIN(h_virt_irq_common)
 	GEN_COMMON h_virt_irq
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_IRQ
 	b	interrupt_return
@@ -2345,7 +2319,6 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
 EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
 EXC_COMMON_BEGIN(performance_monitor_common)
 	GEN_COMMON performance_monitor
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	performance_monitor_exception
 	b	interrupt_return
@@ -2530,8 +2503,6 @@ EXC_VIRT_NONE(0x5100, 0x100)
 INT_DEFINE_BEGIN(cbe_system_error)
 	IVEC=0x1200
 	IHSRR=1
-	IKVM_SKIP=1
-	IKVM_REAL=1
 INT_DEFINE_END(cbe_system_error)
 
 EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
@@ -2551,11 +2522,16 @@ EXC_REAL_NONE(0x1200, 0x100)
 EXC_VIRT_NONE(0x5200, 0x100)
 #endif
 
-
+/**
+ * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt.
+ * This interrupt was removed from the ISA before 2.01, which is the earliest
+ * 64-bit BookS ISA supported; however, the G5 / 970 implements this
+ * interrupt with a non-architected feature available through the support
+ * processor interface.
+ */
 INT_DEFINE_BEGIN(instruction_breakpoint)
 	IVEC=0x1300
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-	IKVM_SKIP=1
 	IKVM_REAL=1
 #endif
 INT_DEFINE_END(instruction_breakpoint)
@@ -2701,8 +2677,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
 INT_DEFINE_BEGIN(cbe_maintenance)
 	IVEC=0x1600
 	IHSRR=1
-	IKVM_SKIP=1
-	IKVM_REAL=1
 INT_DEFINE_END(cbe_maintenance)
 
 EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
@@ -2754,8 +2728,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
 INT_DEFINE_BEGIN(cbe_thermal)
 	IVEC=0x1800
 	IHSRR=1
-	IKVM_SKIP=1
-	IKVM_REAL=1
 INT_DEFINE_END(cbe_thermal)
 
 EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
@@ -3096,24 +3068,6 @@ USE_FIXED_SECTION(virt_trampolines)
 __end_interrupts:
 DEFINE_FIXED_SYMBOL(__end_interrupts)
 
-#ifdef CONFIG_PPC_970_NAP
-	/*
-	 * Called by exception entry code if _TLF_NAPPING was set, this clears
-	 * the NAPPING flag, and redirects the exception exit to
-	 * power4_fixup_nap_return.
-	 */
-	.globl power4_fixup_nap
-EXC_COMMON_BEGIN(power4_fixup_nap)
-	andc	r9,r9,r10
-	std	r9,TI_LOCAL_FLAGS(r11)
-	LOAD_REG_ADDR(r10, power4_idle_nap_return)
-	std	r10,_NIP(r1)
-	blr
-
-power4_idle_nap_return:
-	blr
-#endif
-
 CLOSE_FIXED_SECTION(real_vectors);
 CLOSE_FIXED_SECTION(real_trampolines);
 CLOSE_FIXED_SECTION(virt_vectors);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8482739d42f380bfcfeec9aec05dd1c79c466160..b990075285f57cf62f10a147b201244e91a77d5e 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -31,6 +31,7 @@
 #include <asm/fadump.h>
 #include <asm/fadump-internal.h>
 #include <asm/setup.h>
+#include <asm/interrupt.h>
 
 /*
  * The CPU who acquired the lock to trigger the fadump crash should
@@ -44,22 +45,21 @@ static struct fw_dump fw_dump;
 
 static void __init fadump_reserve_crash_area(u64 base);
 
-struct kobject *fadump_kobj;
-
 #ifndef CONFIG_PRESERVE_FA_DUMP
 
+static struct kobject *fadump_kobj;
+
 static atomic_t cpus_in_fadump;
 static DEFINE_MUTEX(fadump_mutex);
 
-struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };
+static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };
 
 #define RESERVED_RNGS_SZ	16384 /* 16K - 128 entries */
 #define RESERVED_RNGS_CNT	(RESERVED_RNGS_SZ / \
 				 sizeof(struct fadump_memory_range))
 static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
-struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs,
-						   RESERVED_RNGS_SZ, 0,
-						   RESERVED_RNGS_CNT, true };
+static struct fadump_mrange_info
+reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true };
 
 static void __init early_init_dt_scan_reserved_ranges(unsigned long node);
 
@@ -79,7 +79,7 @@ static struct cma *fadump_cma;
  * But for some reason even if it fails we still have the memory reservation
  * with us and we can still continue doing fadump.
  */
-int __init fadump_cma_init(void)
+static int __init fadump_cma_init(void)
 {
 	unsigned long long base, size;
 	int rc;
@@ -292,7 +292,7 @@ static void fadump_show_config(void)
  * that is required for a kernel to boot successfully.
  *
  */
-static inline u64 fadump_calculate_reserve_size(void)
+static __init u64 fadump_calculate_reserve_size(void)
 {
 	u64 base, size, bootmem_min;
 	int ret;
@@ -728,7 +728,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
 	 * If we came in via system reset, wait a while for the secondary
 	 * CPUs to enter.
 	 */
-	if (TRAP(&(fdh->regs)) == 0x100) {
+	if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) {
 		msecs = CRASH_TIMEOUT;
 		while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
 			mdelay(1);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 3ff9a8fafa467c9d76b9b6d3605ed3884b45ccd9..2c57ece6671c492dd765df229b6cd30657da02f7 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -92,9 +92,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	/* enable use of FP after return */
 #ifdef CONFIG_PPC32
 	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
-#ifdef CONFIG_VMAP_STACK
 	tovirt(r5, r5)
-#endif
 	lwz	r4,THREAD_FPEXC_MODE(r5)
 	ori	r9,r9,MSR_FP		/* enable FP for current */
 	or	r9,r9,r4
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 5d4706c14572717493c563bd7919c2171fc394f2..a8221ddcbd66dac128a7f14f82aeb24998dcebbd 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -10,36 +10,39 @@
  * We assume sprg3 has the physical address of the current
  * task's thread_struct.
  */
-.macro EXCEPTION_PROLOG handle_dar_dsisr=0
+.macro EXCEPTION_PROLOG		trapno name handle_dar_dsisr=0
 	EXCEPTION_PROLOG_0	handle_dar_dsisr=\handle_dar_dsisr
 	EXCEPTION_PROLOG_1
-	EXCEPTION_PROLOG_2	handle_dar_dsisr=\handle_dar_dsisr
+	EXCEPTION_PROLOG_2	\trapno \name handle_dar_dsisr=\handle_dar_dsisr
 .endm
 
 .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0
 	mtspr	SPRN_SPRG_SCRATCH0,r10
 	mtspr	SPRN_SPRG_SCRATCH1,r11
-#ifdef CONFIG_VMAP_STACK
 	mfspr	r10, SPRN_SPRG_THREAD
 	.if	\handle_dar_dsisr
+#ifdef CONFIG_40x
+	mfspr	r11, SPRN_DEAR
+#else
 	mfspr	r11, SPRN_DAR
+#endif
 	stw	r11, DAR(r10)
+#ifdef CONFIG_40x
+	mfspr	r11, SPRN_ESR
+#else
 	mfspr	r11, SPRN_DSISR
+#endif
 	stw	r11, DSISR(r10)
 	.endif
 	mfspr	r11, SPRN_SRR0
 	stw	r11, SRR0(r10)
-#endif
 	mfspr	r11, SPRN_SRR1		/* check whether user or kernel */
-#ifdef CONFIG_VMAP_STACK
 	stw	r11, SRR1(r10)
-#endif
 	mfcr	r10
 	andi.	r11, r11, MSR_PR
 .endm
 
-.macro EXCEPTION_PROLOG_1 for_rtas=0
-#ifdef CONFIG_VMAP_STACK
+.macro EXCEPTION_PROLOG_1
 	mtspr	SPRN_SPRG_SCRATCH2,r1
 	subi	r1, r1, INT_FRAME_SIZE		/* use r1 if kernel */
 	beq	1f
@@ -47,32 +50,33 @@
 	lwz	r1,TASK_STACK-THREAD(r1)
 	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
 1:
+#ifdef CONFIG_VMAP_STACK
 	mtcrf	0x3f, r1
-	bt	32 - THREAD_ALIGN_SHIFT, stack_overflow
-#else
-	subi	r11, r1, INT_FRAME_SIZE		/* use r1 if kernel */
-	beq	1f
-	mfspr	r11,SPRN_SPRG_THREAD
-	lwz	r11,TASK_STACK-THREAD(r11)
-	addi	r11, r11, THREAD_SIZE - INT_FRAME_SIZE
-1:	tophys(r11, r11)
+	bt	32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow
 #endif
 .endm
 
-.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
-#ifdef CONFIG_VMAP_STACK
-	li	r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
-	mtmsr	r11
-	isync
+.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0
+#ifdef CONFIG_PPC_8xx
+	.if	\handle_dar_dsisr
+	li	r11, RPN_PATTERN
+	mtspr	SPRN_DAR, r11	/* Tag DAR, to be used in DTLB Error */
+	.endif
+#endif
+	LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */
+	mtspr	SPRN_SRR1, r11
+	lis	r11, 1f@h
+	ori	r11, r11, 1f@l
+	mtspr	SPRN_SRR0, r11
 	mfspr	r11, SPRN_SPRG_SCRATCH2
+	rfi
+
+	.text
+\name\()_virt:
+1:
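+	/*
+	 * We land here with the MMU re-enabled; r11 still holds the
+	 * interrupted stack pointer saved in EXCEPTION_PROLOG_1.
+	 */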
 	stw	r11,GPR1(r1)
 	stw	r11,0(r1)
 	mr	r11, r1
-#else
-	stw	r1,GPR1(r11)
-	stw	r1,0(r11)
-	tovirt(r1, r11)		/* set new kernel sp */
-#endif
 	stw	r10,_CCR(r11)		/* save registers */
 	stw	r12,GPR12(r11)
 	stw	r9,GPR9(r11)
@@ -82,7 +86,6 @@
 	stw	r12,GPR11(r11)
 	mflr	r10
 	stw	r10,_LINK(r11)
-#ifdef CONFIG_VMAP_STACK
 	mfspr	r12, SPRN_SPRG_THREAD
 	tovirt(r12, r12)
 	.if	\handle_dar_dsisr
@@ -93,26 +96,48 @@
 	.endif
 	lwz	r9, SRR1(r12)
 	lwz	r12, SRR0(r12)
-#else
-	mfspr	r12,SPRN_SRR0
-	mfspr	r9,SPRN_SRR1
-#endif
 #ifdef CONFIG_40x
 	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?) */
+#elif defined(CONFIG_PPC_8xx)
+	mtspr	SPRN_EID, r2		/* Set MSR_RI */
 #else
-#ifdef CONFIG_VMAP_STACK
-	li	r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */
-#else
-	li	r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
-#endif
+	li	r10, MSR_KERNEL		/* can take exceptions */
 	mtmsr	r10			/* (except for mach check in rtas) */
 #endif
-	stw	r0,GPR0(r11)
+	COMMON_EXCEPTION_PROLOG_END \trapno
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+.endm
+
+.macro COMMON_EXCEPTION_PROLOG_END trapno
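+	/*
+	 * Finish building the pt_regs frame on the kernel stack and
+	 * point r2 at the current task.
+	 */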
+	stw	r0,GPR0(r1)
 	lis	r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
 	addi	r10,r10,STACK_FRAME_REGS_MARKER@l
-	stw	r10,8(r11)
-	SAVE_4GPRS(3, r11)
-	SAVE_2GPRS(7, r11)
+	stw	r10,8(r1)
+	li	r10, \trapno
+	stw	r10,_TRAP(r1)
+	SAVE_4GPRS(3, r1)
+	SAVE_2GPRS(7, r1)
+	SAVE_NVGPRS(r1)
+	stw	r2,GPR2(r1)
+	stw	r12,_NIP(r1)
+	stw	r9,_MSR(r1)
+	mfctr	r10
+	mfspr	r2,SPRN_SPRG_THREAD
+	stw	r10,_CTR(r1)
+	tovirt(r2, r2)
+	mfspr	r10,SPRN_XER
+	addi	r2, r2, -THREAD
+	stw	r10,_XER(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+.endm
+
+.macro prepare_transfer_to_handler
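+	/* On BOOK3S/32, interrupts taken from kernel mode need extra preparation */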
+#ifdef CONFIG_PPC_BOOK3S_32
+	andi.	r12,r9,MSR_PR
+	bne	777f
+	bl	prepare_transfer_to_handler
+777:
+#endif
 .endm
 
 .macro SYSCALL_ENTRY trapno
@@ -156,54 +181,6 @@
 	b	transfer_to_syscall		/* jump to handler */
 .endm
 
-.macro save_dar_dsisr_on_stack reg1, reg2, sp
-#ifndef CONFIG_VMAP_STACK
-	mfspr	\reg1, SPRN_DAR
-	mfspr	\reg2, SPRN_DSISR
-	stw	\reg1, _DAR(\sp)
-	stw	\reg2, _DSISR(\sp)
-#endif
-.endm
-
-.macro get_and_save_dar_dsisr_on_stack reg1, reg2, sp
-#ifdef CONFIG_VMAP_STACK
-	lwz	\reg1, _DAR(\sp)
-	lwz	\reg2, _DSISR(\sp)
-#else
-	save_dar_dsisr_on_stack \reg1, \reg2, \sp
-#endif
-.endm
-
-.macro tovirt_vmstack dst, src
-#ifdef CONFIG_VMAP_STACK
-	tovirt(\dst, \src)
-#else
-	.ifnc	\dst, \src
-	mr	\dst, \src
-	.endif
-#endif
-.endm
-
-.macro tovirt_novmstack dst, src
-#ifndef CONFIG_VMAP_STACK
-	tovirt(\dst, \src)
-#else
-	.ifnc	\dst, \src
-	mr	\dst, \src
-	.endif
-#endif
-.endm
-
-.macro tophys_novmstack dst, src
-#ifndef CONFIG_VMAP_STACK
-	tophys(\dst, \src)
-#else
-	.ifnc	\dst, \src
-	mr	\dst, \src
-	.endif
-#endif
-.endm
-
 /*
  * Note: code which follows this uses cr0.eq (set if from kernel),
  * r11, r12 (SRR0), and r9 (SRR1).
@@ -217,41 +194,29 @@
  */
 #ifdef CONFIG_PPC_BOOK3S
 #define	START_EXCEPTION(n, label)		\
+	__HEAD;					\
 	. = n;					\
 	DO_KVM n;				\
 label:
 
 #else
 #define	START_EXCEPTION(n, label)		\
+	__HEAD;					\
 	. = n;					\
 label:
 
 #endif
 
-#define EXCEPTION(n, label, hdlr, xfer)		\
+#define EXCEPTION(n, label, hdlr)		\
 	START_EXCEPTION(n, label)		\
-	EXCEPTION_PROLOG;			\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;	\
-	xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret)		\
-	li	r10,trap;					\
-	stw	r10,_TRAP(r11);					\
-	LOAD_REG_IMMEDIATE(r10, msr);				\
-	bl	tfer;						\
-	.long	hdlr;						\
-	.long	ret
-
-#define EXC_XFER_STD(n, hdlr)		\
-	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full,	\
-			  ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr)		\
-	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
-			  ret_from_except)
+	EXCEPTION_PROLOG n label;		\
+	prepare_transfer_to_handler;		\
+	bl	hdlr;				\
+	b	interrupt_return
 
 .macro vmap_stack_overflow_exception
-#ifdef CONFIG_VMAP_STACK
+	__HEAD
+vmap_stack_overflow:
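+	/* The task's kernel stack overflowed; switch to the emergency stack */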
 #ifdef CONFIG_SMP
 	mfspr	r1, SPRN_SPRG_THREAD
 	lwz	r1, TASK_CPU - THREAD(r1)
@@ -261,16 +226,11 @@
 	lis	r1, emergency_ctx@ha
 #endif
 	lwz	r1, emergency_ctx@l(r1)
-	cmpwi	cr1, r1, 0
-	bne	cr1, 1f
-	lis	r1, init_thread_union@ha
-	addi	r1, r1, init_thread_union@l
-1:	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
-	EXCEPTION_PROLOG_2
-	SAVE_NVGPRS(r11)
-	addi	r3, r1, STACK_FRAME_OVERHEAD
-	EXC_XFER_STD(0, stack_overflow_exception)
-#endif
+	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+	EXCEPTION_PROLOG_2 0 vmap_stack_overflow
+	prepare_transfer_to_handler
+	bl	stack_overflow_exception
+	b	interrupt_return
 .endm
 
 #endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 24724a7dad49a678e689aa043b99193f89fa37ef..e1360b88b6cb86e0c9f10ac9c4141248839b4c66 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -89,7 +89,11 @@ _ENTRY(crit_srr0)
 	.space	4
 _ENTRY(crit_srr1)
 	.space	4
-_ENTRY(saved_ksp_limit)
+_ENTRY(crit_r1)
+	.space	4
+_ENTRY(crit_dear)
+	.space	4
+_ENTRY(crit_esr)
 	.space	4
 
 /*
@@ -100,42 +104,62 @@ _ENTRY(saved_ksp_limit)
  * Instead we use a couple of words of memory at low physical addresses.
  * This is OK since we don't support SMP on these processors.
  */
-#define CRITICAL_EXCEPTION_PROLOG					     \
-	stw	r10,crit_r10@l(0);	/* save two registers to work with */\
-	stw	r11,crit_r11@l(0);					     \
-	mfcr	r10;			/* save CR in r10 for now	   */\
-	mfspr	r11,SPRN_SRR3;		/* check whether user or kernel    */\
-	andi.	r11,r11,MSR_PR;						     \
-	lis	r11,critirq_ctx@ha;					     \
-	tophys(r11,r11);						     \
-	lwz	r11,critirq_ctx@l(r11);					     \
-	beq	1f;							     \
-	/* COMING FROM USER MODE */					     \
-	mfspr	r11,SPRN_SPRG_THREAD;	/* if from user, start at top of   */\
-	lwz	r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
-1:	addi	r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm  */\
-	tophys(r11,r11);						     \
-	stw	r10,_CCR(r11);          /* save various registers	   */\
-	stw	r12,GPR12(r11);						     \
-	stw	r9,GPR9(r11);						     \
-	mflr	r10;							     \
-	stw	r10,_LINK(r11);						     \
-	mfspr	r12,SPRN_DEAR;		/* save DEAR and ESR in the frame  */\
-	stw	r12,_DEAR(r11);		/* since they may have had stuff   */\
-	mfspr	r9,SPRN_ESR;		/* in them at the point where the  */\
-	stw	r9,_ESR(r11);		/* exception was taken		   */\
-	mfspr	r12,SPRN_SRR2;						     \
-	stw	r1,GPR1(r11);						     \
-	mfspr	r9,SPRN_SRR3;						     \
-	stw	r1,0(r11);						     \
-	tovirt(r1,r11);							     \
-	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
-	stw	r0,GPR0(r11);						     \
-	lis	r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
-	addi	r10, r10, STACK_FRAME_REGS_MARKER@l;			     \
-	stw	r10, 8(r11);						     \
-	SAVE_4GPRS(3, r11);						     \
-	SAVE_2GPRS(7, r11)
+.macro CRITICAL_EXCEPTION_PROLOG trapno name
+	stw	r10,crit_r10@l(0)	/* save two registers to work with */
+	stw	r11,crit_r11@l(0)
+	mfspr	r10,SPRN_SRR0
+	mfspr	r11,SPRN_SRR1
+	stw	r10,crit_srr0@l(0)
+	stw	r11,crit_srr1@l(0)
+	mfspr	r10,SPRN_DEAR
+	mfspr	r11,SPRN_ESR
+	stw	r10,crit_dear@l(0)
+	stw	r11,crit_esr@l(0)
+	mfcr	r10			/* save CR in r10 for now	   */
+	mfspr	r11,SPRN_SRR3		/* check whether user or kernel    */
+	andi.	r11,r11,MSR_PR
+	lis	r11,(critirq_ctx-PAGE_OFFSET)@ha
+	lwz	r11,(critirq_ctx-PAGE_OFFSET)@l(r11)
+	beq	1f
+	/* COMING FROM USER MODE */
+	mfspr	r11,SPRN_SPRG_THREAD	/* if from user, start at top of   */
+	lwz	r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */
+1:	stw	r1,crit_r1@l(0)
+	addi	r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm  */
+	LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */
+	mtspr	SPRN_SRR1, r11
+	lis	r11, 1f@h
+	ori	r11, r11, 1f@l
+	mtspr	SPRN_SRR0, r11
+	rfi
+
+	.text
+1:
+\name\()_virt:
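+	/* Back in virtual mode; rebuild the frame from the values stashed above */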
+	lwz	r11,crit_r1@l(0)
+	stw	r11,GPR1(r1)
+	stw	r11,0(r1)
+	mr	r11,r1
+	stw	r10,_CCR(r11)		/* save various registers	   */
+	stw	r12,GPR12(r11)
+	stw	r9,GPR9(r11)
+	mflr	r10
+	stw	r10,_LINK(r11)
+	lis	r9,PAGE_OFFSET@ha
+	lwz	r10,crit_r10@l(r9)
+	lwz	r12,crit_r11@l(r9)
+	stw	r10,GPR10(r11)
+	stw	r12,GPR11(r11)
+	lwz	r12,crit_dear@l(r9)
+	lwz	r9,crit_esr@l(r9)
+	stw	r12,_DEAR(r11)		/* save DEAR and ESR in the frame  */
+	stw	r9,_ESR(r11)		/* as captured at exception entry  */
+	mfspr	r12,SPRN_SRR2
+	mfspr	r9,SPRN_SRR3
+	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?)	   */
+	COMMON_EXCEPTION_PROLOG_END \trapno + 2
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+.endm
 
 	/*
 	 * State at this point:
@@ -155,10 +179,10 @@ _ENTRY(saved_ksp_limit)
  */
 #define CRITICAL_EXCEPTION(n, label, hdlr)			\
 	START_EXCEPTION(n, label);				\
-	CRITICAL_EXCEPTION_PROLOG;				\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
-	EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
-			  crit_transfer_to_handler, ret_from_crit_exc)
+	CRITICAL_EXCEPTION_PROLOG n label;				\
+	prepare_transfer_to_handler;				\
+	bl	hdlr;						\
+	b	ret_from_crit_exc
 
 /*
  * 0x0100 - Critical Interrupt Exception
@@ -178,69 +202,67 @@ _ENTRY(saved_ksp_limit)
  * if they can't resolve the lightweight TLB fault.
  */
 	START_EXCEPTION(0x0300,	DataStorage)
-	EXCEPTION_PROLOG
-	mfspr	r5, SPRN_ESR		/* Grab the ESR, save it */
-	stw	r5, _ESR(r11)
-	mfspr	r4, SPRN_DEAR		/* Grab the DEAR, save it */
-	stw	r4, _DEAR(r11)
-	EXC_XFER_LITE(0x300, handle_page_fault)
+	EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
 
 /*
  * 0x0400 - Instruction Storage Exception
  * This is caused by a fetch from non-execute or guarded pages.
  */
 	START_EXCEPTION(0x0400, InstructionAccess)
-	EXCEPTION_PROLOG
+	EXCEPTION_PROLOG 0x400 InstructionAccess
 	li	r5,0
 	stw	r5, _ESR(r11)		/* Zero ESR */
 	stw	r12, _DEAR(r11)		/* SRR0 as DEAR */
-	EXC_XFER_LITE(0x400, handle_page_fault)
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
 
 /* 0x0500 - External Interrupt Exception */
-	EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+	EXCEPTION(0x0500, HardwareInterrupt, do_IRQ)
 
 /* 0x0600 - Alignment Exception */
 	START_EXCEPTION(0x0600, Alignment)
-	EXCEPTION_PROLOG
-	mfspr	r4,SPRN_DEAR		/* Grab the DEAR and save it */
-	stw	r4,_DEAR(r11)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_STD(0x600, alignment_exception)
+	EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	alignment_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
 /* 0x0700 - Program Exception */
 	START_EXCEPTION(0x0700, ProgramCheck)
-	EXCEPTION_PROLOG
-	mfspr	r4,SPRN_ESR		/* Grab the ESR and save it */
-	stw	r4,_ESR(r11)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_STD(0x700, program_check_exception)
+	EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	program_check_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
-	EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0x0800, Trap_08, unknown_exception)
+	EXCEPTION(0x0900, Trap_09, unknown_exception)
+	EXCEPTION(0x0A00, Trap_0A, unknown_exception)
+	EXCEPTION(0x0B00, Trap_0B, unknown_exception)
 
 /* 0x0C00 - System Call Exception */
 	START_EXCEPTION(0x0C00,	SystemCall)
 	SYSCALL_ENTRY	0xc00
 /*	Trap_0D is commented out to get more space for system call exception */
 
-/*	EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) */
-	EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
+/*	EXCEPTION(0x0D00, Trap_0D, unknown_exception) */
+	EXCEPTION(0x0E00, Trap_0E, unknown_exception)
+	EXCEPTION(0x0F00, Trap_0F, unknown_exception)
 
 /* 0x1000 - Programmable Interval Timer (PIT) Exception */
-	. = 0x1000
+	START_EXCEPTION(0x1000, DecrementerTrap)
 	b Decrementer
 
-/* 0x1010 - Fixed Interval Timer (FIT) Exception
-*/
-	. = 0x1010
+/* 0x1010 - Fixed Interval Timer (FIT) Exception */
+	START_EXCEPTION(0x1010, FITExceptionTrap)
 	b FITException
 
-/* 0x1020 - Watchdog Timer (WDT) Exception
-*/
-	. = 0x1020
+/* 0x1020 - Watchdog Timer (WDT) Exception */
+	START_EXCEPTION(0x1020, WDTExceptionTrap)
 	b WDTException
 
 /* 0x1100 - Data TLB Miss Exception
@@ -249,13 +271,13 @@ _ENTRY(saved_ksp_limit)
  * load TLB entries from the page table if they exist.
  */
 	START_EXCEPTION(0x1100,	DTLBMiss)
-	mtspr	SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
-	mtspr	SPRN_SPRG_SCRATCH1, r11
+	mtspr	SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */
+	mtspr	SPRN_SPRG_SCRATCH6, r11
 	mtspr	SPRN_SPRG_SCRATCH3, r12
 	mtspr	SPRN_SPRG_SCRATCH4, r9
 	mfcr	r12
 	mfspr	r9, SPRN_PID
-	mtspr	SPRN_SPRG_SCRATCH5, r9
+	rlwimi	r12, r9, 0, 0xff
 	mfspr	r10, SPRN_DEAR		/* Get faulting address */
 
 	/* If we are faulting a kernel address, we have to use the
@@ -316,13 +338,12 @@ _ENTRY(saved_ksp_limit)
 	/* The bailout.  Restore registers to pre-exception conditions
 	 * and call the heavyweights to help us out.
 	 */
-	mfspr	r9, SPRN_SPRG_SCRATCH5
-	mtspr	SPRN_PID, r9
-	mtcr	r12
+	mtspr	SPRN_PID, r12
+	mtcrf	0x80, r12
 	mfspr	r9, SPRN_SPRG_SCRATCH4
 	mfspr	r12, SPRN_SPRG_SCRATCH3
-	mfspr	r11, SPRN_SPRG_SCRATCH1
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+	mfspr	r11, SPRN_SPRG_SCRATCH6
+	mfspr	r10, SPRN_SPRG_SCRATCH5
 	b	DataStorage
 
 /* 0x1200 - Instruction TLB Miss Exception
@@ -330,13 +351,13 @@ _ENTRY(saved_ksp_limit)
  * registers and bailout to a different point.
  */
 	START_EXCEPTION(0x1200,	ITLBMiss)
-	mtspr	SPRN_SPRG_SCRATCH0, r10	 /* Save some working registers */
-	mtspr	SPRN_SPRG_SCRATCH1, r11
+	mtspr	SPRN_SPRG_SCRATCH5, r10	 /* Save some working registers */
+	mtspr	SPRN_SPRG_SCRATCH6, r11
 	mtspr	SPRN_SPRG_SCRATCH3, r12
 	mtspr	SPRN_SPRG_SCRATCH4, r9
 	mfcr	r12
 	mfspr	r9, SPRN_PID
-	mtspr	SPRN_SPRG_SCRATCH5, r9
+	rlwimi	r12, r9, 0, 0xff
 	mfspr	r10, SPRN_SRR0		/* Get faulting address */
 
 	/* If we are faulting a kernel address, we have to use the
@@ -397,28 +418,27 @@ _ENTRY(saved_ksp_limit)
 	/* The bailout.  Restore registers to pre-exception conditions
 	 * and call the heavyweights to help us out.
 	 */
-	mfspr	r9, SPRN_SPRG_SCRATCH5
-	mtspr	SPRN_PID, r9
-	mtcr	r12
+	mtspr	SPRN_PID, r12
+	mtcrf	0x80, r12
 	mfspr	r9, SPRN_SPRG_SCRATCH4
 	mfspr	r12, SPRN_SPRG_SCRATCH3
-	mfspr	r11, SPRN_SPRG_SCRATCH1
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+	mfspr	r11, SPRN_SPRG_SCRATCH6
+	mfspr	r10, SPRN_SPRG_SCRATCH5
 	b	InstructionAccess
 
-	EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0x1300, Trap_13, unknown_exception)
+	EXCEPTION(0x1400, Trap_14, unknown_exception)
+	EXCEPTION(0x1500, Trap_15, unknown_exception)
+	EXCEPTION(0x1600, Trap_16, unknown_exception)
+	EXCEPTION(0x1700, Trap_17, unknown_exception)
+	EXCEPTION(0x1800, Trap_18, unknown_exception)
+	EXCEPTION(0x1900, Trap_19, unknown_exception)
+	EXCEPTION(0x1A00, Trap_1A, unknown_exception)
+	EXCEPTION(0x1B00, Trap_1B, unknown_exception)
+	EXCEPTION(0x1C00, Trap_1C, unknown_exception)
+	EXCEPTION(0x1D00, Trap_1D, unknown_exception)
+	EXCEPTION(0x1E00, Trap_1E, unknown_exception)
+	EXCEPTION(0x1F00, Trap_1F, unknown_exception)
 
 /* Check for a single step debug exception while in an exception
  * handler before state has been saved.  This is to catch the case
@@ -435,7 +455,7 @@ _ENTRY(saved_ksp_limit)
  */
 	/* 0x2000 - Debug Exception */
 	START_EXCEPTION(0x2000, DebugTrap)
-	CRITICAL_EXCEPTION_PROLOG
+	CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap
 
 	/*
 	 * If this is a single step or branch-taken exception in an
@@ -477,32 +497,35 @@ _ENTRY(saved_ksp_limit)
 	/* continue normal handling for a critical exception... */
 2:	mfspr	r4,SPRN_DBSR
 	stw	r4,_ESR(r11)		/* DebugException takes DBSR in _ESR */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_TEMPLATE(DebugException, 0x2002, \
-		(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
-		crit_transfer_to_handler, ret_from_crit_exc)
+	prepare_transfer_to_handler
+	bl	DebugException
+	b	ret_from_crit_exc
 
 	/* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
+	__HEAD
 Decrementer:
-	EXCEPTION_PROLOG
+	EXCEPTION_PROLOG 0x1000 Decrementer
 	lis	r0,TSR_PIS@h
 	mtspr	SPRN_TSR,r0		/* Clear the PIT exception */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_LITE(0x1000, timer_interrupt)
+	prepare_transfer_to_handler
+	bl	timer_interrupt
+	b	interrupt_return
 
 	/* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
+	__HEAD
 FITException:
-	EXCEPTION_PROLOG
-	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	EXC_XFER_STD(0x1010, unknown_exception)
+	EXCEPTION_PROLOG 0x1010 FITException
+	prepare_transfer_to_handler
+	bl	unknown_exception
+	b	interrupt_return
 
 	/* Watchdog Timer (WDT) Exception. (from 0x1020) */
+	__HEAD
 WDTException:
-	CRITICAL_EXCEPTION_PROLOG;
-	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
-	                  (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
-			  crit_transfer_to_handler, ret_from_crit_exc)
+	CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException
+	prepare_transfer_to_handler
+	bl	WatchdogException
+	b	ret_from_crit_exc
 
 /* Other PowerPC processors, namely those derived from the 6xx-series
  * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
@@ -510,6 +533,7 @@ WDTException:
  * reserved.
  */
 
+	__HEAD
 	/* Damn, I came up one instruction too many to fit into the
 	 * exception space :-).  Both the instruction and data TLB
 	 * miss get to this point to load the TLB.
@@ -543,13 +567,12 @@ finish_tlb_load:
 
 	/* Done...restore registers and get out of here.
 	*/
-	mfspr	r9, SPRN_SPRG_SCRATCH5
-	mtspr	SPRN_PID, r9
-	mtcr	r12
+	mtspr	SPRN_PID, r12
+	mtcrf	0x80, r12
 	mfspr	r9, SPRN_SPRG_SCRATCH4
 	mfspr	r12, SPRN_SPRG_SCRATCH3
-	mfspr	r11, SPRN_SPRG_SCRATCH1
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+	mfspr	r11, SPRN_SPRG_SCRATCH6
+	mfspr	r10, SPRN_SPRG_SCRATCH5
 	rfi			/* Should sync shadow TLBs */
 	b	.		/* prevent prefetch past rfi */
 
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 813fa305c33bd324ca68f898f4727efd69f96b7a..5c106ac3662603eee6581b8e739d2876e494be5e 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -263,8 +263,7 @@ interrupt_base:
 	INSTRUCTION_STORAGE_EXCEPTION
 
 	/* External Input Interrupt */
-	EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
-		  do_IRQ, EXC_XFER_LITE)
+	EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, do_IRQ)
 
 	/* Alignment Interrupt */
 	ALIGNMENT_EXCEPTION
@@ -277,7 +276,7 @@ interrupt_base:
 	FP_UNAVAILABLE_EXCEPTION
 #else
 	EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
-		  FloatingPointUnavailable, unknown_exception, EXC_XFER_STD)
+		  FloatingPointUnavailable, unknown_exception)
 #endif
 	/* System Call Interrupt */
 	START_EXCEPTION(SystemCall)
@@ -285,15 +284,14 @@ interrupt_base:
 
 	/* Auxiliary Processor Unavailable Interrupt */
 	EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
-		  AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD)
+		  AuxillaryProcessorUnavailable, unknown_exception)
 
 	/* Decrementer Interrupt */
 	DECREMENTER_EXCEPTION
 
 	/* Fixed Internal Timer Interrupt */
 	/* TODO: Add FIT support */
-	EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
-		  unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, unknown_exception)
 
 	/* Watchdog Timer Interrupt */
 	/* TODO: Add watchdog support */
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 46dff3f9c31f16ce95ce8d43dd7319ad646f7886..7d445e4342c0c6eea1cd68eff063d4e276cf81ac 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -29,6 +29,13 @@
 #include <asm/ptrace.h>
 #include <asm/export.h>
 #include <asm/code-patching-asm.h>
+#include <asm/interrupt.h>
+
+/*
+ * Value for the bits that have fixed value in RPN entries.
+ * Also used for tagging DAR for DTLBerror.
+ */
+#define RPN_PATTERN	0x00f0
 
 #include "head_32.h"
 
@@ -42,12 +49,6 @@
 #endif
 .endm
 
-/*
- * Value for the bits that have fixed value in RPN entries.
- * Also used for tagging DAR for DTLBerror.
- */
-#define RPN_PATTERN	0x00f0
-
 #define PAGE_SHIFT_512K		19
 #define PAGE_SHIFT_8M		23
 
@@ -118,56 +119,54 @@ instruction_counter:
 #endif
 
 /* System reset */
-	EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD)
+	EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, system_reset_exception)
 
 /* Machine check */
-	. = 0x200
-MachineCheck:
-	EXCEPTION_PROLOG handle_dar_dsisr=1
-	save_dar_dsisr_on_stack r4, r5, r11
-	li	r6, RPN_PATTERN
-	mtspr	SPRN_DAR, r6	/* Tag DAR, to be used in DTLB Error */
-	addi r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_STD(0x200, machine_check_exception)
+	START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
+	EXCEPTION_PROLOG INTERRUPT_MACHINE_CHECK MachineCheck handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	machine_check_exception
+	b	interrupt_return
 
 /* External interrupt */
-	EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+	EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
 
 /* Alignment exception */
-	. = 0x600
-Alignment:
-	EXCEPTION_PROLOG handle_dar_dsisr=1
-	save_dar_dsisr_on_stack r4, r5, r11
-	li	r6, RPN_PATTERN
-	mtspr	SPRN_DAR, r6	/* Tag DAR, to be used in DTLB Error */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	b	.Lalignment_exception_ool
+	START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+	EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	alignment_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
 /* Program check exception */
-	EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+	START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+	EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+	prepare_transfer_to_handler
+	bl	program_check_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
 /* Decrementer */
-	EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
-
-	/* With VMAP_STACK there's not enough room for this at 0x600 */
-	. = 0xa00
-.Lalignment_exception_ool:
-	EXC_XFER_STD(0x600, alignment_exception)
+	EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
 
 /* System call */
-	. = 0xc00
-SystemCall:
-	SYSCALL_ENTRY	0xc00
+	START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+	SYSCALL_ENTRY	INTERRUPT_SYSCALL
 
 /* Single step - not used on 601 */
-	EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
+	EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
 
 /* On the MPC8xx, this is a software emulation interrupt.  It occurs
  * for all unimplemented and illegal instructions.
  */
-	EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD)
+	START_EXCEPTION(INTERRUPT_SOFT_EMU_8xx, SoftEmu)
+	EXCEPTION_PROLOG INTERRUPT_SOFT_EMU_8xx SoftEmu
+	prepare_transfer_to_handler
+	bl	emulation_assist_interrupt
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
-	. = 0x1100
 /*
  * For the MPC8xx, this is a software tablewalk to load the instruction
  * TLB.  The task switch loads the M_TWB register with the pointer to the first
@@ -189,7 +188,7 @@ SystemCall:
 #define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp)
 #endif
 
-InstructionTLBMiss:
+	START_EXCEPTION(INTERRUPT_INST_TLB_MISS_8xx, InstructionTLBMiss)
 	mtspr	SPRN_SPRG_SCRATCH2, r10
 	mtspr	SPRN_M_TW, r11
 
@@ -245,8 +244,7 @@ InstructionTLBMiss:
 	rfi
 #endif
 
-	. = 0x1200
-DataStoreTLBMiss:
+	START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss)
 	mtspr	SPRN_SPRG_SCRATCH2, r10
 	mtspr	SPRN_M_TW, r11
 	mfcr	r11
@@ -309,83 +307,74 @@ DataStoreTLBMiss:
  * to many reasons, such as executing guarded memory or illegal instruction
  * addresses.  There is nothing to do but handle a big time error fault.
  */
-	. = 0x1300
-InstructionTLBError:
-	EXCEPTION_PROLOG
+	START_EXCEPTION(INTERRUPT_INST_TLB_ERROR_8xx, InstructionTLBError)
+	/* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
+	EXCEPTION_PROLOG INTERRUPT_INST_STORAGE InstructionTLBError
 	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
 	andis.	r10,r9,SRR1_ISI_NOPT@h
 	beq+	.Litlbie
 	tlbie	r12
-	/* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
 .Litlbie:
 	stw	r12, _DAR(r11)
 	stw	r5, _DSISR(r11)
-	EXC_XFER_LITE(0x400, handle_page_fault)
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
  * many reasons, including a dirty update to a pte.  We bail out to
  * a higher level function that can handle it.
  */
-	. = 0x1400
-DataTLBError:
+	START_EXCEPTION(INTERRUPT_DATA_TLB_ERROR_8xx, DataTLBError)
 	EXCEPTION_PROLOG_0 handle_dar_dsisr=1
 	mfspr	r11, SPRN_DAR
 	cmpwi	cr1, r11, RPN_PATTERN
 	beq-	cr1, FixupDAR	/* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
-#ifdef CONFIG_VMAP_STACK
-	li	r11, RPN_PATTERN
-	mtspr	SPRN_DAR, r11	/* Tag DAR, to be used in DTLB Error */
-#endif
 	EXCEPTION_PROLOG_1
-	EXCEPTION_PROLOG_2 handle_dar_dsisr=1
-	get_and_save_dar_dsisr_on_stack r4, r5, r11
+	/* 0x300 is DataAccess exception, needed by bad_page_fault() */
+	EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1
+	lwz	r4, _DAR(r11)
+	lwz	r5, _DSISR(r11)
 	andis.	r10,r5,DSISR_NOHPTE@h
 	beq+	.Ldtlbie
 	tlbie	r4
 .Ldtlbie:
-#ifndef CONFIG_VMAP_STACK
-	li	r10,RPN_PATTERN
-	mtspr	SPRN_DAR,r10	/* Tag DAR, to be used in DTLB Error */
-#endif
-	/* 0x300 is DataAccess exception, needed by bad_page_fault() */
-	EXC_XFER_LITE(0x300, handle_page_fault)
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
 
-stack_overflow:
+#ifdef CONFIG_VMAP_STACK
 	vmap_stack_overflow_exception
+#endif
 
 /* On the MPC8xx, these next four traps are used for development
  * support of breakpoints and such.  Someday I will get around to
  * using them.
  */
-do_databreakpoint:
-	EXCEPTION_PROLOG_1
-	EXCEPTION_PROLOG_2 handle_dar_dsisr=1
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	mfspr	r4,SPRN_BAR
-	stw	r4,_DAR(r11)
-#ifndef CONFIG_VMAP_STACK
-	mfspr	r5,SPRN_DSISR
-	stw	r5,_DSISR(r11)
-#endif
-	EXC_XFER_STD(0x1c00, do_break)
-
-	. = 0x1c00
-DataBreakpoint:
+	START_EXCEPTION(INTERRUPT_DATA_BREAKPOINT_8xx, DataBreakpoint)
 	EXCEPTION_PROLOG_0 handle_dar_dsisr=1
 	mfspr	r11, SPRN_SRR0
 	cmplwi	cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
 	cmplwi	cr7, r11, (.Litlbie - PAGE_OFFSET)@l
 	cror	4*cr1+eq, 4*cr1+eq, 4*cr7+eq
-	bne	cr1, do_databreakpoint
+	bne	cr1, 1f
 	mtcr	r10
 	mfspr	r10, SPRN_SPRG_SCRATCH0
 	mfspr	r11, SPRN_SPRG_SCRATCH1
 	rfi
 
+1:	EXCEPTION_PROLOG_1
+	EXCEPTION_PROLOG_2 INTERRUPT_DATA_BREAKPOINT_8xx DataBreakpoint handle_dar_dsisr=1
+	mfspr	r4,SPRN_BAR
+	stw	r4,_DAR(r11)
+	prepare_transfer_to_handler
+	bl	do_break
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
 #ifdef CONFIG_PERF_EVENTS
-	. = 0x1d00
-InstructionBreakpoint:
+	START_EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, InstructionBreakpoint)
 	mtspr	SPRN_SPRG_SCRATCH0, r10
 	lwz	r10, (instruction_counter - PAGE_OFFSET)@l(0)
 	addi	r10, r10, -1
@@ -396,11 +385,12 @@ InstructionBreakpoint:
 	mfspr	r10, SPRN_SPRG_SCRATCH0
 	rfi
 #else
-	EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
+	EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, Trap_1d, unknown_exception)
 #endif
-	EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+	EXCEPTION(0x1f00, Trap_1f, unknown_exception)
 
+	__HEAD
 	. = 0x2000
 
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
@@ -510,14 +500,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
 152:
 	mfdar	r11
 	mtctr	r11			/* restore ctr reg from DAR */
-#ifdef CONFIG_VMAP_STACK
 	mfspr	r11, SPRN_SPRG_THREAD
 	stw	r10, DAR(r11)
 	mfspr	r10, SPRN_DSISR
 	stw	r10, DSISR(r11)
-#else
-	mtdar	r10			/* save fault EA to DAR */
-#endif
 	mfspr	r10,SPRN_M_TW
 	b	DARFixed		/* Go back to normal TLB handling */
 
@@ -819,7 +805,7 @@ EXPORT_SYMBOL(empty_zero_page)
 swapper_pg_dir:
 	.space	PGD_TABLE_SIZE
 
-/* Room for two PTE table poiners, usually the kernel and current user
+/* Room for two PTE table pointers, usually the kernel and current user
  * pointer to their respective root page table (pgdir).
  */
 	.globl	abatron_pteptrs
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 565e84e20a7214c5485fa143b33755e22ab0542a..065178f19a3d62b0445ae38bd9c98bab0365cf4d 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -31,6 +31,7 @@
 #include <asm/kvm_book3s_asm.h>
 #include <asm/export.h>
 #include <asm/feature-fixups.h>
+#include <asm/interrupt.h>
 
 #include "head_32.h"
 
@@ -239,7 +240,7 @@ __secondary_hold_acknowledge:
 /* System reset */
 /* core99 pmac starts the secondary here by changing the vector, and
    putting it back to what it was (unknown_async_exception) when done.  */
-	EXCEPTION(0x100, Reset, unknown_async_exception, EXC_XFER_STD)
+	EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, unknown_async_exception)
 
 /* Machine check */
 /*
@@ -255,40 +256,28 @@ __secondary_hold_acknowledge:
  * pointer when we take an exception from supervisor mode.)
  *	-- paulus.
  */
-	. = 0x200
-	DO_KVM  0x200
-MachineCheck:
+	START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
 	EXCEPTION_PROLOG_0
 #ifdef CONFIG_PPC_CHRP
-#ifdef CONFIG_VMAP_STACK
 	mtspr	SPRN_SPRG_SCRATCH2,r1
 	mfspr	r1, SPRN_SPRG_THREAD
 	lwz	r1, RTAS_SP(r1)
 	cmpwi	cr1, r1, 0
 	bne	cr1, 7f
 	mfspr	r1, SPRN_SPRG_SCRATCH2
-#else
-	mfspr	r11, SPRN_SPRG_THREAD
-	lwz	r11, RTAS_SP(r11)
-	cmpwi	cr1, r11, 0
-	bne	cr1, 7f
-#endif
 #endif /* CONFIG_PPC_CHRP */
-	EXCEPTION_PROLOG_1 for_rtas=1
-7:	EXCEPTION_PROLOG_2
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXCEPTION_PROLOG_1
+7:	EXCEPTION_PROLOG_2 0x200 MachineCheck
 #ifdef CONFIG_PPC_CHRP
-	beq	cr1, machine_check_tramp
+	beq	cr1, 1f
 	twi	31, 0, 0
-#else
-	b	machine_check_tramp
 #endif
+1:	prepare_transfer_to_handler
+	bl	machine_check_exception
+	b	interrupt_return
 
 /* Data access exception. */
-	. = 0x300
-	DO_KVM  0x300
-DataAccess:
-#ifdef CONFIG_VMAP_STACK
+	START_EXCEPTION(INTERRUPT_DATA_STORAGE, DataAccess)
 #ifdef CONFIG_PPC_BOOK3S_604
 BEGIN_MMU_FTR_SECTION
 	mtspr	SPRN_SPRG_SCRATCH2,r10
@@ -309,30 +298,20 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
 #endif
 1:	EXCEPTION_PROLOG_0 handle_dar_dsisr=1
 	EXCEPTION_PROLOG_1
-	b	handle_page_fault_tramp_1
-#else	/* CONFIG_VMAP_STACK */
-	EXCEPTION_PROLOG handle_dar_dsisr=1
-	get_and_save_dar_dsisr_on_stack	r4, r5, r11
-#ifdef CONFIG_PPC_BOOK3S_604
-BEGIN_MMU_FTR_SECTION
-	andis.	r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
-	bne	handle_page_fault_tramp_2	/* if not, try to put a PTE */
-	rlwinm	r3, r5, 32 - 15, 21, 21		/* DSISR_STORE -> _PAGE_RW */
-	bl	hash_page
-	b	handle_page_fault_tramp_1
-MMU_FTR_SECTION_ELSE
-#endif
-	b	handle_page_fault_tramp_2
-#ifdef CONFIG_PPC_BOOK3S_604
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
-#endif	/* CONFIG_VMAP_STACK */
+	EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	lwz	r5, _DSISR(r11)
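+	/* A DABR match is a hardware breakpoint, not a page fault: use do_break() */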
+	andis.	r0, r5, DSISR_DABRMATCH@h
+	bne-	1f
+	bl	do_page_fault
+	b	interrupt_return
+1:	bl	do_break
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
 
 /* Instruction access exception. */
-	. = 0x400
-	DO_KVM  0x400
-InstructionAccess:
-#ifdef CONFIG_VMAP_STACK
+	START_EXCEPTION(INTERRUPT_INST_STORAGE, InstructionAccess)
 	mtspr	SPRN_SPRG_SCRATCH0,r10
 	mtspr	SPRN_SPRG_SCRATCH1,r11
 	mfspr	r10, SPRN_SPRG_THREAD
@@ -352,43 +331,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
 	andi.	r11, r11, MSR_PR
 
 	EXCEPTION_PROLOG_1
-	EXCEPTION_PROLOG_2
-#else	/* CONFIG_VMAP_STACK */
-	EXCEPTION_PROLOG
-	andis.	r0,r9,SRR1_ISI_NOPT@h	/* no pte found? */
-	beq	1f			/* if so, try to put a PTE */
-	li	r3,0			/* into the hash table */
-	mr	r4,r12			/* SRR0 is fault address */
-#ifdef CONFIG_PPC_BOOK3S_604
-BEGIN_MMU_FTR_SECTION
-	bl	hash_page
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
-#endif	/* CONFIG_VMAP_STACK */
+	EXCEPTION_PROLOG_2 INTERRUPT_INST_STORAGE InstructionAccess
 	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
 	stw	r5, _DSISR(r11)
 	stw	r12, _DAR(r11)
-	EXC_XFER_LITE(0x400, handle_page_fault)
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
 
 /* External interrupt */
-	EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+	EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
 
 /* Alignment exception */
-	. = 0x600
-	DO_KVM  0x600
-Alignment:
-	EXCEPTION_PROLOG handle_dar_dsisr=1
-	save_dar_dsisr_on_stack r4, r5, r11
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	b	alignment_exception_tramp
+	START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+	EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	alignment_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
 /* Program check exception */
-	EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+	START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+	EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+	prepare_transfer_to_handler
+	bl	program_check_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
 /* Floating-point unavailable */
-	. = 0x800
-	DO_KVM  0x800
-FPUnavailable:
+	START_EXCEPTION(0x800, FPUnavailable)
 #ifdef CONFIG_PPC_FPU
 BEGIN_FTR_SECTION
 /*
@@ -397,30 +368,29 @@ BEGIN_FTR_SECTION
  */
 	b 	ProgramCheck
 END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
-	EXCEPTION_PROLOG
+	EXCEPTION_PROLOG INTERRUPT_FP_UNAVAIL FPUnavailable
 	beq	1f
 	bl	load_up_fpu		/* if from user, just load it up */
 	b	fast_exception_return
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception)
+1:	prepare_transfer_to_handler
+	bl	kernel_fp_unavailable_exception
+	b	interrupt_return
 #else
 	b 	ProgramCheck
 #endif
 
 /* Decrementer */
-	EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
+	EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
 
-	EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0xa00, Trap_0a, unknown_exception)
+	EXCEPTION(0xb00, Trap_0b, unknown_exception)
 
 /* System call */
-	. = 0xc00
-	DO_KVM  0xc00
-SystemCall:
-	SYSCALL_ENTRY	0xc00
+	START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+	SYSCALL_ENTRY	INTERRUPT_SYSCALL
 
-	EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
-	EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
+	EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
+	EXCEPTION(0xe00, Trap_0e, unknown_exception)
 
 /*
  * The Altivec unavailable trap is at 0x0f20.  Foo.
@@ -430,19 +400,18 @@ SystemCall:
  * non-altivec kernel running on a machine with altivec just
  * by executing an altivec instruction.
  */
-	. = 0xf00
-	DO_KVM  0xf00
+	START_EXCEPTION(INTERRUPT_PERFMON, PerformanceMonitorTrap)
 	b	PerformanceMonitor
 
-	. = 0xf20
-	DO_KVM  0xf20
+	START_EXCEPTION(INTERRUPT_ALTIVEC_UNAVAIL, AltiVecUnavailableTrap)
 	b	AltiVecUnavailable
 
+	__HEAD
 /*
  * Handle TLB miss for instruction on 603/603e.
  * Note: we get an alternate set of r0 - r3 to use automatically.
  */
-	. = 0x1000
+	. = INTERRUPT_INST_TLB_MISS_603
 InstructionTLBMiss:
 /*
  * r0:	scratch
@@ -508,7 +477,7 @@ InstructionAddressInvalid:
 /*
  * Handle TLB miss for DATA Load operation on 603/603e
  */
-	. = 0x1100
+	. = INTERRUPT_DATA_LOAD_TLB_MISS_603
 DataLoadTLBMiss:
 /*
  * r0:	scratch
@@ -586,7 +555,7 @@ DataAddressInvalid:
 /*
  * Handle TLB miss for DATA Store on 603/603e
  */
-	. = 0x1200
+	. = INTERRUPT_DATA_STORE_TLB_MISS_603
 DataStoreTLBMiss:
 /*
  * r0:	scratch
@@ -650,57 +619,39 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 #define TAUException	unknown_async_exception
 #endif
 
-	EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
-	EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD)
-	EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD)
-	EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
-	EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD)
-	EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception)
+	EXCEPTION(0x1400, SMI, SMIException)
+	EXCEPTION(0x1500, Trap_15, unknown_exception)
+	EXCEPTION(0x1600, Trap_16, altivec_assist_exception)
+	EXCEPTION(0x1700, Trap_17, TAUException)
+	EXCEPTION(0x1800, Trap_18, unknown_exception)
+	EXCEPTION(0x1900, Trap_19, unknown_exception)
+	EXCEPTION(0x1a00, Trap_1a, unknown_exception)
+	EXCEPTION(0x1b00, Trap_1b, unknown_exception)
+	EXCEPTION(0x1c00, Trap_1c, unknown_exception)
+	EXCEPTION(0x1d00, Trap_1d, unknown_exception)
+	EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+	EXCEPTION(0x1f00, Trap_1f, unknown_exception)
+	EXCEPTION(0x2000, RunMode, RunModeException)
+	EXCEPTION(0x2100, Trap_21, unknown_exception)
+	EXCEPTION(0x2200, Trap_22, unknown_exception)
+	EXCEPTION(0x2300, Trap_23, unknown_exception)
+	EXCEPTION(0x2400, Trap_24, unknown_exception)
+	EXCEPTION(0x2500, Trap_25, unknown_exception)
+	EXCEPTION(0x2600, Trap_26, unknown_exception)
+	EXCEPTION(0x2700, Trap_27, unknown_exception)
+	EXCEPTION(0x2800, Trap_28, unknown_exception)
+	EXCEPTION(0x2900, Trap_29, unknown_exception)
+	EXCEPTION(0x2a00, Trap_2a, unknown_exception)
+	EXCEPTION(0x2b00, Trap_2b, unknown_exception)
+	EXCEPTION(0x2c00, Trap_2c, unknown_exception)
+	EXCEPTION(0x2d00, Trap_2d, unknown_exception)
+	EXCEPTION(0x2e00, Trap_2e, unknown_exception)
+	EXCEPTION(0x2f00, Trap_2f, unknown_exception)
 
+	__HEAD
 	. = 0x3000
 
-machine_check_tramp:
-	EXC_XFER_STD(0x200, machine_check_exception)
-
-alignment_exception_tramp:
-	EXC_XFER_STD(0x600, alignment_exception)
-
-handle_page_fault_tramp_1:
-#ifdef CONFIG_VMAP_STACK
-	EXCEPTION_PROLOG_2 handle_dar_dsisr=1
-#endif
-	lwz	r5, _DSISR(r11)
-	/* fall through */
-handle_page_fault_tramp_2:
-	andis.	r0, r5, DSISR_DABRMATCH@h
-	bne-	1f
-	EXC_XFER_LITE(0x300, handle_page_fault)
-1:	EXC_XFER_STD(0x300, do_break)
-
-#ifdef CONFIG_VMAP_STACK
 #ifdef CONFIG_PPC_BOOK3S_604
 .macro save_regs_thread		thread
 	stw	r0, THR0(\thread)
@@ -775,26 +726,31 @@ fast_hash_page_return:
 	rfi
 #endif /* CONFIG_PPC_BOOK3S_604 */
 
-stack_overflow:
+#ifdef CONFIG_VMAP_STACK
 	vmap_stack_overflow_exception
 #endif
 
+	__HEAD
 AltiVecUnavailable:
-	EXCEPTION_PROLOG
+	EXCEPTION_PROLOG 0xf20 AltiVecUnavailable
 #ifdef CONFIG_ALTIVEC
 	beq	1f
 	bl	load_up_altivec		/* if from user, just load it up */
 	b	fast_exception_return
 #endif /* CONFIG_ALTIVEC */
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_LITE(0xf20, altivec_unavailable_exception)
+1:	prepare_transfer_to_handler
+	bl	altivec_unavailable_exception
+	b	interrupt_return
 
+	__HEAD
 PerformanceMonitor:
-	EXCEPTION_PROLOG
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_STD(0xf00, performance_monitor_exception)
+	EXCEPTION_PROLOG 0xf00 PerformanceMonitor
+	prepare_transfer_to_handler
+	bl	performance_monitor_exception
+	b	interrupt_return
 
 
+	__HEAD
 /*
  * This code is jumped to from the startup code to copy
  * the kernel image to physical address PHYSICAL_START.
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 47857795f50a64c4501406cb344cfb96bacba6dc..f8247009169703ee0c31ba03acbdd3d6a333902d 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -44,7 +44,7 @@ END_BTB_FLUSH_SECTION
 #endif
 
 
-#define NORMAL_EXCEPTION_PROLOG(intno)						     \
+#define NORMAL_EXCEPTION_PROLOG(trapno, intno)						     \
 	mtspr	SPRN_SPRG_WSCRATCH0, r10;	/* save one register */	     \
 	mfspr	r10, SPRN_SPRG_THREAD;					     \
 	stw	r11, THREAD_NORMSAVE(0)(r10);				     \
@@ -53,6 +53,8 @@ END_BTB_FLUSH_SECTION
 	mfspr	r11, SPRN_SRR1;		                                     \
 	DO_KVM	BOOKE_INTERRUPT_##intno SPRN_SRR1;			     \
 	andi.	r11, r11, MSR_PR;	/* check whether user or kernel    */\
+	LOAD_REG_IMMEDIATE(r11, MSR_KERNEL);				\
+	mtmsr	r11;							\
 	mr	r11, r1;						     \
 	beq	1f;							     \
 	BOOKE_CLEAR_BTB(r11)						\
@@ -76,12 +78,39 @@ END_BTB_FLUSH_SECTION
 	stw	r1, 0(r11);						     \
 	mr	r1, r11;						     \
 	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
-	stw	r0,GPR0(r11);						     \
-	lis	r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \
-	addi	r10, r10, STACK_FRAME_REGS_MARKER@l;			     \
-	stw	r10, 8(r11);						     \
-	SAVE_4GPRS(3, r11);						     \
-	SAVE_2GPRS(7, r11)
+	COMMON_EXCEPTION_PROLOG_END trapno
+
+.macro COMMON_EXCEPTION_PROLOG_END trapno
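+	/* Finish the exception frame: GPRs, trap number, NIP, MSR, CTR and XER, */
+	/* then leave r3 pointing at the saved registers for the C handler.	  */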
+	stw	r0,GPR0(r1)
+	lis	r10, STACK_FRAME_REGS_MARKER@ha	/* exception frame marker */
+	addi	r10, r10, STACK_FRAME_REGS_MARKER@l
+	stw	r10, 8(r1)
+	li	r10, \trapno
+	stw	r10,_TRAP(r1)
+	SAVE_4GPRS(3, r1)
+	SAVE_2GPRS(7, r1)
+	SAVE_NVGPRS(r1)
+	stw	r2,GPR2(r1)
+	stw	r12,_NIP(r1)
+	stw	r9,_MSR(r1)
+	mfctr	r10
+	mfspr	r2,SPRN_SPRG_THREAD
+	stw	r10,_CTR(r1)
+	tovirt(r2, r2)
+	mfspr	r10,SPRN_XER
+	addi	r2, r2, -THREAD
+	stw	r10,_XER(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+.endm
+
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_E500
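+	/* only kernel-mode entries need the call; user-mode entries skip it */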
+	andi.	r12,r9,MSR_PR
+	bne	777f
+	bl	prepare_transfer_to_handler
+777:
+#endif
+.endm
 
 .macro SYSCALL_ENTRY trapno intno srr1
 	mfspr	r10, SPRN_SPRG_THREAD
@@ -180,7 +209,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
  * registers as the normal prolog above. Instead we use a portion of the
  * critical/machine check exception stack at low physical addresses.
  */
-#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, trapno, intno, exc_level_srr0, exc_level_srr1) \
 	mtspr	SPRN_SPRG_WSCRATCH_##exc_level,r8;			     \
 	BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
 	stw	r9,GPR9(r8);		/* save various registers	   */\
@@ -192,6 +221,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 	DO_KVM	BOOKE_INTERRUPT_##intno exc_level_srr1;		             \
 	BOOKE_CLEAR_BTB(r10)						\
 	andi.	r11,r11,MSR_PR;						     \
+	LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE));	\
+	mtmsr	r11;							\
 	mfspr	r11,SPRN_SPRG_THREAD;	/* if from user, start at top of   */\
 	lwz	r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\
 	addi	r11,r11,EXC_LVL_FRAME_OVERHEAD;	/* allocate stack frame    */\
@@ -221,16 +252,44 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 	stw	r1,0(r11);						     \
 	mr	r1,r11;							     \
 	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
-	stw	r0,GPR0(r11);						     \
-	SAVE_4GPRS(3, r11);						     \
-	SAVE_2GPRS(7, r11)
+	COMMON_EXCEPTION_PROLOG_END trapno
+
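+/* Save an exception-level SRR0/SRR1 pair (SRR, CSRR, DSRR) into the frame */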
+#define SAVE_xSRR(xSRR)			\
+	mfspr	r0,SPRN_##xSRR##0;	\
+	stw	r0,_##xSRR##0(r1);	\
+	mfspr	r0,SPRN_##xSRR##1;	\
+	stw	r0,_##xSRR##1(r1)
+
+
+.macro SAVE_MMU_REGS
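+	/* Preserve the MMU assist (MAS) registers and the 44x MMUCR in the frame */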
+#ifdef CONFIG_PPC_BOOK3E_MMU
+	mfspr	r0,SPRN_MAS0
+	stw	r0,MAS0(r1)
+	mfspr	r0,SPRN_MAS1
+	stw	r0,MAS1(r1)
+	mfspr	r0,SPRN_MAS2
+	stw	r0,MAS2(r1)
+	mfspr	r0,SPRN_MAS3
+	stw	r0,MAS3(r1)
+	mfspr	r0,SPRN_MAS6
+	stw	r0,MAS6(r1)
+#ifdef CONFIG_PHYS_64BIT
+	mfspr	r0,SPRN_MAS7
+	stw	r0,MAS7(r1)
+#endif /* CONFIG_PHYS_64BIT */
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+#ifdef CONFIG_44x
+	mfspr	r0,SPRN_MMUCR
+	stw	r0,MMUCR(r1)
+#endif
+.endm
 
-#define CRITICAL_EXCEPTION_PROLOG(intno) \
-		EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1)
-#define DEBUG_EXCEPTION_PROLOG \
-		EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
-#define MCHECK_EXCEPTION_PROLOG \
-		EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \
+#define CRITICAL_EXCEPTION_PROLOG(trapno, intno) \
+		EXC_LEVEL_EXCEPTION_PROLOG(CRIT, trapno+2, intno, SPRN_CSRR0, SPRN_CSRR1)
+#define DEBUG_EXCEPTION_PROLOG(trapno) \
+		EXC_LEVEL_EXCEPTION_PROLOG(DBG, trapno+8, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
+#define MCHECK_EXCEPTION_PROLOG(trapno) \
+		EXC_LEVEL_EXCEPTION_PROLOG(MC, trapno+4, MACHINE_CHECK, \
 			SPRN_MCSRR0, SPRN_MCSRR1)
 
 /*
@@ -257,44 +316,34 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
         .align 5;              						     \
 label:
 
-#define EXCEPTION(n, intno, label, hdlr, xfer)			\
+#define EXCEPTION(n, intno, label, hdlr)			\
 	START_EXCEPTION(label);					\
-	NORMAL_EXCEPTION_PROLOG(intno);				\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
-	xfer(n, hdlr)
+	NORMAL_EXCEPTION_PROLOG(n, intno);			\
+	prepare_transfer_to_handler;				\
+	bl	hdlr;						\
+	b	interrupt_return
 
 #define CRITICAL_EXCEPTION(n, intno, label, hdlr)			\
 	START_EXCEPTION(label);						\
-	CRITICAL_EXCEPTION_PROLOG(intno);				\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
-	EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
-			  crit_transfer_to_handler, ret_from_crit_exc)
+	CRITICAL_EXCEPTION_PROLOG(n, intno);				\
+	SAVE_MMU_REGS;							\
+	SAVE_xSRR(SRR);							\
+	prepare_transfer_to_handler;					\
+	bl	hdlr;							\
+	b	ret_from_crit_exc
 
 #define MCHECK_EXCEPTION(n, label, hdlr)			\
 	START_EXCEPTION(label);					\
-	MCHECK_EXCEPTION_PROLOG;				\
+	MCHECK_EXCEPTION_PROLOG(n);				\
 	mfspr	r5,SPRN_ESR;					\
 	stw	r5,_ESR(r11);					\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
-	EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
-			  mcheck_transfer_to_handler, ret_from_mcheck_exc)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret)	\
-	li	r10,trap;					\
-	stw	r10,_TRAP(r11);					\
-	lis	r10,msr@h;					\
-	ori	r10,r10,msr@l;					\
-	bl	tfer;		 				\
-	.long	hdlr;						\
-	.long	ret
-
-#define EXC_XFER_STD(n, hdlr)		\
-	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
-			  ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr)		\
-	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
-			  ret_from_except)
+	SAVE_xSRR(DSRR);					\
+	SAVE_xSRR(CSRR);					\
+	SAVE_MMU_REGS;						\
+	SAVE_xSRR(SRR);						\
+	prepare_transfer_to_handler;				\
+	bl	hdlr;						\
+	b	ret_from_mcheck_exc
 
 /* Check for a single step debug exception while in an exception
  * handler before state has been saved.  This is to catch the case
@@ -311,7 +360,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
  */
 #define DEBUG_DEBUG_EXCEPTION						      \
 	START_EXCEPTION(DebugDebug);					      \
-	DEBUG_EXCEPTION_PROLOG;						      \
+	DEBUG_EXCEPTION_PROLOG(2000);						      \
 									      \
 	/*								      \
 	 * If there is a single step or branch-taken exception in an	      \
@@ -360,12 +409,16 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 	/* continue normal handling for a debug exception... */		      \
 2:	mfspr	r4,SPRN_DBSR;						      \
 	stw	r4,_ESR(r11);		/* DebugException takes DBSR in _ESR */\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
-	EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc)
+	SAVE_xSRR(CSRR);						      \
+	SAVE_MMU_REGS;							      \
+	SAVE_xSRR(SRR);							      \
+	prepare_transfer_to_handler;				      \
+	bl	DebugException;						      \
+	b	ret_from_debug_exc
 
 #define DEBUG_CRIT_EXCEPTION						      \
 	START_EXCEPTION(DebugCrit);					      \
-	CRITICAL_EXCEPTION_PROLOG(DEBUG);				      \
+	CRITICAL_EXCEPTION_PROLOG(2000,DEBUG);				      \
 									      \
 	/*								      \
 	 * If there is a single step or branch-taken exception in an	      \
@@ -414,58 +467,71 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 	/* continue normal handling for a critical exception... */	      \
 2:	mfspr	r4,SPRN_DBSR;						      \
 	stw	r4,_ESR(r11);		/* DebugException takes DBSR in _ESR */\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
-	EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc)
+	SAVE_MMU_REGS;							      \
+	SAVE_xSRR(SRR);							      \
+	prepare_transfer_to_handler;					      \
+	bl	DebugException;						      \
+	b	ret_from_crit_exc
 
 #define DATA_STORAGE_EXCEPTION						      \
 	START_EXCEPTION(DataStorage)					      \
-	NORMAL_EXCEPTION_PROLOG(DATA_STORAGE);		      \
+	NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE);		      \
 	mfspr	r5,SPRN_ESR;		/* Grab the ESR and save it */	      \
 	stw	r5,_ESR(r11);						      \
 	mfspr	r4,SPRN_DEAR;		/* Grab the DEAR */		      \
 	stw	r4, _DEAR(r11);						      \
-	EXC_XFER_LITE(0x0300, handle_page_fault)
+	prepare_transfer_to_handler;					      \
+	bl	do_page_fault;						      \
+	b	interrupt_return
 
 #define INSTRUCTION_STORAGE_EXCEPTION					      \
 	START_EXCEPTION(InstructionStorage)				      \
-	NORMAL_EXCEPTION_PROLOG(INST_STORAGE);		      \
+	NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE);		      \
 	mfspr	r5,SPRN_ESR;		/* Grab the ESR and save it */	      \
 	stw	r5,_ESR(r11);						      \
 	stw	r12, _DEAR(r11);	/* Pass SRR0 as arg2 */		      \
-	EXC_XFER_LITE(0x0400, handle_page_fault)
+	prepare_transfer_to_handler;					      \
+	bl	do_page_fault;						      \
+	b	interrupt_return
 
 #define ALIGNMENT_EXCEPTION						      \
 	START_EXCEPTION(Alignment)					      \
-	NORMAL_EXCEPTION_PROLOG(ALIGNMENT);		      \
+	NORMAL_EXCEPTION_PROLOG(0x600, ALIGNMENT);		      \
 	mfspr   r4,SPRN_DEAR;           /* Grab the DEAR and save it */	      \
 	stw     r4,_DEAR(r11);						      \
-	addi    r3,r1,STACK_FRAME_OVERHEAD;				      \
-	EXC_XFER_STD(0x0600, alignment_exception)
+	prepare_transfer_to_handler;					      \
+	bl	alignment_exception;					      \
+	REST_NVGPRS(r1);						      \
+	b	interrupt_return
 
 #define PROGRAM_EXCEPTION						      \
 	START_EXCEPTION(Program)					      \
-	NORMAL_EXCEPTION_PROLOG(PROGRAM);		      \
+	NORMAL_EXCEPTION_PROLOG(0x700, PROGRAM);		      \
 	mfspr	r4,SPRN_ESR;		/* Grab the ESR and save it */	      \
 	stw	r4,_ESR(r11);						      \
-	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
-	EXC_XFER_STD(0x0700, program_check_exception)
+	prepare_transfer_to_handler;					      \
+	bl	program_check_exception;				      \
+	REST_NVGPRS(r1);						      \
+	b	interrupt_return
 
 #define DECREMENTER_EXCEPTION						      \
 	START_EXCEPTION(Decrementer)					      \
-	NORMAL_EXCEPTION_PROLOG(DECREMENTER);		      \
+	NORMAL_EXCEPTION_PROLOG(0x900, DECREMENTER);		      \
 	lis     r0,TSR_DIS@h;           /* Setup the DEC interrupt mask */    \
 	mtspr   SPRN_TSR,r0;		/* Clear the DEC interrupt */	      \
-	addi    r3,r1,STACK_FRAME_OVERHEAD;				      \
-	EXC_XFER_LITE(0x0900, timer_interrupt)
+	prepare_transfer_to_handler;					      \
+	bl	timer_interrupt;					      \
+	b	interrupt_return
 
 #define FP_UNAVAILABLE_EXCEPTION					      \
 	START_EXCEPTION(FloatingPointUnavailable)			      \
-	NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL);		      \
+	NORMAL_EXCEPTION_PROLOG(0x800, FP_UNAVAIL);		      \
 	beq	1f;							      \
 	bl	load_up_fpu;		/* if from user, just load it up */   \
 	b	fast_exception_return;					      \
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD;				      \
-	EXC_XFER_STD(0x800, kernel_fp_unavailable_exception)
+1:	prepare_transfer_to_handler;					      \
+	bl	kernel_fp_unavailable_exception;			      \
+	b	interrupt_return
 
 #else /* __ASSEMBLY__ */
 struct exception_regs {
@@ -481,7 +547,6 @@ struct exception_regs {
 	unsigned long csrr1;
 	unsigned long dsrr0;
 	unsigned long dsrr1;
-	unsigned long saved_ksp_limit;
 };
 
 /* ensure this structure is always sized to a multiple of the stack alignment */
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 3f4a40cccef5df87e6deeabc82dffa9d7a39201e..a1a5c3f10dc4245d16318f8ee1dc13514d162e29 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -113,7 +113,7 @@ _ENTRY(_start);
 
 1:
 	/*
-	 * We have the runtime (virutal) address of our base.
+	 * We have the runtime (virtual) address of our base.
 	 * We calculate our shift of offset from a 64M page.
 	 * We could map the 64M page we belong to at PAGE_OFFSET and
 	 * get going from there.
@@ -363,23 +363,26 @@ interrupt_base:
 
 	/* Data Storage Interrupt */
 	START_EXCEPTION(DataStorage)
-	NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
+	NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE)
 	mfspr	r5,SPRN_ESR		/* Grab the ESR, save it */
 	stw	r5,_ESR(r11)
 	mfspr	r4,SPRN_DEAR		/* Grab the DEAR, save it */
 	stw	r4, _DEAR(r11)
 	andis.	r10,r5,(ESR_ILK|ESR_DLK)@h
 	bne	1f
-	EXC_XFER_LITE(0x0300, handle_page_fault)
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
 1:
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_LITE(0x0300, CacheLockingException)
+	prepare_transfer_to_handler
+	bl	CacheLockingException
+	b	interrupt_return
 
 	/* Instruction Storage Interrupt */
 	INSTRUCTION_STORAGE_EXCEPTION
 
 	/* External Input Interrupt */
-	EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
+	EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ)
 
 	/* Alignment Interrupt */
 	ALIGNMENT_EXCEPTION
@@ -391,8 +394,7 @@ interrupt_base:
 #ifdef CONFIG_PPC_FPU
 	FP_UNAVAILABLE_EXCEPTION
 #else
-	EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
-		  unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception)
 #endif
 
 	/* System Call Interrupt */
@@ -400,16 +402,14 @@ interrupt_base:
 	SYSCALL_ENTRY   0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1
 
 	/* Auxiliary Processor Unavailable Interrupt */
-	EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
-		  unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, unknown_exception)
 
 	/* Decrementer Interrupt */
 	DECREMENTER_EXCEPTION
 
 	/* Fixed Internal Timer Interrupt */
 	/* TODO: Add FIT support */
-	EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
-		  unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0x3100, FIT, FixedIntervalTimer, unknown_exception)
 
 	/* Watchdog Timer Interrupt */
 #ifdef CONFIG_BOOKE_WDT
@@ -497,7 +497,7 @@ END_BTB_FLUSH_SECTION
 #endif
 #endif
 
-	bne	2f			/* Bail if permission/valid mismach */
+	bne	2f			/* Bail if permission/valid mismatch */
 
 	/* Jump to common tlb load */
 	b	finish_tlb_load
@@ -592,7 +592,7 @@ END_BTB_FLUSH_SECTION
 #endif
 #endif
 
-	bne	2f			/* Bail if permission mismach */
+	bne	2f			/* Bail if permission mismatch */
 
 	/* Jump to common TLB load point */
 	b	finish_tlb_load
@@ -614,38 +614,44 @@ END_BTB_FLUSH_SECTION
 #ifdef CONFIG_SPE
 	/* SPE Unavailable */
 	START_EXCEPTION(SPEUnavailable)
-	NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
+	NORMAL_EXCEPTION_PROLOG(0x2010, SPE_UNAVAIL)
 	beq	1f
 	bl	load_up_spe
 	b	fast_exception_return
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	EXC_XFER_LITE(0x2010, KernelSPE)
+1:	prepare_transfer_to_handler
+	bl	KernelSPE
+	b	interrupt_return
 #elif defined(CONFIG_SPE_POSSIBLE)
-	EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
-		  unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, unknown_exception)
 #endif /* CONFIG_SPE_POSSIBLE */
 
 	/* SPE Floating Point Data */
 #ifdef CONFIG_SPE
-	EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
-		  SPEFloatingPointException, EXC_XFER_STD)
+	START_EXCEPTION(SPEFloatingPointData)
+	NORMAL_EXCEPTION_PROLOG(0x2030, SPE_FP_DATA)
+	prepare_transfer_to_handler
+	bl	SPEFloatingPointException
+	REST_NVGPRS(r1)
+	b	interrupt_return
 
 	/* SPE Floating Point Round */
-	EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
-		  SPEFloatingPointRoundException, EXC_XFER_STD)
+	START_EXCEPTION(SPEFloatingPointRound)
+	NORMAL_EXCEPTION_PROLOG(0x2050, SPE_FP_ROUND)
+	prepare_transfer_to_handler
+	bl	SPEFloatingPointRoundException
+	REST_NVGPRS(r1)
+	b	interrupt_return
 #elif defined(CONFIG_SPE_POSSIBLE)
-	EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
-		  unknown_exception, EXC_XFER_STD)
-	EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
-		  unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception)
+	EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, unknown_exception)
 #endif /* CONFIG_SPE_POSSIBLE */
 
 
 	/* Performance Monitor */
 	EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
-		  performance_monitor_exception, EXC_XFER_STD)
+		  performance_monitor_exception)
 
-	EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
+	EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception)
 
 	CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
 			   CriticalDoorbell, unknown_exception)
@@ -660,10 +666,10 @@ END_BTB_FLUSH_SECTION
 			   unknown_exception)
 
 	/* Hypercall */
-	EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception)
 
 	/* Embedded Hypervisor Privilege */
-	EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD)
+	EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception)
 
 interrupt_end:
 
@@ -854,7 +860,7 @@ KernelSPE:
 	lwz	r5,_NIP(r1)
 	bl	printk
 #endif
-	b	ret_from_except
+	b	interrupt_return
 #ifdef CONFIG_PRINTK
 87:	.string	"SPE used in kernel  (task=%p, pc=%x)  \n"
 #endif
diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
index 867ee4aa026ad7800ff3bfbd10b397d831fd8dbc..675d1f66ab7283206fc550ffdc574f75a334e10e 100644
--- a/arch/powerpc/kernel/hw_breakpoint_constraints.c
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -141,7 +141,7 @@ void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
 {
 	struct instruction_op op;
 
-	if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip))
+	if (__get_user_instr(*instr, (void __user *)regs->nip))
 		return;
 
 	analyse_instr(&op, regs, *instr);
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 69df840f72535f5a52aec84eb086f27f295b223e..13cad9297d8222fb5f37605f5798909095296991 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -145,9 +145,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 
 /*
  * Return from NAP/DOZE mode, restore some CPU specific registers,
- * we are called with DR/IR still off and r2 containing physical
- * address of current.  R11 points to the exception frame (physical
- * address).  We have to preserve r10.
+ * R11 points to the exception frame. We have to preserve r10.
  */
 _GLOBAL(power_save_ppc32_restore)
 	lwz	r9,_LINK(r11)		/* interrupted in ppc6xx_idle: */
@@ -166,11 +164,7 @@ BEGIN_FTR_SECTION
 	mfspr	r9,SPRN_HID0
 	andis.	r9,r9,HID0_NAP@h
 	beq	1f
-#ifdef CONFIG_VMAP_STACK
 	addis	r9, r11, nap_save_msscr0@ha
-#else
-	addis	r9,r11,(nap_save_msscr0-KERNELBASE)@ha
-#endif
 	lwz	r9,nap_save_msscr0@l(r9)
 	mtspr	SPRN_MSSCR0, r9
 	sync
@@ -178,15 +172,11 @@ BEGIN_FTR_SECTION
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
 BEGIN_FTR_SECTION
-#ifdef CONFIG_VMAP_STACK
 	addis	r9, r11, nap_save_hid1@ha
-#else
-	addis	r9,r11,(nap_save_hid1-KERNELBASE)@ha
-#endif
 	lwz	r9,nap_save_hid1@l(r9)
 	mtspr	SPRN_HID1, r9
 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
-	b	transfer_to_handler_cont
+	blr
 _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
 
 	.data
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index f9e6d83e67207d3638555b14555a055c420a4cd9..abb719b21cae7214be8d98d5a229ba10d21ce924 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -209,4 +209,8 @@ _GLOBAL(power4_idle_nap)
 	mtmsrd	r7
 	isync
 	b	1b
+
+	.globl power4_idle_nap_return
+power4_idle_nap_return:
+	blr
 #endif
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
index 72c85b6f3898b409b9365ed765bf7fb3a0bf812f..9e1bc4502c507d5e329ceae631157fd16bc13717 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -74,20 +74,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 /*
  * Return from NAP/DOZE mode, restore some CPU specific registers,
- * r2 containing physical address of current.
- * r11 points to the exception frame (physical address).
+ * r2 containing address of current.
+ * r11 points to the exception frame.
  * We have to preserve r10.
  */
 _GLOBAL(power_save_ppc32_restore)
 	lwz	r9,_LINK(r11)		/* interrupted in e500_idle */
 	stw	r9,_NIP(r11)		/* make it do a blr */
-
-#ifdef CONFIG_SMP
-	lwz	r11,TASK_CPU(r2)		/* get cpu number * 4 */
-	slwi	r11,r11,2
-#else
-	li	r11,0
-#endif
-
-	b	transfer_to_handler_cont
+	blr
 _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index c475a229a42ac492298313b485684e8d541a4139..e4559f8914eb78920bc279430101351c0cdb1047 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -20,6 +20,10 @@
 #include <asm/time.h>
 #include <asm/unistd.h>
 
+#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
+unsigned long global_dbcr0[NR_CPUS];
+#endif
+
 typedef long (*syscall_fn)(long, long, long, long, long, long);
 
 /* Has to run notrace because it is entered not completely "reconciled" */
@@ -29,20 +33,24 @@ notrace long system_call_exception(long r3, long r4, long r5,
 {
 	syscall_fn f;
 
+	kuep_lock();
+#ifdef CONFIG_PPC32
+	kuap_save_and_lock(regs);
+#endif
+
 	regs->orig_gpr3 = r3;
 
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
 
+	trace_hardirqs_off(); /* finish reconciling */
+
 	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
 	user_exit_irqoff();
 
-	trace_hardirqs_off(); /* finish reconciling */
-
 	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
 		BUG_ON(!(regs->msr & MSR_RI));
 	BUG_ON(!(regs->msr & MSR_PR));
-	BUG_ON(!FULL_REGS(regs));
 	BUG_ON(arch_irq_disabled_regs(regs));
 
 #ifdef CONFIG_PPC_PKEY
@@ -69,9 +77,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 			isync();
 	} else
 #endif
-#ifdef CONFIG_PPC64
-		kuap_check_amr();
-#endif
+		kuap_assert_locked();
 
 	booke_restore_dbcr0();
 
@@ -247,9 +253,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 
 	CT_WARN_ON(ct_state() == CONTEXT_USER);
 
-#ifdef CONFIG_PPC64
-	kuap_check_amr();
-#endif
+	kuap_assert_locked();
 
 	regs->result = r3;
 
@@ -344,16 +348,13 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 
 	account_cpu_user_exit();
 
-#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */
-	/*
-	 * We do this at the end so that we do context switch with KERNEL AMR
-	 */
+	/* Restore user access locks last */
 	kuap_user_restore(regs);
-#endif
+	kuep_unlock();
+
 	return ret;
 }
 
-#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
 {
 	unsigned long ti_flags;
@@ -363,7 +364,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
 	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
 		BUG_ON(!(regs->msr & MSR_RI));
 	BUG_ON(!(regs->msr & MSR_PR));
-	BUG_ON(!FULL_REGS(regs));
 	BUG_ON(arch_irq_disabled_regs(regs));
 	CT_WARN_ON(ct_state() == CONTEXT_USER);
 
@@ -371,9 +371,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
 	 * We don't need to restore AMR on the way back to userspace for KUAP.
 	 * AMR can only have been unlocked if we interrupted the kernel.
 	 */
-#ifdef CONFIG_PPC64
-	kuap_check_amr();
-#endif
+	kuap_assert_locked();
 
 	local_irq_save(flags);
 
@@ -392,7 +390,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
 		ti_flags = READ_ONCE(current_thread_info()->flags);
 	}
 
-	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
 		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
 				unlikely((ti_flags & _TIF_RESTORE_TM))) {
 			restore_tm_state(regs);
@@ -427,12 +425,9 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
 
 	account_cpu_user_exit();
 
-	/*
-	 * We do this at the end so that we do context switch with KERNEL AMR
-	 */
-#ifdef CONFIG_PPC64
+	/* Restore user access locks last */
 	kuap_user_restore(regs);
-#endif
+
 	return ret;
 }
 
@@ -442,25 +437,20 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 {
 	unsigned long flags;
 	unsigned long ret = 0;
-#ifdef CONFIG_PPC64
-	unsigned long amr;
-#endif
+	unsigned long kuap;
 
 	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
 	    unlikely(!(regs->msr & MSR_RI)))
 		unrecoverable_exception(regs);
 	BUG_ON(regs->msr & MSR_PR);
-	BUG_ON(!FULL_REGS(regs));
 	/*
 	 * CT_WARN_ON comes here via program_check_exception,
 	 * so avoid recursion.
 	 */
-	if (TRAP(regs) != 0x700)
+	if (TRAP(regs) != INTERRUPT_PROGRAM)
 		CT_WARN_ON(ct_state() == CONTEXT_USER);
 
-#ifdef CONFIG_PPC64
-	amr = kuap_get_and_check_amr();
-#endif
+	kuap = kuap_get_and_assert_locked();
 
 	if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
 		clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
@@ -498,14 +488,11 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 #endif
 
 	/*
-	 * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr,
-	 * which would cause Read-After-Write stalls. Hence, we take the AMR
-	 * value from the check above.
+	 * 64s does not want to mfspr(SPRN_AMR) here, because this comes after
+	 * mtmsr, which would cause Read-After-Write stalls. Hence, take the
+	 * AMR value from the check above.
 	 */
-#ifdef CONFIG_PPC64
-	kuap_kernel_restore(regs, amr);
-#endif
+	kuap_kernel_restore(regs, kuap);
 
 	return ret;
 }
-#endif
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index c00214a4355c80490bf4347f0bd2f7b2da082f0f..57d6b85e9b964f1339f9656a893e8c3ba87ee545 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -72,8 +72,7 @@ static void iommu_debugfs_del(struct iommu_table *tbl)
 
 	sprintf(name, "%08lx", tbl->it_index);
 	liobn_entry = debugfs_lookup(name, iommu_debugfs_dir);
-	if (liobn_entry)
-		debugfs_remove(liobn_entry);
+	debugfs_remove(liobn_entry);
 }
 #else
 static void iommu_debugfs_add(struct iommu_table *tbl){}
@@ -297,6 +296,15 @@ static unsigned long iommu_range_alloc(struct device *dev,
 			pass++;
 			goto again;
 
+		} else if (pass == tbl->nr_pools + 1) {
+			/* Last resort: try largepool */
+			spin_unlock(&pool->lock);
+			pool = &tbl->large_pool;
+			spin_lock(&pool->lock);
+			pool->hint = pool->start;
+			pass++;
+			goto again;
+
 		} else {
 			/* Give up */
 			spin_unlock_irqrestore(&(pool->lock), flags);
@@ -719,7 +727,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
 {
 	unsigned long sz;
 	static int welcomed = 0;
-	struct page *page;
 	unsigned int i;
 	struct iommu_pool *p;
 
@@ -728,11 +735,11 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
 	/* number of bytes needed for the bitmap */
 	sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
 
-	page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz));
-	if (!page)
-		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
-	tbl->it_map = page_address(page);
-	memset(tbl->it_map, 0, sz);
+	tbl->it_map = vzalloc_node(sz, nid);
+	if (!tbl->it_map) {
+		pr_err("%s: Can't allocate %ld bytes\n", __func__, sz);
+		return NULL;
+	}
 
 	iommu_table_reserve_pages(tbl, res_start, res_end);
 
@@ -774,8 +781,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
 
 static void iommu_table_free(struct kref *kref)
 {
-	unsigned long bitmap_sz;
-	unsigned int order;
 	struct iommu_table *tbl;
 
 	tbl = container_of(kref, struct iommu_table, it_kref);
@@ -796,12 +801,8 @@ static void iommu_table_free(struct kref *kref)
 	if (!bitmap_empty(tbl->it_map, tbl->it_size))
 		pr_warn("%s: Unexpected TCEs\n", __func__);
 
-	/* calculate bitmap size in bytes */
-	bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
-
 	/* free bitmap */
-	order = get_order(bitmap_sz);
-	free_pages((unsigned long) tbl->it_map, order);
+	vfree(tbl->it_map);
 
 	/* free table */
 	kfree(tbl);
@@ -897,6 +898,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
 	unsigned int order;
 	unsigned int nio_pages, io_order;
 	struct page *page;
+	size_t size_io = size;
 
 	size = PAGE_ALIGN(size);
 	order = get_order(size);
@@ -923,8 +925,9 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
 	memset(ret, 0, size);
 
 	/* Set up tces to cover the allocated range */
-	nio_pages = size >> tbl->it_page_shift;
-	io_order = get_iommu_order(size, tbl);
+	size_io = IOMMU_PAGE_ALIGN(size_io, tbl);
+	nio_pages = size_io >> tbl->it_page_shift;
+	io_order = get_iommu_order(size_io, tbl);
 	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
 			      mask >> tbl->it_page_shift, io_order, 0);
 	if (mapping == DMA_MAPPING_ERROR) {
@@ -939,10 +942,9 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
 			 void *vaddr, dma_addr_t dma_handle)
 {
 	if (tbl) {
-		unsigned int nio_pages;
+		size_t size_io = IOMMU_PAGE_ALIGN(size, tbl);
+		unsigned int nio_pages = size_io >> tbl->it_page_shift;
 
-		size = PAGE_ALIGN(size);
-		nio_pages = size >> tbl->it_page_shift;
 		iommu_free(tbl, dma_handle, nio_pages);
 		size = PAGE_ALIGN(size);
 		free_pages((unsigned long)vaddr, get_order(size));
@@ -1096,7 +1098,7 @@ int iommu_take_ownership(struct iommu_table *tbl)
 
 	spin_lock_irqsave(&tbl->large_pool.lock, flags);
 	for (i = 0; i < tbl->nr_pools; i++)
-		spin_lock(&tbl->pools[i].lock);
+		spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
 
 	iommu_table_release_pages(tbl);
 
@@ -1124,7 +1126,7 @@ void iommu_release_ownership(struct iommu_table *tbl)
 
 	spin_lock_irqsave(&tbl->large_pool.lock, flags);
 	for (i = 0; i < tbl->nr_pools; i++)
-		spin_lock(&tbl->pools[i].lock);
+		spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
 
 	memset(tbl->it_map, 0, sz);
 
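The coherent-allocation hunks above size the TCE mapping by the IOMMU page size rather than the CPU page size. A self-contained sketch of the arithmetic, with illustrative values only (4 KiB kernel pages, a 64 KiB IOMMU page size), shows why the old PAGE_ALIGN-based computation could round down to zero I/O pages:

	#include <stdio.h>

	int main(void)
	{
		unsigned long size = 4096;		/* requested allocation (illustrative) */
		unsigned long it_page_shift = 16;	/* 64 KiB IOMMU pages (assumed)        */
		unsigned long io_page = 1UL << it_page_shift;
		unsigned long size_io = (size + io_page - 1) & ~(io_page - 1);

		/* old: PAGE_ALIGN(4096) >> 16 == 0, so no TCE would be mapped */
		printf("old nio_pages = %lu\n", size >> it_page_shift);
		/* new: align to the IOMMU page size first, giving at least one TCE */
		printf("new nio_pages = %lu\n", size_io >> it_page_shift);
		return 0;
	}
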
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index d71fd10a1dd46bb9daa025b10d9e51b738ae6f8e..72cb45393ef29568dee0bb339842b2c7dce2b52d 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -104,82 +104,6 @@ static inline notrace unsigned long get_irq_happened(void)
 	return happened;
 }
 
-#ifdef CONFIG_PPC_BOOK3E
-
-/* This is called whenever we are re-enabling interrupts
- * and returns either 0 (nothing to do) or 500/900/280 if
- * there's an EE, DEC or DBELL to generate.
- *
- * This is called in two contexts: From arch_local_irq_restore()
- * before soft-enabling interrupts, and from the exception exit
- * path when returning from an interrupt from a soft-disabled to
- * a soft enabled context. In both case we have interrupts hard
- * disabled.
- *
- * We take care of only clearing the bits we handled in the
- * PACA irq_happened field since we can only re-emit one at a
- * time and we don't want to "lose" one.
- */
-notrace unsigned int __check_irq_replay(void)
-{
-	/*
-	 * We use local_paca rather than get_paca() to avoid all
-	 * the debug_smp_processor_id() business in this low level
-	 * function
-	 */
-	unsigned char happened = local_paca->irq_happened;
-
-	/*
-	 * We are responding to the next interrupt, so interrupt-off
-	 * latencies should be reset here.
-	 */
-	trace_hardirqs_on();
-	trace_hardirqs_off();
-
-	if (happened & PACA_IRQ_DEC) {
-		local_paca->irq_happened &= ~PACA_IRQ_DEC;
-		return 0x900;
-	}
-
-	if (happened & PACA_IRQ_EE) {
-		local_paca->irq_happened &= ~PACA_IRQ_EE;
-		return 0x500;
-	}
-
-	if (happened & PACA_IRQ_DBELL) {
-		local_paca->irq_happened &= ~PACA_IRQ_DBELL;
-		return 0x280;
-	}
-
-	if (happened & PACA_IRQ_HARD_DIS)
-		local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-
-	/* There should be nothing left ! */
-	BUG_ON(local_paca->irq_happened != 0);
-
-	return 0;
-}
-
-/*
- * This is specifically called by assembly code to re-enable interrupts
- * if they are currently disabled. This is typically called before
- * schedule() or do_signal() when returning to userspace. We do it
- * in C to avoid the burden of dealing with lockdep etc...
- *
- * NOTE: This is called with interrupts hard disabled but not marked
- * as such in paca->irq_happened, so we need to resync this.
- */
-void notrace restore_interrupts(void)
-{
-	if (irqs_disabled()) {
-		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-		local_irq_enable();
-	} else
-		__hard_irq_enable();
-}
-
-#endif /* CONFIG_PPC_BOOK3E */
-
 void replay_soft_interrupts(void)
 {
 	struct pt_regs regs;
@@ -218,7 +142,7 @@ void replay_soft_interrupts(void)
 	 */
 	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) {
 		local_paca->irq_happened &= ~PACA_IRQ_HMI;
-		regs.trap = 0xe60;
+		regs.trap = INTERRUPT_HMI;
 		handle_hmi_exception(&regs);
 		if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
 			hard_irq_disable();
@@ -226,7 +150,7 @@ void replay_soft_interrupts(void)
 
 	if (local_paca->irq_happened & PACA_IRQ_DEC) {
 		local_paca->irq_happened &= ~PACA_IRQ_DEC;
-		regs.trap = 0x900;
+		regs.trap = INTERRUPT_DECREMENTER;
 		timer_interrupt(&regs);
 		if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
 			hard_irq_disable();
@@ -234,7 +158,7 @@ void replay_soft_interrupts(void)
 
 	if (local_paca->irq_happened & PACA_IRQ_EE) {
 		local_paca->irq_happened &= ~PACA_IRQ_EE;
-		regs.trap = 0x500;
+		regs.trap = INTERRUPT_EXTERNAL;
 		do_IRQ(&regs);
 		if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
 			hard_irq_disable();
@@ -242,10 +166,7 @@ void replay_soft_interrupts(void)
 
 	if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) {
 		local_paca->irq_happened &= ~PACA_IRQ_DBELL;
-		if (IS_ENABLED(CONFIG_PPC_BOOK3E))
-			regs.trap = 0x280;
-		else
-			regs.trap = 0xa00;
+		regs.trap = INTERRUPT_DOORBELL;
 		doorbell_exception(&regs);
 		if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
 			hard_irq_disable();
@@ -254,7 +175,7 @@ void replay_soft_interrupts(void)
 	/* Book3E does not support soft-masking PMI interrupts */
 	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) {
 		local_paca->irq_happened &= ~PACA_IRQ_PMI;
-		regs.trap = 0xf00;
+		regs.trap = INTERRUPT_PERFMON;
 		performance_monitor_exception(&regs);
 		if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
 			hard_irq_disable();
@@ -282,7 +203,7 @@ static inline void replay_soft_interrupts_irqrestore(void)
 	 * and re-locking AMR but we shouldn't get here in the first place,
 	 * hence the warning.
 	 */
-	kuap_check_amr();
+	kuap_assert_locked();
 
 	if (kuap_state != AMR_KUAP_BLOCKED)
 		set_kuap(AMR_KUAP_BLOCKED);
@@ -667,6 +588,47 @@ static inline void check_stack_overflow(void)
 	}
 }
 
+static __always_inline void call_do_softirq(const void *sp)
+{
+	/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
+	asm volatile (
+		 PPC_STLU "	%%r1, %[offset](%[sp])	;"
+		"mr		%%r1, %[sp]		;"
+		"bl		%[callee]		;"
+		 PPC_LL "	%%r1, 0(%%r1)		;"
+		 : // Outputs
+		 : // Inputs
+		   [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD),
+		   [callee] "i" (__do_softirq)
+		 : // Clobbers
+		   "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+		   "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+		   "r11", "r12"
+	);
+}
+
+static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
+{
+	register unsigned long r3 asm("r3") = (unsigned long)regs;
+
+	/* Temporarily switch r1 to sp, call __do_irq() then restore r1. */
+	asm volatile (
+		 PPC_STLU "	%%r1, %[offset](%[sp])	;"
+		"mr		%%r1, %[sp]		;"
+		"bl		%[callee]		;"
+		 PPC_LL "	%%r1, 0(%%r1)		;"
+		 : // Outputs
+		   "+r" (r3)
+		 : // Inputs
+		   [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD),
+		   [callee] "i" (__do_irq)
+		 : // Clobbers
+		   "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+		   "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+		   "r11", "r12"
+	);
+}
+
 void __do_irq(struct pt_regs *regs)
 {
 	unsigned int irq;
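
These inline-asm helpers replace the assembly stubs deleted from misc_32.S and misc_64.S later in this series; the caller still decides when a stack switch is needed and passes the per-CPU IRQ stack pointer. A minimal usage sketch, modelled on (but not copied verbatim from) the soft-IRQ path in this file, assuming the existing softirq_ctx[] array:

	/* Run pending softirqs on the dedicated per-CPU softirq stack. */
	void do_softirq_own_stack_sketch(void)
	{
		call_do_softirq(softirq_ctx[smp_processor_id()]);
	}

call_do_irq() is used the same way from the hard-IRQ path, with the pt_regs pointer forwarded in r3 and the hard-IRQ stack passed as the new r1.
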
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index 144858027fa38f245e04301bef8b6b0539b955d1..ce87dc5ea23cfae216f0297a5f56160753509be0 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -11,10 +11,10 @@
 void arch_jump_label_transform(struct jump_entry *entry,
 			       enum jump_label_type type)
 {
-	struct ppc_inst *addr = (struct ppc_inst *)(unsigned long)entry->code;
+	struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry);
 
 	if (type == JUMP_LABEL_JMP)
-		patch_branch(addr, entry->target, 0);
+		patch_branch(addr, jump_entry_target(entry), 0);
 	else
 		patch_instruction(addr, ppc_inst(PPC_INST_NOP));
 }
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 409080208a6c4ae104e01e341ffebd6082585024..7dd2ad3603ad263893662f4d37abd7d8342ffc90 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -376,7 +376,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
 }
 
 /*
- * This function does PowerPC specific procesing for interfacing to gdb.
+ * This function does PowerPC specific processing for interfacing to gdb.
  */
 int kgdb_arch_handle_exception(int vector, int signo, int err_code,
 			       char *remcom_in_buffer, char *remcom_out_buffer,
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index f061e06e9f519a4333a8ff1dc229b1ba3d895849..8b2c1a8553a0ed1d1429ebf56e863d2e1cda8ae4 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -15,6 +15,7 @@
 #include <asm/udbg.h>
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
+#include <asm/early_ioremap.h>
 
 #undef DEBUG
 
@@ -34,6 +35,7 @@ static struct legacy_serial_info {
 	unsigned int			clock;
 	int				irq_check_parent;
 	phys_addr_t			taddr;
+	void __iomem			*early_addr;
 } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
 
 static const struct of_device_id legacy_serial_parents[] __initconst = {
@@ -325,17 +327,16 @@ static void __init setup_legacy_serial_console(int console)
 {
 	struct legacy_serial_info *info = &legacy_serial_infos[console];
 	struct plat_serial8250_port *port = &legacy_serial_ports[console];
-	void __iomem *addr;
 	unsigned int stride;
 
 	stride = 1 << port->regshift;
 
 	/* Check if a translated MMIO address has been found */
 	if (info->taddr) {
-		addr = ioremap(info->taddr, 0x1000);
-		if (addr == NULL)
+		info->early_addr = early_ioremap(info->taddr, 0x1000);
+		if (info->early_addr == NULL)
 			return;
-		udbg_uart_init_mmio(addr, stride);
+		udbg_uart_init_mmio(info->early_addr, stride);
 	} else {
 		/* Check if it's PIO and we support untranslated PIO */
 		if (port->iotype == UPIO_PORT && isa_io_special)
@@ -353,6 +354,30 @@ static void __init setup_legacy_serial_console(int console)
 	udbg_uart_setup(info->speed, info->clock);
 }
 
+static int __init ioremap_legacy_serial_console(void)
+{
+	struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console];
+	struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console];
+	void __iomem *vaddr;
+
+	if (legacy_serial_console < 0)
+		return 0;
+
+	if (!info->early_addr)
+		return 0;
+
+	vaddr = ioremap(info->taddr, 0x1000);
+	if (WARN_ON(!vaddr))
+		return -ENOMEM;
+
+	udbg_uart_init_mmio(vaddr, 1 << port->regshift);
+	early_iounmap(info->early_addr, 0x1000);
+	info->early_addr = NULL;
+
+	return 0;
+}
+early_initcall(ioremap_legacy_serial_console);
+
 /*
  * This is called very early, as part of setup_system() or eventually
  * setup_arch(), basically before anything else in this file. This function
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 11f0cae086edba8b30c02f6ae9c027d57e22aa36..9a3c2a84a2acab32e4c0544d92f9633822a4a430 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -40,7 +40,7 @@ static struct irq_work mce_ue_event_irq_work = {
 	.func = machine_check_ue_irq_work,
 };
 
-DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
 
 static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
 
@@ -131,6 +131,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
 	 * Populate the mce error_type and type-specific error_type.
 	 */
 	mce_set_error_info(mce, mce_err);
+	if (mce->error_type == MCE_ERROR_TYPE_UE)
+		mce->u.ue_error.ignore_event = mce_err->ignore_event;
 
 	if (!addr)
 		return;
@@ -159,7 +161,6 @@ void save_mce_event(struct pt_regs *regs, long handled,
 		if (phys_addr != ULONG_MAX) {
 			mce->u.ue_error.physical_address_provided = true;
 			mce->u.ue_error.physical_address = phys_addr;
-			mce->u.ue_error.ignore_event = mce_err->ignore_event;
 			machine_check_ue_event(mce);
 		}
 	}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 717e658b90fd845af254e60f3fec065b003dda51..6a076bef29321527314b4333e2fcd47887dcdd0d 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -27,45 +27,6 @@
 
 	.text
 
-/*
- * We store the saved ksp_limit in the unused part
- * of the STACK_FRAME_OVERHEAD
- */
-_GLOBAL(call_do_softirq)
-	mflr	r0
-	stw	r0,4(r1)
-	lwz	r10,THREAD+KSP_LIMIT(r2)
-	stw	r3, THREAD+KSP_LIMIT(r2)
-	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
-	mr	r1,r3
-	stw	r10,8(r1)
-	bl	__do_softirq
-	lwz	r10,8(r1)
-	lwz	r1,0(r1)
-	lwz	r0,4(r1)
-	stw	r10,THREAD+KSP_LIMIT(r2)
-	mtlr	r0
-	blr
-
-/*
- * void call_do_irq(struct pt_regs *regs, void *sp);
- */
-_GLOBAL(call_do_irq)
-	mflr	r0
-	stw	r0,4(r1)
-	lwz	r10,THREAD+KSP_LIMIT(r2)
-	stw	r4, THREAD+KSP_LIMIT(r2)
-	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
-	mr	r1,r4
-	stw	r10,8(r1)
-	bl	__do_irq
-	lwz	r10,8(r1)
-	lwz	r1,0(r1)
-	lwz	r0,4(r1)
-	stw	r10,THREAD+KSP_LIMIT(r2)
-	mtlr	r0
-	blr
-
 /*
  * This returns the high 64 bits of the product of two 64-bit numbers.
  */
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 070465825c2102b49005a455a99b663369fcaaa6..4b761a18a74d183adfefc59a5b0367d9dc548686 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -27,28 +27,6 @@
 
 	.text
 
-_GLOBAL(call_do_softirq)
-	mflr	r0
-	std	r0,16(r1)
-	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
-	mr	r1,r3
-	bl	__do_softirq
-	ld	r1,0(r1)
-	ld	r0,16(r1)
-	mtlr	r0
-	blr
-
-_GLOBAL(call_do_irq)
-	mflr	r0
-	std	r0,16(r1)
-	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
-	mr	r1,r4
-	bl	__do_irq
-	ld	r1,0(r1)
-	ld	r0,16(r1)
-	mtlr	r0
-	blr
-
 _GLOBAL(__bswapdi2)
 EXPORT_SYMBOL(__bswapdi2)
 	srdi	r8,r3,32
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index a211b0253cdb49332b4913d1d79e6945d6a24754..fab84024650c8d526453970b4aef0c8b6d7aea29 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -14,6 +14,7 @@
 #include <asm/firmware.h>
 #include <linux/sort.h>
 #include <asm/setup.h>
+#include <asm/sections.h>
 
 static LIST_HEAD(module_bug_list);
 
@@ -88,12 +89,28 @@ int module_finalize(const Elf_Ehdr *hdr,
 }
 
 #ifdef MODULES_VADDR
+static __always_inline void *
+__module_alloc(unsigned long size, unsigned long start, unsigned long end)
+{
+	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
+				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+
 void *module_alloc(unsigned long size)
 {
+	unsigned long limit = (unsigned long)_etext - SZ_32M;
+	void *ptr = NULL;
+
 	BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
 
-	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
-				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
-				    __builtin_return_address(0));
+	/* First try within 32M limit from _etext to avoid branch trampolines */
+	if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)
+		ptr = __module_alloc(size, limit, MODULES_END);
+
+	if (!ptr)
+		ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
+
+	return ptr;
 }
 #endif
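
The 32 MiB figure in module_alloc() comes from the reach of a PowerPC relative branch: the I-form b/bl instruction encodes a 24-bit word displacement, i.e. a signed 26-bit byte offset. A tiny, purely illustrative check of that arithmetic (nothing below is taken from the patch):

	#include <stdio.h>

	int main(void)
	{
		/* 24-bit LI field, shifted left by 2 -> signed 26-bit byte offset */
		long reach = 1L << 25;				/* +/- 2^25 bytes */

		printf("bl reach = +/- %ld MiB\n", reach >> 20);	/* prints 32 */
		return 0;
	}

Keeping module text within that window below _etext lets module-to-kernel calls use a plain bl with no trampoline, which is exactly what the first-try allocation above is for.
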
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 7f7cdbeacd1ac8a1ec17d4f0c5f1194729128fc2..cdf87086fa33a029767e281a809dadb6f50578c4 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -141,11 +141,21 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
 	}
 }
 
+static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+{
+	patch_instruction((struct ppc_inst *)addr,
+			  ppc_inst(PPC_RAW_LIS(reg, IMM_H(val))));
+	addr++;
+
+	patch_instruction((struct ppc_inst *)addr,
+			  ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val))));
+}
+
 /*
  * Generate instructions to load provided immediate 64-bit value
  * to register 'reg' and patch these instructions at 'addr'.
  */
-static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr)
 {
 	/* lis reg,(op)@highest */
 	patch_instruction((struct ppc_inst *)addr,
@@ -177,6 +187,14 @@ static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *
 				   ___PPC_RS(reg) | (val & 0xffff)));
 }
 
+static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+{
+	if (IS_ENABLED(CONFIG_PPC64))
+		patch_imm64_load_insns(val, reg, addr);
+	else
+		patch_imm32_load_insns(val, reg, addr);
+}
+
 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
 {
 	struct ppc_inst branch_op_callback, branch_emulate_step, temp;
@@ -230,7 +248,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
 	 * Fixup the template with instructions to:
 	 * 1. load the address of the actual probepoint
 	 */
-	patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
+	patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
 
 	/*
 	 * 2. branch to optimized_callback() and emulate_step()
@@ -264,7 +282,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
 	 * 3. load instruction to be emulated into relevant register, and
 	 */
 	temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
-	patch_imm64_load_insns(ppc_inst_as_u64(temp), 4, buff + TMPL_INSN_IDX);
+	patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
 
 	/*
 	 * 4. branch back from trampoline
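
For the 32-bit variant added above, the two patched instructions build the immediate with the usual lis/ori pair. A standalone sketch of the split (the *_SKETCH macros stand in for the kernel's IMM_H()/IMM_L(); the value is illustrative):

	#include <stdint.h>
	#include <stdio.h>

	#define IMM_H_SKETCH(v)	(((v) >> 16) & 0xffff)	/* high half, for lis */
	#define IMM_L_SKETCH(v)	((v) & 0xffff)		/* low half, for ori  */

	int main(void)
	{
		uint32_t val = 0x12345678;
		uint32_t reg;

		reg = IMM_H_SKETCH(val) << 16;	/* after "lis r3,0x1234"    */
		reg |= IMM_L_SKETCH(val);	/* after "ori r3,r3,0x5678" */
		printf("0x%08x\n", reg);	/* prints 0x12345678        */
		return 0;
	}
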
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index ff8ba4d3824d52a2a23e556a076662f4208e52db..19ea3312403ca3a302012edc1caa598861f76050 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -9,6 +9,16 @@
 #include <asm/ptrace.h>
 #include <asm/asm-offsets.h>
 
+#ifdef CONFIG_PPC64
+#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base)
+#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base)
+#define TEMPLATE_FOR_IMM_LOAD_INSNS	nop; nop; nop; nop; nop
+#else
+#define SAVE_30GPRS(base) stmw	r2, GPR2(base)
+#define REST_30GPRS(base) lmw	r2, GPR2(base)
+#define TEMPLATE_FOR_IMM_LOAD_INSNS	nop; nop; nop
+#endif
+
 #define	OPT_SLOT_SIZE	65536
 
 	.balign	4
@@ -30,39 +40,41 @@ optinsn_slot:
 	.global optprobe_template_entry
 optprobe_template_entry:
 	/* Create an in-memory pt_regs */
-	stdu	r1,-INT_FRAME_SIZE(r1)
+	PPC_STLU	r1,-INT_FRAME_SIZE(r1)
 	SAVE_GPR(0,r1)
 	/* Save the previous SP into stack */
 	addi	r0,r1,INT_FRAME_SIZE
-	std	r0,GPR1(r1)
-	SAVE_10GPRS(2,r1)
-	SAVE_10GPRS(12,r1)
-	SAVE_10GPRS(22,r1)
+	PPC_STL	r0,GPR1(r1)
+	SAVE_30GPRS(r1)
 	/* Save SPRS */
 	mfmsr	r5
-	std	r5,_MSR(r1)
+	PPC_STL	r5,_MSR(r1)
 	li	r5,0x700
-	std	r5,_TRAP(r1)
+	PPC_STL	r5,_TRAP(r1)
 	li	r5,0
-	std	r5,ORIG_GPR3(r1)
-	std	r5,RESULT(r1)
+	PPC_STL	r5,ORIG_GPR3(r1)
+	PPC_STL	r5,RESULT(r1)
 	mfctr	r5
-	std	r5,_CTR(r1)
+	PPC_STL	r5,_CTR(r1)
 	mflr	r5
-	std	r5,_LINK(r1)
+	PPC_STL	r5,_LINK(r1)
 	mfspr	r5,SPRN_XER
-	std	r5,_XER(r1)
+	PPC_STL	r5,_XER(r1)
 	mfcr	r5
-	std	r5,_CCR(r1)
+	PPC_STL	r5,_CCR(r1)
+#ifdef CONFIG_PPC64
 	lbz     r5,PACAIRQSOFTMASK(r13)
 	std     r5,SOFTE(r1)
+#endif
 
 	/*
 	 * We may get here from a module, so load the kernel TOC in r2.
 	 * The original TOC gets restored when pt_regs is restored
 	 * further below.
 	 */
+#ifdef CONFIG_PPC64
 	ld	r2,PACATOC(r13)
+#endif
 
 	.global optprobe_template_op_address
 optprobe_template_op_address:
@@ -70,11 +82,8 @@ optprobe_template_op_address:
 	 * Parameters to optimized_callback():
 	 * 1. optimized_kprobe structure in r3
 	 */
-	nop
-	nop
-	nop
-	nop
-	nop
+	TEMPLATE_FOR_IMM_LOAD_INSNS
+
 	/* 2. pt_regs pointer in r4 */
 	addi	r4,r1,STACK_FRAME_OVERHEAD
 
@@ -92,11 +101,7 @@ optprobe_template_call_handler:
 	.global optprobe_template_insn
 optprobe_template_insn:
 	/* 2, Pass instruction to be emulated in r4 */
-	nop
-	nop
-	nop
-	nop
-	nop
+	TEMPLATE_FOR_IMM_LOAD_INSNS
 
 	.global optprobe_template_call_emulate
 optprobe_template_call_emulate:
@@ -107,20 +112,18 @@ optprobe_template_call_emulate:
 	 * All done.
 	 * Now, restore the registers...
 	 */
-	ld	r5,_MSR(r1)
+	PPC_LL	r5,_MSR(r1)
 	mtmsr	r5
-	ld	r5,_CTR(r1)
+	PPC_LL	r5,_CTR(r1)
 	mtctr	r5
-	ld	r5,_LINK(r1)
+	PPC_LL	r5,_LINK(r1)
 	mtlr	r5
-	ld	r5,_XER(r1)
+	PPC_LL	r5,_XER(r1)
 	mtxer	r5
-	ld	r5,_CCR(r1)
+	PPC_LL	r5,_CCR(r1)
 	mtcr	r5
 	REST_GPR(0,r1)
-	REST_10GPRS(2,r1)
-	REST_10GPRS(12,r1)
-	REST_10GPRS(22,r1)
+	REST_30GPRS(r1)
 	/* Restore the previous SP */
 	addi	r1,r1,INT_FRAME_SIZE
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 3231c2df9e261fba57c74792c177256e0dafd678..89e34aa273e21afbc4a1bbad2362176ddd640101 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1117,9 +1117,10 @@ void restore_tm_state(struct pt_regs *regs)
 	regs->msr |= msr_diff;
 }
 
-#else
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
 #define tm_recheckpoint_new_task(new)
 #define __switch_to_tm(prev, new)
+void tm_reclaim_current(uint8_t cause) {}
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
 static inline void save_sprs(struct thread_struct *t)
@@ -1255,6 +1256,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	 */
 	restore_sprs(old_thread, new_thread);
 
+#ifdef CONFIG_PPC32
+	kuap_assert_locked();
+#endif
 	last = _switch(old_thread, new_thread);
 
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -1444,11 +1448,9 @@ static void print_msr_bits(unsigned long val)
 #ifdef CONFIG_PPC64
 #define REG		"%016lx"
 #define REGS_PER_LINE	4
-#define LAST_VOLATILE	13
 #else
 #define REG		"%08lx"
 #define REGS_PER_LINE	8
-#define LAST_VOLATILE	12
 #endif
 
 static void __show_regs(struct pt_regs *regs)
@@ -1465,7 +1467,9 @@ static void __show_regs(struct pt_regs *regs)
 	trap = TRAP(regs);
 	if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR))
 		pr_cont("CFAR: "REG" ", regs->orig_gpr3);
-	if (trap == 0x200 || trap == 0x300 || trap == 0x600) {
+	if (trap == INTERRUPT_MACHINE_CHECK ||
+	    trap == INTERRUPT_DATA_STORAGE ||
+	    trap == INTERRUPT_ALIGNMENT) {
 		if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE))
 			pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
 		else
@@ -1484,8 +1488,6 @@ static void __show_regs(struct pt_regs *regs)
 		if ((i % REGS_PER_LINE) == 0)
 			pr_cont("\nGPR%02d: ", i);
 		pr_cont(REG " ", regs->gpr[i]);
-		if (i == LAST_VOLATILE && !FULL_REGS(regs))
-			break;
 	}
 	pr_cont("\n");
 	/*
@@ -1688,7 +1690,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	} else {
 		/* user thread */
 		struct pt_regs *regs = current_pt_regs();
-		CHECK_FULL_REGS(regs);
 		*childregs = *regs;
 		if (usp)
 			childregs->gpr[1] = usp;
@@ -1724,9 +1725,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	kregs = (struct pt_regs *) sp;
 	sp -= STACK_FRAME_OVERHEAD;
 	p->thread.ksp = sp;
-#ifdef CONFIG_PPC32
-	p->thread.ksp_limit = (unsigned long)end_of_stack(p);
-#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	for (i = 0; i < nr_wp_slots(); i++)
 		p->thread.ptrace_bps[i] = NULL;
@@ -1796,13 +1794,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	regs->ccr = 0;
 	regs->gpr[1] = sp;
 
-	/*
-	 * We have just cleared all the nonvolatile GPRs, so make
-	 * FULL_REGS(regs) return true.  This is necessary to allow
-	 * ptrace to examine the thread immediately after exec.
-	 */
-	SET_FULL_REGS(regs);
-
 #ifdef CONFIG_PPC32
 	regs->mq = 0;
 	regs->nip = start;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9a4797d1d40d5401d80521297f2b0e07edc9fc23..fbe9deebc8e13eff1b844df914cdf9cbb54d0c2d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -65,6 +65,8 @@
 #define DBG(fmt...)
 #endif
 
+int *chip_id_lookup_table;
+
 #ifdef CONFIG_PPC64
 int __initdata iommu_is_off;
 int __initdata iommu_force_on;
@@ -267,7 +269,7 @@ static struct feature_property {
 };
 
 #if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
-static inline void identical_pvr_fixup(unsigned long node)
+static __init void identical_pvr_fixup(unsigned long node)
 {
 	unsigned int pvr;
 	const char *model = of_get_flat_dt_prop(node, "model", NULL);
@@ -914,13 +916,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id);
 int cpu_to_chip_id(int cpu)
 {
 	struct device_node *np;
+	int ret = -1, idx;
+
+	idx = cpu / threads_per_core;
+	if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1)
+		return chip_id_lookup_table[idx];
 
 	np = of_get_cpu_node(cpu, NULL);
-	if (!np)
-		return -1;
+	if (np) {
+		ret = of_get_ibm_chip_id(np);
+		of_node_put(np);
+
+		if (chip_id_lookup_table)
+			chip_id_lookup_table[idx] = ret;
+	}
 
-	of_node_put(np);
-	return of_get_ibm_chip_id(np);
+	return ret;
 }
 EXPORT_SYMBOL(cpu_to_chip_id);
 
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index ccf77b985c8f6b3b81a7c171c4c68be3d035c474..41ed7e33d8973c55f6294e725f9b40f439a826a3 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2983,7 +2983,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
 				" 0x3 encode-int encode+"
 				" s\" interrupts\" property"
 			" finish-device");
-	};
+	}
 
 	/* Check for a PHY device node - if missing then create one and
 	 * give it's phandle to the ethernet node */
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 6ccffc65ac97ee797a110517a009f3504ba508e5..773bcc4ca8431b3852cdaedb75f1bb527914bb5f 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -111,7 +111,7 @@ static unsigned long get_user_msr(struct task_struct *task)
 	return task->thread.regs->msr | task->thread.fpexc_mode;
 }
 
-static int set_user_msr(struct task_struct *task, unsigned long msr)
+static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr)
 {
 	task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
 	task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
@@ -147,7 +147,7 @@ static int set_user_dscr(struct task_struct *task, unsigned long dscr)
  * We prevent mucking around with the reserved area of trap
  * which are used internally by the kernel.
  */
-static int set_user_trap(struct task_struct *task, unsigned long trap)
+static __always_inline int set_user_trap(struct task_struct *task, unsigned long trap)
 {
 	set_trap(task->thread.regs, trap);
 	return 0;
@@ -221,17 +221,9 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset,
 #ifdef CONFIG_PPC64
 	struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe));
 #endif
-	int i;
-
 	if (target->thread.regs == NULL)
 		return -EIO;
 
-	if (!FULL_REGS(target->thread.regs)) {
-		/* We have a partial register set.  Fill 14-31 with bogus values */
-		for (i = 14; i < 32; i++)
-			target->thread.regs->gpr[i] = NV_REG_POISON;
-	}
-
 	membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs));
 
 	membuf_store(&to_msr, get_user_msr(target));
@@ -252,8 +244,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
 	if (target->thread.regs == NULL)
 		return -EIO;
 
-	CHECK_FULL_REGS(target->thread.regs);
-
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				 target->thread.regs,
 				 0, PT_MSR * sizeof(reg));
@@ -659,6 +649,9 @@ int gpr32_set_common(struct task_struct *target,
 	const compat_ulong_t __user *u = ubuf;
 	compat_ulong_t reg;
 
+	if (!kbuf && !user_read_access_begin(u, count))
+		return -EFAULT;
+
 	pos /= sizeof(reg);
 	count /= sizeof(reg);
 
@@ -667,8 +660,7 @@ int gpr32_set_common(struct task_struct *target,
 			regs[pos++] = *k++;
 	else
 		for (; count > 0 && pos < PT_MSR; --count) {
-			if (__get_user(reg, u++))
-				return -EFAULT;
+			unsafe_get_user(reg, u++, Efault);
 			regs[pos++] = reg;
 		}
 
@@ -676,8 +668,8 @@ int gpr32_set_common(struct task_struct *target,
 	if (count > 0 && pos == PT_MSR) {
 		if (kbuf)
 			reg = *k++;
-		else if (__get_user(reg, u++))
-			return -EFAULT;
+		else
+			unsafe_get_user(reg, u++, Efault);
 		set_user_msr(target, reg);
 		++pos;
 		--count;
@@ -690,24 +682,24 @@ int gpr32_set_common(struct task_struct *target,
 			++k;
 	} else {
 		for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
-			if (__get_user(reg, u++))
-				return -EFAULT;
+			unsafe_get_user(reg, u++, Efault);
 			regs[pos++] = reg;
 		}
 		for (; count > 0 && pos < PT_TRAP; --count, ++pos)
-			if (__get_user(reg, u++))
-				return -EFAULT;
+			unsafe_get_user(reg, u++, Efault);
 	}
 
 	if (count > 0 && pos == PT_TRAP) {
 		if (kbuf)
 			reg = *k++;
-		else if (__get_user(reg, u++))
-			return -EFAULT;
+		else
+			unsafe_get_user(reg, u++, Efault);
 		set_user_trap(target, reg);
 		++pos;
 		--count;
 	}
+	if (!kbuf)
+		user_read_access_end();
 
 	kbuf = k;
 	ubuf = u;
@@ -715,25 +707,19 @@ int gpr32_set_common(struct task_struct *target,
 	count *= sizeof(reg);
 	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
 					 (PT_TRAP + 1) * sizeof(reg), -1);
+
+Efault:
+	user_read_access_end();
+	return -EFAULT;
 }
 
 static int gpr32_get(struct task_struct *target,
 		     const struct user_regset *regset,
 		     struct membuf to)
 {
-	int i;
-
 	if (target->thread.regs == NULL)
 		return -EIO;
 
-	if (!FULL_REGS(target->thread.regs)) {
-		/*
-		 * We have a partial register set.
-		 * Fill 14-31 with bogus values.
-		 */
-		for (i = 14; i < 32; i++)
-			target->thread.regs->gpr[i] = NV_REG_POISON;
-	}
 	return gpr32_get_common(target, regset, to,
 			&target->thread.regs->gpr[0]);
 }
@@ -746,7 +732,6 @@ static int gpr32_set(struct task_struct *target,
 	if (target->thread.regs == NULL)
 		return -EIO;
 
-	CHECK_FULL_REGS(target->thread.regs);
 	return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
 			&target->thread.regs->gpr[0]);
 }
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index 4f3d4ff3728cb05a7d3e713b916fdb2737da02a6..0a0a33eb0d282505ddd5ecd68585683dfa907901 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -59,7 +59,6 @@ long arch_ptrace(struct task_struct *child, long request,
 		if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
 			break;
 
-		CHECK_FULL_REGS(child->thread.regs);
 		if (index < PT_FPR0)
 			ret = ptrace_get_reg(child, (int) index, &tmp);
 		else
@@ -81,7 +80,6 @@ long arch_ptrace(struct task_struct *child, long request,
 		if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
 			break;
 
-		CHECK_FULL_REGS(child->thread.regs);
 		if (index < PT_FPR0)
 			ret = ptrace_put_reg(child, index, data);
 		else
@@ -354,8 +352,6 @@ void __init pt_regs_check(void)
 		     offsetof(struct user_pt_regs, nip));
 	BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
 		     offsetof(struct user_pt_regs, msr));
-	BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
-		     offsetof(struct user_pt_regs, msr));
 	BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
 		     offsetof(struct user_pt_regs, orig_gpr3));
 	BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c
index d30b9ad70edc28253ba5c3e75fb12ff4ce3e7048..19c224808982137aa4b43d7543f036c924bb69be 100644
--- a/arch/powerpc/kernel/ptrace/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace/ptrace32.c
@@ -83,7 +83,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if ((addr & 3) || (index > PT_FPSCR32))
 			break;
 
-		CHECK_FULL_REGS(child->thread.regs);
 		if (index < PT_FPR0) {
 			ret = ptrace_get_reg(child, index, &tmp);
 			if (ret)
@@ -133,7 +132,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if ((addr & 3) || numReg > PT_FPSCR)
 			break;
 
-		CHECK_FULL_REGS(child->thread.regs);
 		if (numReg >= PT_FPR0) {
 			flush_fp_to_thread(child);
 			/* get 64 bit FPR */
@@ -187,7 +185,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		if ((addr & 3) || (index > PT_FPSCR32))
 			break;
 
-		CHECK_FULL_REGS(child->thread.regs);
 		if (index < PT_FPR0) {
 			ret = ptrace_put_reg(child, index, data);
 		} else {
@@ -226,7 +223,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		 */
 		if ((addr & 3) || (numReg > PT_FPSCR))
 			break;
-		CHECK_FULL_REGS(child->thread.regs);
 		if (numReg < PT_FPR0) {
 			unsigned long freg;
 			ret = ptrace_get_reg(child, numReg, &freg);
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 2d33f342a29307312bdc160545929529383f6112..6857a5b0a1c39c24208c253e96e56110b3fe5706 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -755,11 +755,18 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-#define RMO_READ_BUF_MAX 30
-
-/* RTAS Userspace access */
+/**
+ * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space.
+ *
+ * Base + size description of a range of RTAS-addressable memory set
+ * aside for user space to use as work area(s) for certain RTAS
+ * functions. User space accesses this region via /dev/mem. Apart from
+ * security policies, the kernel does not arbitrate or serialize
+ * access to this region, and user space must ensure that concurrent
+ * users do not interfere with each other.
+ */
 static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v)
 {
-	seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX);
+	seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_USER_REGION_SIZE);
 	return 0;
 }
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index d126d71ea5bd8ff03afc39b0933a0ab4b36732de..6bada744402b1c6fc19afc93b5470ea93a515561 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -828,7 +828,6 @@ void rtas_activate_firmware(void)
 		pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
 }
 
-static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
 #ifdef CONFIG_PPC_PSERIES
 /**
  * rtas_call_reentrant() - Used for reentrant rtas calls
@@ -988,10 +987,10 @@ static struct rtas_filter rtas_filters[] __ro_after_init = {
 static bool in_rmo_buf(u32 base, u32 end)
 {
 	return base >= rtas_rmo_buf &&
-		base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) &&
+		base < (rtas_rmo_buf + RTAS_USER_REGION_SIZE) &&
 		base <= end &&
 		end >= rtas_rmo_buf &&
-		end < (rtas_rmo_buf + RTAS_RMOBUF_MAX);
+		end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE);
 }
 
 static bool block_rtas_call(int token, int nargs,
@@ -1052,6 +1051,14 @@ static bool block_rtas_call(int token, int nargs,
 	return true;
 }
 
+static void __init rtas_syscall_filter_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(rtas_filters); i++)
+		rtas_filters[i].token = rtas_token(rtas_filters[i].name);
+}
+
 #else
 
 static bool block_rtas_call(int token, int nargs,
@@ -1060,6 +1067,10 @@ static bool block_rtas_call(int token, int nargs,
 	return false;
 }
 
+static void __init rtas_syscall_filter_init(void)
+{
+}
+
 #endif /* CONFIG_PPC_RTAS_FILTER */
 
 /* We assume to be passed big endian arguments */
@@ -1103,7 +1114,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
 		return -EINVAL;
 
 	/* Need to handle ibm,suspend_me call specially */
-	if (token == ibm_suspend_me_token) {
+	if (token == rtas_token("ibm,suspend-me")) {
 
 		/*
 		 * rtas_ibm_suspend_me assumes the streamid handle is in cpu
@@ -1163,9 +1174,6 @@ void __init rtas_initialize(void)
 	unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
 	u32 base, size, entry;
 	int no_base, no_size, no_entry;
-#ifdef CONFIG_PPC_RTAS_FILTER
-	int i;
-#endif
 
 	/* Get RTAS dev node and fill up our "rtas" structure with infos
 	 * about it.
@@ -1191,12 +1199,10 @@ void __init rtas_initialize(void)
 	 * the stop-self token if any
 	 */
 #ifdef CONFIG_PPC64
-	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+	if (firmware_has_feature(FW_FEATURE_LPAR))
 		rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
-		ibm_suspend_me_token = rtas_token("ibm,suspend-me");
-	}
 #endif
-	rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
+	rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE,
 						 0, rtas_region);
 	if (!rtas_rmo_buf)
 		panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
@@ -1206,11 +1212,7 @@ void __init rtas_initialize(void)
 	rtas_last_error_token = rtas_token("rtas-last-error");
 #endif
 
-#ifdef CONFIG_PPC_RTAS_FILTER
-	for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) {
-		rtas_filters[i].token = rtas_token(rtas_filters[i].name);
-	}
-#endif
+	rtas_syscall_filter_init();
 }
 
 int __init early_init_dt_scan_rtas(unsigned long node,
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index e4e1a94ccf6a6fa541ab9f9dd6193f11ed509c6d..0fdfcdd9d880cc9cf6e743bd11cbed8d5ad709ac 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -7,6 +7,7 @@
 #include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
+#include <linux/memblock.h>
 #include <linux/nospec.h>
 #include <linux/prctl.h>
 #include <linux/seq_buf.h>
@@ -18,6 +19,7 @@
 #include <asm/setup.h>
 #include <asm/inst.h>
 
+#include "setup.h"
 
 u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
@@ -250,7 +252,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
 
 static enum stf_barrier_type stf_enabled_flush_types;
 static bool no_stf_barrier;
-bool stf_barrier;
+static bool stf_barrier;
 
 static int __init handle_no_stf_barrier(char *p)
 {
@@ -541,6 +543,178 @@ void setup_count_cache_flush(void)
 	toggle_branch_cache_flush(enable);
 }
 
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+static bool no_entry_flush;
+static bool no_uaccess_flush;
+bool rfi_flush;
+static bool entry_flush;
+static bool uaccess_flush;
+DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
+EXPORT_SYMBOL(uaccess_flush_key);
+
+static int __init handle_no_rfi_flush(char *p)
+{
+	pr_info("rfi-flush: disabled on command line.");
+	no_rfi_flush = true;
+	return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+static int __init handle_no_entry_flush(char *p)
+{
+	pr_info("entry-flush: disabled on command line.");
+	no_entry_flush = true;
+	return 0;
+}
+early_param("no_entry_flush", handle_no_entry_flush);
+
+static int __init handle_no_uaccess_flush(char *p)
+{
+	pr_info("uaccess-flush: disabled on command line.");
+	no_uaccess_flush = true;
+	return 0;
+}
+early_param("no_uaccess_flush", handle_no_uaccess_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see doco that says to use
+ * nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+	pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+	handle_no_rfi_flush(NULL);
+	return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+	/*
+	 * We don't need to do the flush explicitly, just enter+exit kernel is
+	 * sufficient, the RFI exit handlers will do the right thing.
+	 */
+}
+
+void rfi_flush_enable(bool enable)
+{
+	if (enable) {
+		do_rfi_flush_fixups(enabled_flush_types);
+		on_each_cpu(do_nothing, NULL, 1);
+	} else
+		do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+	rfi_flush = enable;
+}
+
+static void entry_flush_enable(bool enable)
+{
+	if (enable) {
+		do_entry_flush_fixups(enabled_flush_types);
+		on_each_cpu(do_nothing, NULL, 1);
+	} else {
+		do_entry_flush_fixups(L1D_FLUSH_NONE);
+	}
+
+	entry_flush = enable;
+}
+
+static void uaccess_flush_enable(bool enable)
+{
+	if (enable) {
+		do_uaccess_flush_fixups(enabled_flush_types);
+		static_branch_enable(&uaccess_flush_key);
+		on_each_cpu(do_nothing, NULL, 1);
+	} else {
+		static_branch_disable(&uaccess_flush_key);
+		do_uaccess_flush_fixups(L1D_FLUSH_NONE);
+	}
+
+	uaccess_flush = enable;
+}
+
+static void __ref init_fallback_flush(void)
+{
+	u64 l1d_size, limit;
+	int cpu;
+
+	/* Only allocate the fallback flush area once (at boot time). */
+	if (l1d_flush_fallback_area)
+		return;
+
+	l1d_size = ppc64_caches.l1d.size;
+
+	/*
+	 * If there is no d-cache-size property in the device tree, l1d_size
+	 * could be zero. That leads to the loop in the asm wrapping around to
+	 * 2^64-1, and then walking off the end of the fallback area and
+	 * eventually causing a page fault which is fatal. Just default to
+	 * something vaguely sane.
+	 */
+	if (!l1d_size)
+		l1d_size = (64 * 1024);
+
+	limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+	/*
+	 * Align to L1d size, and size it at 2x L1d size, to catch possible
+	 * hardware prefetch runoff. We don't have a recipe for load patterns to
+	 * reliably avoid the prefetcher.
+	 */
+	l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
+						l1d_size, MEMBLOCK_LOW_LIMIT,
+						limit, NUMA_NO_NODE);
+	if (!l1d_flush_fallback_area)
+		panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
+		      __func__, l1d_size * 2, l1d_size, &limit);
+
+
+	for_each_possible_cpu(cpu) {
+		struct paca_struct *paca = paca_ptrs[cpu];
+		paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
+		paca->l1d_flush_size = l1d_size;
+	}
+}
+
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+	if (types & L1D_FLUSH_FALLBACK) {
+		pr_info("rfi-flush: fallback displacement flush available\n");
+		init_fallback_flush();
+	}
+
+	if (types & L1D_FLUSH_ORI)
+		pr_info("rfi-flush: ori type flush available\n");
+
+	if (types & L1D_FLUSH_MTTRIG)
+		pr_info("rfi-flush: mttrig type flush available\n");
+
+	enabled_flush_types = types;
+
+	if (!cpu_mitigations_off() && !no_rfi_flush)
+		rfi_flush_enable(enable);
+}
+
+void setup_entry_flush(bool enable)
+{
+	if (cpu_mitigations_off())
+		return;
+
+	if (!no_entry_flush)
+		entry_flush_enable(enable);
+}
+
+void setup_uaccess_flush(bool enable)
+{
+	if (cpu_mitigations_off())
+		return;
+
+	if (!no_uaccess_flush)
+		uaccess_flush_enable(enable);
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int count_cache_flush_set(void *data, u64 val)
 {
@@ -579,5 +753,92 @@ static __init int count_cache_flush_debugfs_init(void)
 	return 0;
 }
 device_initcall(count_cache_flush_debugfs_init);
+
+static int rfi_flush_set(void *data, u64 val)
+{
+	bool enable;
+
+	if (val == 1)
+		enable = true;
+	else if (val == 0)
+		enable = false;
+	else
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != rfi_flush)
+		rfi_flush_enable(enable);
+
+	return 0;
+}
+
+static int rfi_flush_get(void *data, u64 *val)
+{
+	*val = rfi_flush ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+static int entry_flush_set(void *data, u64 val)
+{
+	bool enable;
+
+	if (val == 1)
+		enable = true;
+	else if (val == 0)
+		enable = false;
+	else
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != entry_flush)
+		entry_flush_enable(enable);
+
+	return 0;
+}
+
+static int entry_flush_get(void *data, u64 *val)
+{
+	*val = entry_flush ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+
+static int uaccess_flush_set(void *data, u64 val)
+{
+	bool enable;
+
+	if (val == 1)
+		enable = true;
+	else if (val == 0)
+		enable = false;
+	else
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != uaccess_flush)
+		uaccess_flush_enable(enable);
+
+	return 0;
+}
+
+static int uaccess_flush_get(void *data, u64 *val)
+{
+	*val = uaccess_flush ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
+
+static __init int rfi_flush_debugfs_init(void)
+{
+	debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+	debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
+	debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
+	return 0;
+}
+device_initcall(rfi_flush_debugfs_init);
 #endif /* CONFIG_DEBUG_FS */
 #endif /* CONFIG_PPC_BOOK3S_64 */
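
The three debugfs files registered above sit under powerpc_debugfs_root, normally /sys/kernel/debug/powerpc (path assumed; debugfs must be mounted and the kernel built with CONFIG_DEBUG_FS). A small userspace sketch toggling one of the knobs:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/sys/kernel/debug/powerpc/rfi_flush", O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, "1", 1) != 1)	/* "1" enables, "0" disables */
			perror("write");
		close(fd);
		return 0;
	}
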
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index bee984b1887b6c5c837b6f351e5df2827bbc295f..74a98fff2c2f916b3454a2d700f7592638b37917 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -69,7 +69,6 @@
 #include "setup.h"
 
 #ifdef DEBUG
-#include <asm/udbg.h>
 #define DBG(fmt...) udbg_printf(fmt)
 #else
 #define DBG(fmt...)
@@ -829,7 +828,7 @@ static __init void print_system_info(void)
 }
 
 #ifdef CONFIG_SMP
-static void smp_setup_pacas(void)
+static void __init smp_setup_pacas(void)
 {
 	int cpu;
 
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 8ba49a6bf5159e7ff57e8143728958cefbd93e5f..d7c1f92152af6720e26d3220d0a2f2c08cdc809f 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -164,7 +164,7 @@ void __init irqstack_early_init(void)
 }
 
 #ifdef CONFIG_VMAP_STACK
-void *emergency_ctx[NR_CPUS] __ro_after_init;
+void *emergency_ctx[NR_CPUS] __ro_after_init = {[0] = &init_stack};
 
 void __init emergency_stack_init(void)
 {
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 560ed8b975e77feee406edacc56e8aafa091dc36..b779d25761cf98f8b33ef22400f3708d665eb035 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -232,10 +232,23 @@ static void cpu_ready_for_interrupts(void)
 	 * If we are not in hypervisor mode the job is done once for
 	 * the whole partition in configure_exceptions().
 	 */
-	if (cpu_has_feature(CPU_FTR_HVMODE) &&
-	    cpu_has_feature(CPU_FTR_ARCH_207S)) {
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 		unsigned long lpcr = mfspr(SPRN_LPCR);
-		mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+		unsigned long new_lpcr = lpcr;
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			/* P10 DD1 does not have HAIL */
+			if (pvr_version_is(PVR_POWER10) &&
+					(mfspr(SPRN_PVR) & 0xf00) == 0x100)
+				new_lpcr |= LPCR_AIL_3;
+			else
+				new_lpcr |= LPCR_HAIL;
+		} else if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+			new_lpcr |= LPCR_AIL_3;
+		}
+
+		if (new_lpcr != lpcr)
+			mtspr(SPRN_LPCR, new_lpcr);
 	}
 
 	/*
@@ -941,266 +954,3 @@ static int __init disable_hardlockup_detector(void)
 	return 0;
 }
 early_initcall(disable_hardlockup_detector);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-static enum l1d_flush_type enabled_flush_types;
-static void *l1d_flush_fallback_area;
-static bool no_rfi_flush;
-static bool no_entry_flush;
-static bool no_uaccess_flush;
-bool rfi_flush;
-bool entry_flush;
-bool uaccess_flush;
-DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
-EXPORT_SYMBOL(uaccess_flush_key);
-
-static int __init handle_no_rfi_flush(char *p)
-{
-	pr_info("rfi-flush: disabled on command line.");
-	no_rfi_flush = true;
-	return 0;
-}
-early_param("no_rfi_flush", handle_no_rfi_flush);
-
-static int __init handle_no_entry_flush(char *p)
-{
-	pr_info("entry-flush: disabled on command line.");
-	no_entry_flush = true;
-	return 0;
-}
-early_param("no_entry_flush", handle_no_entry_flush);
-
-static int __init handle_no_uaccess_flush(char *p)
-{
-	pr_info("uaccess-flush: disabled on command line.");
-	no_uaccess_flush = true;
-	return 0;
-}
-early_param("no_uaccess_flush", handle_no_uaccess_flush);
-
-/*
- * The RFI flush is not KPTI, but because users will see doco that says to use
- * nopti we hijack that option here to also disable the RFI flush.
- */
-static int __init handle_no_pti(char *p)
-{
-	pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
-	handle_no_rfi_flush(NULL);
-	return 0;
-}
-early_param("nopti", handle_no_pti);
-
-static void do_nothing(void *unused)
-{
-	/*
-	 * We don't need to do the flush explicitly, just enter+exit kernel is
-	 * sufficient, the RFI exit handlers will do the right thing.
-	 */
-}
-
-void rfi_flush_enable(bool enable)
-{
-	if (enable) {
-		do_rfi_flush_fixups(enabled_flush_types);
-		on_each_cpu(do_nothing, NULL, 1);
-	} else
-		do_rfi_flush_fixups(L1D_FLUSH_NONE);
-
-	rfi_flush = enable;
-}
-
-static void entry_flush_enable(bool enable)
-{
-	if (enable) {
-		do_entry_flush_fixups(enabled_flush_types);
-		on_each_cpu(do_nothing, NULL, 1);
-	} else {
-		do_entry_flush_fixups(L1D_FLUSH_NONE);
-	}
-
-	entry_flush = enable;
-}
-
-static void uaccess_flush_enable(bool enable)
-{
-	if (enable) {
-		do_uaccess_flush_fixups(enabled_flush_types);
-		static_branch_enable(&uaccess_flush_key);
-		on_each_cpu(do_nothing, NULL, 1);
-	} else {
-		static_branch_disable(&uaccess_flush_key);
-		do_uaccess_flush_fixups(L1D_FLUSH_NONE);
-	}
-
-	uaccess_flush = enable;
-}
-
-static void __ref init_fallback_flush(void)
-{
-	u64 l1d_size, limit;
-	int cpu;
-
-	/* Only allocate the fallback flush area once (at boot time). */
-	if (l1d_flush_fallback_area)
-		return;
-
-	l1d_size = ppc64_caches.l1d.size;
-
-	/*
-	 * If there is no d-cache-size property in the device tree, l1d_size
-	 * could be zero. That leads to the loop in the asm wrapping around to
-	 * 2^64-1, and then walking off the end of the fallback area and
-	 * eventually causing a page fault which is fatal. Just default to
-	 * something vaguely sane.
-	 */
-	if (!l1d_size)
-		l1d_size = (64 * 1024);
-
-	limit = min(ppc64_bolted_size(), ppc64_rma_size);
-
-	/*
-	 * Align to L1d size, and size it at 2x L1d size, to catch possible
-	 * hardware prefetch runoff. We don't have a recipe for load patterns to
-	 * reliably avoid the prefetcher.
-	 */
-	l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
-						l1d_size, MEMBLOCK_LOW_LIMIT,
-						limit, NUMA_NO_NODE);
-	if (!l1d_flush_fallback_area)
-		panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
-		      __func__, l1d_size * 2, l1d_size, &limit);
-
-
-	for_each_possible_cpu(cpu) {
-		struct paca_struct *paca = paca_ptrs[cpu];
-		paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
-		paca->l1d_flush_size = l1d_size;
-	}
-}
-
-void setup_rfi_flush(enum l1d_flush_type types, bool enable)
-{
-	if (types & L1D_FLUSH_FALLBACK) {
-		pr_info("rfi-flush: fallback displacement flush available\n");
-		init_fallback_flush();
-	}
-
-	if (types & L1D_FLUSH_ORI)
-		pr_info("rfi-flush: ori type flush available\n");
-
-	if (types & L1D_FLUSH_MTTRIG)
-		pr_info("rfi-flush: mttrig type flush available\n");
-
-	enabled_flush_types = types;
-
-	if (!cpu_mitigations_off() && !no_rfi_flush)
-		rfi_flush_enable(enable);
-}
-
-void setup_entry_flush(bool enable)
-{
-	if (cpu_mitigations_off())
-		return;
-
-	if (!no_entry_flush)
-		entry_flush_enable(enable);
-}
-
-void setup_uaccess_flush(bool enable)
-{
-	if (cpu_mitigations_off())
-		return;
-
-	if (!no_uaccess_flush)
-		uaccess_flush_enable(enable);
-}
-
-#ifdef CONFIG_DEBUG_FS
-static int rfi_flush_set(void *data, u64 val)
-{
-	bool enable;
-
-	if (val == 1)
-		enable = true;
-	else if (val == 0)
-		enable = false;
-	else
-		return -EINVAL;
-
-	/* Only do anything if we're changing state */
-	if (enable != rfi_flush)
-		rfi_flush_enable(enable);
-
-	return 0;
-}
-
-static int rfi_flush_get(void *data, u64 *val)
-{
-	*val = rfi_flush ? 1 : 0;
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
-
-static int entry_flush_set(void *data, u64 val)
-{
-	bool enable;
-
-	if (val == 1)
-		enable = true;
-	else if (val == 0)
-		enable = false;
-	else
-		return -EINVAL;
-
-	/* Only do anything if we're changing state */
-	if (enable != entry_flush)
-		entry_flush_enable(enable);
-
-	return 0;
-}
-
-static int entry_flush_get(void *data, u64 *val)
-{
-	*val = entry_flush ? 1 : 0;
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
-
-static int uaccess_flush_set(void *data, u64 val)
-{
-	bool enable;
-
-	if (val == 1)
-		enable = true;
-	else if (val == 0)
-		enable = false;
-	else
-		return -EINVAL;
-
-	/* Only do anything if we're changing state */
-	if (enable != uaccess_flush)
-		uaccess_flush_enable(enable);
-
-	return 0;
-}
-
-static int uaccess_flush_get(void *data, u64 *val)
-{
-	*val = uaccess_flush ? 1 : 0;
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
-
-static __init int rfi_flush_debugfs_init(void)
-{
-	debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
-	debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
-	debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
-	return 0;
-}
-device_initcall(rfi_flush_debugfs_init);
-#endif
-#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 2559a681536eaa2a4bb739ab42a5bc45e9e3abb4..f4aafa337c2edbcc26dbd82df8059f0555cd2257 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -19,6 +19,15 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 			      struct task_struct *tsk);
 
+static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src)
+{
+	BUILD_BUG_ON(sizeof(sigset_t) != sizeof(u64));
+
+	return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
+}
+#define unsafe_get_user_sigset(dst, src, label) \
+	unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label)
+
 #ifdef CONFIG_VSX
 extern unsigned long copy_vsx_to_user(void __user *to,
 				      struct task_struct *task);
@@ -53,6 +62,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
 				&buf[i], label);\
 } while (0)
 
+#define unsafe_copy_fpr_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)from;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NFPREG - 1; i++)				\
+		unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \
+	unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label);	\
+} while (0)
+
+#define unsafe_copy_vsx_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)from;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NVSRHALFREG ; i++)				\
+		unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
+				&buf[i], label);			\
+} while (0)
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 #define unsafe_copy_ckfpr_to_user(to, task, label)	do {		\
 	struct task_struct *__t = task;					\
@@ -73,6 +102,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
 		unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \
 				&buf[i], label);\
 } while (0)
+
+#define unsafe_copy_ckfpr_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)from;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NFPREG - 1 ; i++)				\
+		unsafe_get_user(__t->thread.TS_CKFPR(i), &buf[i], label);\
+	unsafe_get_user(__t->thread.ckfp_state.fpscr, &buf[i], label);	\
+} while (0)
+
+#define unsafe_copy_ckvsx_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)from;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NVSRHALFREG ; i++)				\
+		unsafe_get_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \
+				&buf[i], label);			\
+} while (0)
 #endif
 #elif defined(CONFIG_PPC_FPU_REGS)
 
@@ -80,6 +129,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
 	unsafe_copy_to_user(to, (task)->thread.fp_state.fpr,	\
 			    ELF_NFPREG * sizeof(double), label)
 
+#define unsafe_copy_fpr_from_user(task, from, label)			\
+	unsafe_copy_from_user((task)->thread.fp_state.fpr, from,	\
+			    ELF_NFPREG * sizeof(double), label)
+
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
 {
@@ -115,6 +168,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
 #else
 #define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
 
+#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
 {
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index f651b992fe016ffe78a7442522ac2f432323df61..8f05ed0da292b09e4303919d0f51456f43da3d4a 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -83,24 +83,17 @@
  * implementation that makes things simple for little endian only)
  */
 #define unsafe_put_sigset_t	unsafe_put_compat_sigset
-
-static inline int get_sigset_t(sigset_t *set,
-			       const compat_sigset_t __user *uset)
-{
-	return get_compat_sigset(set, uset);
-}
+#define unsafe_get_sigset_t	unsafe_get_compat_sigset
 
 #define to_user_ptr(p)		ptr_to_compat(p)
 #define from_user_ptr(p)	compat_ptr(p)
 
 static __always_inline int
-save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame)
+__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame)
 {
 	elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
 	int val, i;
 
-	WARN_ON(!FULL_REGS(regs));
-
 	for (i = 0; i <= PT_RESULT; i ++) {
 		/* Force usr to alway see softe as 1 (interrupts enabled) */
 		if (i == PT_SOFTE)
@@ -116,8 +109,8 @@ save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame)
 	return 1;
 }
 
-static inline int restore_general_regs(struct pt_regs *regs,
-		struct mcontext __user *sr)
+static __always_inline int
+__unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr)
 {
 	elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
 	int i;
@@ -125,10 +118,12 @@ static inline int restore_general_regs(struct pt_regs *regs,
 	for (i = 0; i <= PT_RESULT; i++) {
 		if ((i == PT_MSR) || (i == PT_SOFTE))
 			continue;
-		if (__get_user(gregs[i], &sr->mc_gregs[i]))
-			return -EFAULT;
+		unsafe_get_user(gregs[i], &sr->mc_gregs[i], failed);
 	}
 	return 0;
+
+failed:
+	return 1;
 }
 
 #else /* CONFIG_PPC64 */
@@ -142,18 +137,14 @@ static inline int restore_general_regs(struct pt_regs *regs,
 	unsafe_copy_to_user(__us, __s, sizeof(*__us), label);		\
 } while (0)
 
-static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset)
-{
-	return copy_from_user(set, uset, sizeof(*uset));
-}
+#define unsafe_get_sigset_t	unsafe_get_user_sigset
 
 #define to_user_ptr(p)		((unsigned long)(p))
 #define from_user_ptr(p)	((void __user *)(p))
 
 static __always_inline int
-save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame)
+__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame)
 {
-	WARN_ON(!FULL_REGS(regs));
 	unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed);
 	return 0;
 
@@ -161,23 +152,30 @@ save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame)
 	return 1;
 }
 
-static inline int restore_general_regs(struct pt_regs *regs,
-		struct mcontext __user *sr)
+static __always_inline int
+__unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr)
 {
 	/* copy up to but not including MSR */
-	if (__copy_from_user(regs, &sr->mc_gregs,
-				PT_MSR * sizeof(elf_greg_t)))
-		return -EFAULT;
+	unsafe_copy_from_user(regs, &sr->mc_gregs, PT_MSR * sizeof(elf_greg_t), failed);
+
 	/* copy from orig_r3 (the word after the MSR) up to the end */
-	if (__copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
-				GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t)))
-		return -EFAULT;
+	unsafe_copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
+			      GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t), failed);
+
 	return 0;
+
+failed:
+	return 1;
 }
 #endif
 
 #define unsafe_save_general_regs(regs, frame, label) do {	\
-	if (save_general_regs_unsafe(regs, frame))	\
+	if (__unsafe_save_general_regs(regs, frame))		\
+		goto label;					\
+} while (0)
+
+#define unsafe_restore_general_regs(regs, frame, label) do {	\
+	if (__unsafe_restore_general_regs(regs, frame))		\
 		goto label;					\
 } while (0)
 
@@ -260,8 +258,8 @@ static void prepare_save_user_regs(int ctx_has_vsx_region)
 #endif
 }
 
-static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
-				 struct mcontext __user *tm_frame, int ctx_has_vsx_region)
+static int __unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
+				   struct mcontext __user *tm_frame, int ctx_has_vsx_region)
 {
 	unsigned long msr = regs->msr;
 
@@ -338,7 +336,7 @@ static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *f
 }
 
 #define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \
-	if (save_user_regs_unsafe(regs, frame, tm_frame, has_vsx))	\
+	if (__unsafe_save_user_regs(regs, frame, tm_frame, has_vsx))	\
 		goto label;						\
 } while (0)
 
@@ -350,7 +348,7 @@ static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *f
  * We also save the transactional registers to a second ucontext in the
  * frame.
  *
- * See save_user_regs_unsafe() and signal_64.c:setup_tm_sigcontexts().
+ * See __unsafe_save_user_regs() and signal_64.c:setup_tm_sigcontexts().
  */
 static void prepare_save_tm_user_regs(void)
 {
@@ -441,7 +439,7 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
 	/* SPE regs are not checkpointed with TM, so this section is
-	 * simply the same as in save_user_regs_unsafe().
+	 * simply the same as in __unsafe_save_user_regs().
 	 */
 	if (current->thread.used_spe) {
 		unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr,
@@ -485,26 +483,25 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user
 static long restore_user_regs(struct pt_regs *regs,
 			      struct mcontext __user *sr, int sig)
 {
-	long err;
 	unsigned int save_r2 = 0;
 	unsigned long msr;
 #ifdef CONFIG_VSX
 	int i;
 #endif
 
+	if (!user_read_access_begin(sr, sizeof(*sr)))
+		return 1;
 	/*
 	 * restore general registers but not including MSR or SOFTE. Also
 	 * take care of keeping r2 (TLS) intact if not a signal
 	 */
 	if (!sig)
 		save_r2 = (unsigned int)regs->gpr[2];
-	err = restore_general_regs(regs, sr);
+	unsafe_restore_general_regs(regs, sr, failed);
 	set_trap_norestart(regs);
-	err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
+	unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
 	if (!sig)
 		regs->gpr[2] = (unsigned long) save_r2;
-	if (err)
-		return 1;
 
 	/* if doing signal return, restore the previous little-endian mode */
 	if (sig)
@@ -518,22 +515,19 @@ static long restore_user_regs(struct pt_regs *regs,
 	regs->msr &= ~MSR_VEC;
 	if (msr & MSR_VEC) {
 		/* restore altivec registers from the stack */
-		if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
-				     sizeof(sr->mc_vregs)))
-			return 1;
+		unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
+				      sizeof(sr->mc_vregs), failed);
 		current->thread.used_vr = true;
 	} else if (current->thread.used_vr)
 		memset(&current->thread.vr_state, 0,
 		       ELF_NVRREG * sizeof(vector128));
 
 	/* Always get VRSAVE back */
-	if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
-		return 1;
+	unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed);
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		mtspr(SPRN_VRSAVE, current->thread.vrsave);
 #endif /* CONFIG_ALTIVEC */
-	if (copy_fpr_from_user(current, &sr->mc_fregs))
-		return 1;
+	unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
 
 #ifdef CONFIG_VSX
 	/*
@@ -546,8 +540,7 @@ static long restore_user_regs(struct pt_regs *regs,
 		 * Restore altivec registers from the stack to a local
 		 * buffer, then write this out to the thread_struct
 		 */
-		if (copy_vsx_from_user(current, &sr->mc_vsregs))
-			return 1;
+		unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed);
 		current->thread.used_vsr = true;
 	} else if (current->thread.used_vsr)
 		for (i = 0; i < 32 ; i++)
@@ -565,19 +558,22 @@ static long restore_user_regs(struct pt_regs *regs,
 	regs->msr &= ~MSR_SPE;
 	if (msr & MSR_SPE) {
 		/* restore spe registers from the stack */
-		if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
-				     ELF_NEVRREG * sizeof(u32)))
-			return 1;
+		unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
+				      ELF_NEVRREG * sizeof(u32), failed);
 		current->thread.used_spe = true;
 	} else if (current->thread.used_spe)
 		memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
 
 	/* Always get SPEFSCR back */
-	if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG))
-		return 1;
+	unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
 #endif /* CONFIG_SPE */
 
+	user_read_access_end();
 	return 0;
+
+failed:
+	user_read_access_end();
+	return 1;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -590,7 +586,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 				 struct mcontext __user *sr,
 				 struct mcontext __user *tm_sr)
 {
-	long err;
 	unsigned long msr, msr_hi;
 #ifdef CONFIG_VSX
 	int i;
@@ -605,15 +600,13 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	 * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR
 	 * were set by the signal delivery.
 	 */
-	err = restore_general_regs(regs, tm_sr);
-	err |= restore_general_regs(&current->thread.ckpt_regs, sr);
-
-	err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]);
-
-	err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
-	if (err)
+	if (!user_read_access_begin(sr, sizeof(*sr)))
 		return 1;
 
+	unsafe_restore_general_regs(&current->thread.ckpt_regs, sr, failed);
+	unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed);
+	unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
+
 	/* Restore the previous little-endian mode */
 	regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
 
@@ -621,12 +614,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	regs->msr &= ~MSR_VEC;
 	if (msr & MSR_VEC) {
 		/* restore altivec registers from the stack */
-		if (__copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
-				     sizeof(sr->mc_vregs)) ||
-		    __copy_from_user(&current->thread.vr_state,
-				     &tm_sr->mc_vregs,
-				     sizeof(sr->mc_vregs)))
-			return 1;
+		unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
+				      sizeof(sr->mc_vregs), failed);
 		current->thread.used_vr = true;
 	} else if (current->thread.used_vr) {
 		memset(&current->thread.vr_state, 0,
@@ -636,20 +625,15 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	}
 
 	/* Always get VRSAVE back */
-	if (__get_user(current->thread.ckvrsave,
-		       (u32 __user *)&sr->mc_vregs[32]) ||
-	    __get_user(current->thread.vrsave,
-		       (u32 __user *)&tm_sr->mc_vregs[32]))
-		return 1;
+	unsafe_get_user(current->thread.ckvrsave,
+			(u32 __user *)&sr->mc_vregs[32], failed);
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
 #endif /* CONFIG_ALTIVEC */
 
 	regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
 
-	if (copy_fpr_from_user(current, &sr->mc_fregs) ||
-	    copy_ckfpr_from_user(current, &tm_sr->mc_fregs))
-		return 1;
+	unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
 
 #ifdef CONFIG_VSX
 	regs->msr &= ~MSR_VSX;
@@ -658,9 +642,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 		 * Restore altivec registers from the stack to a local
 		 * buffer, then write this out to the thread_struct
 		 */
-		if (copy_vsx_from_user(current, &tm_sr->mc_vsregs) ||
-		    copy_ckvsx_from_user(current, &sr->mc_vsregs))
-			return 1;
+		unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed);
 		current->thread.used_vsr = true;
 	} else if (current->thread.used_vsr)
 		for (i = 0; i < 32 ; i++) {
@@ -675,23 +657,54 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	 */
 	regs->msr &= ~MSR_SPE;
 	if (msr & MSR_SPE) {
-		if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
-				     ELF_NEVRREG * sizeof(u32)))
-			return 1;
+		unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
+				      ELF_NEVRREG * sizeof(u32), failed);
 		current->thread.used_spe = true;
 	} else if (current->thread.used_spe)
 		memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
 
 	/* Always get SPEFSCR back */
-	if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs
-		       + ELF_NEVRREG))
-		return 1;
+	unsafe_get_user(current->thread.spefscr,
+			(u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
 #endif /* CONFIG_SPE */
 
-	/* Get the top half of the MSR from the user context */
-	if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
+	user_read_access_end();
+
+	if (!user_read_access_begin(tm_sr, sizeof(*tm_sr)))
 		return 1;
+
+	unsafe_restore_general_regs(regs, tm_sr, failed);
+
+#ifdef CONFIG_ALTIVEC
+	/* restore altivec registers from the stack */
+	if (msr & MSR_VEC)
+		unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
+				      sizeof(sr->mc_vregs), failed);
+
+	/* Always get VRSAVE back */
+	unsafe_get_user(current->thread.vrsave,
+			(u32 __user *)&tm_sr->mc_vregs[32], failed);
+#endif /* CONFIG_ALTIVEC */
+
+	unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
+
+#ifdef CONFIG_VSX
+	if (msr & MSR_VSX) {
+		/*
+		 * Restore VSX registers from the stack to a local
+		 * buffer, then write them out to the thread_struct
+		 */
+		unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
+		current->thread.used_vsr = true;
+	}
+#endif /* CONFIG_VSX */
+
+	/* Get the top half of the MSR from the user context */
+	unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
 	msr_hi <<= 32;
+
+	user_read_access_end();
+
 	/* If TM bits are set to the reserved value, it's an invalid context */
 	if (MSR_TM_RESV(msr_hi))
 		return 1;
@@ -739,6 +752,16 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	preempt_enable();
 
 	return 0;
+
+failed:
+	user_read_access_end();
+	return 1;
+}
+#else
+static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr,
+				 struct mcontext __user *tm_sr)
+{
+	return 0;
 }
 #endif
 
@@ -944,28 +967,31 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
 	sigset_t set;
 	struct mcontext __user *mcp;
 
-	if (get_sigset_t(&set, &ucp->uc_sigmask))
+	if (!user_read_access_begin(ucp, sizeof(*ucp)))
 		return -EFAULT;
+
+	unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
 #ifdef CONFIG_PPC64
 	{
 		u32 cmcp;
 
-		if (__get_user(cmcp, &ucp->uc_regs))
-			return -EFAULT;
+		unsafe_get_user(cmcp, &ucp->uc_regs, failed);
 		mcp = (struct mcontext __user *)(u64)cmcp;
-		/* no need to check access_ok(mcp), since mcp < 4GB */
 	}
 #else
-	if (__get_user(mcp, &ucp->uc_regs))
-		return -EFAULT;
-	if (!access_ok(mcp, sizeof(*mcp)))
-		return -EFAULT;
+	unsafe_get_user(mcp, &ucp->uc_regs, failed);
 #endif
+	user_read_access_end();
+
 	set_current_blocked(&set);
 	if (restore_user_regs(regs, mcp, sig))
 		return -EFAULT;
 
 	return 0;
+
+failed:
+	user_read_access_end();
+	return -EFAULT;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -979,11 +1005,15 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
 	u32 cmcp;
 	u32 tm_cmcp;
 
-	if (get_sigset_t(&set, &ucp->uc_sigmask))
+	if (!user_read_access_begin(ucp, sizeof(*ucp)))
 		return -EFAULT;
 
-	if (__get_user(cmcp, &ucp->uc_regs) ||
-	    __get_user(tm_cmcp, &tm_ucp->uc_regs))
+	unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
+	unsafe_get_user(cmcp, &ucp->uc_regs, failed);
+
+	user_read_access_end();
+
+	if (__get_user(tm_cmcp, &tm_ucp->uc_regs))
 		return -EFAULT;
 	mcp = (struct mcontext __user *)(u64)cmcp;
 	tm_mcp = (struct mcontext __user *)(u64)tm_cmcp;
@@ -994,6 +1024,10 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
 		return -EFAULT;
 
 	return 0;
+
+failed:
+	user_read_access_end();
+	return -EFAULT;
 }
 #endif
 
@@ -1311,19 +1345,16 @@ SYSCALL_DEFINE0(sigreturn)
 	struct sigcontext __user *sc;
 	struct sigcontext sigctx;
 	struct mcontext __user *sr;
-	void __user *addr;
 	sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	struct mcontext __user *mcp, *tm_mcp;
-	unsigned long msr_hi;
-#endif
+	struct mcontext __user *mcp;
+	struct mcontext __user *tm_mcp = NULL;
+	unsigned long long msr_hi = 0;
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current->restart_block.fn = do_no_restart_syscall;
 
 	sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
 	sc = &sf->sctx;
-	addr = sc;
 	if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
 		goto badframe;
 
@@ -1339,31 +1370,32 @@ SYSCALL_DEFINE0(sigreturn)
 #endif
 	set_current_blocked(&set);
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	mcp = (struct mcontext __user *)&sf->mctx;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
 	if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
 		goto badframe;
+#endif
 	if (MSR_TM_ACTIVE(msr_hi<<32)) {
 		if (!cpu_has_feature(CPU_FTR_TM))
 			goto badframe;
 		if (restore_tm_user_regs(regs, mcp, tm_mcp))
 			goto badframe;
-	} else
-#endif
-	{
+	} else {
 		sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
-		addr = sr;
-		if (!access_ok(sr, sizeof(*sr))
-		    || restore_user_regs(regs, sr, 1))
-			goto badframe;
+		if (restore_user_regs(regs, sr, 1)) {
+			signal_fault(current, regs, "sys_sigreturn", sr);
+
+			force_sig(SIGSEGV);
+			return 0;
+		}
 	}
 
 	set_thread_flag(TIF_RESTOREALL);
 	return 0;
 
 badframe:
-	signal_fault(current, regs, "sys_sigreturn", addr);
+	signal_fault(current, regs, "sys_sigreturn", sc);
 
 	force_sig(SIGSEGV);
 	return 0;
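
For readers unfamiliar with the uaccess pattern this conversion relies on, the idea is: open the user-access window once per frame with user_read_access_begin(), issue a run of unsafe_get_user()/unsafe_copy_from_user() accessors that jump to a single failure label instead of each returning an error code, and close the window with user_read_access_end() on both the success and the failure path. Below is a minimal, self-contained userspace sketch of that control flow; the demo_* names are stand-ins invented purely for illustration and are not kernel APIs.

	/* Standalone sketch (not kernel code) of the goto-on-failure accessor pattern. */
	#include <stdio.h>

	/* Stand-in for unsafe_get_user(): read one value or jump to the given label. */
	#define demo_get(dst, src, label) do {		\
		if ((src) == NULL)			\
			goto label;			\
		(dst) = *(src);				\
	} while (0)

	static int demo_restore(const int *a, const int *b)
	{
		int x, y;

		/* Stand-in for user_read_access_begin(): window opened once. */
		demo_get(x, a, failed);		/* every accessor bails out to 'failed' */
		demo_get(y, b, failed);
		/* Stand-in for user_read_access_end() on the success path. */
		printf("restored %d %d\n", x, y);
		return 0;

	failed:
		/* The failure path must also close the window before returning. */
		return 1;
	}

	int main(void)
	{
		int v1 = 1, v2 = 2;

		demo_restore(&v1, &v2);		/* succeeds */
		demo_restore(&v1, NULL);	/* takes the 'failed' path */
		return 0;
	}

The benefit mirrored by the patch is that the window is opened and closed once per sigframe rather than once per access, and every accessor shares one error exit.
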
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index f9e4a1ac440fb094718ee6402f38762a6e9c45de..dca66481d0c21dad54b511b40941531623e70808 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -79,13 +79,36 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
 }
 #endif
 
+static void prepare_setup_sigcontext(struct task_struct *tsk)
+{
+#ifdef CONFIG_ALTIVEC
+	/* save altivec registers */
+	if (tsk->thread.used_vr)
+		flush_altivec_to_thread(tsk);
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		tsk->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif /* CONFIG_ALTIVEC */
+
+	flush_fp_to_thread(tsk);
+
+#ifdef CONFIG_VSX
+	if (tsk->thread.used_vsr)
+		flush_vsx_to_thread(tsk);
+#endif /* CONFIG_VSX */
+}
+
 /*
  * Set up the sigcontext for the signal frame.
  */
 
-static long setup_sigcontext(struct sigcontext __user *sc,
-		struct task_struct *tsk, int signr, sigset_t *set,
-		unsigned long handler, int ctx_has_vsx_region)
+#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region, label)\
+do {											\
+	if (__unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region))\
+		goto label;								\
+} while (0)
+static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
+					struct task_struct *tsk, int signr, sigset_t *set,
+					unsigned long handler, int ctx_has_vsx_region)
 {
 	/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
 	 * process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -97,25 +120,22 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	 */
 #ifdef CONFIG_ALTIVEC
 	elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
-	unsigned long vrsave;
 #endif
 	struct pt_regs *regs = tsk->thread.regs;
 	unsigned long msr = regs->msr;
-	long err = 0;
 	/* Force usr to alway see softe as 1 (interrupts enabled) */
 	unsigned long softe = 0x1;
 
 	BUG_ON(tsk != current);
 
 #ifdef CONFIG_ALTIVEC
-	err |= __put_user(v_regs, &sc->v_regs);
+	unsafe_put_user(v_regs, &sc->v_regs, efault_out);
 
 	/* save altivec registers */
 	if (tsk->thread.used_vr) {
-		flush_altivec_to_thread(tsk);
 		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
-		err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
-				      33 * sizeof(vector128));
+		unsafe_copy_to_user(v_regs, &tsk->thread.vr_state,
+				    33 * sizeof(vector128), efault_out);
 		/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
 		 * contains valid data.
 		 */
@@ -124,19 +144,12 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	/* We always copy to/from vrsave, it's 0 if we don't have or don't
 	 * use altivec.
 	 */
-	vrsave = 0;
-	if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
-		vrsave = mfspr(SPRN_VRSAVE);
-		tsk->thread.vrsave = vrsave;
-	}
-
-	err |= __put_user(vrsave, (u32 __user *)&v_regs[33]);
+	unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
 #else /* CONFIG_ALTIVEC */
-	err |= __put_user(0, &sc->v_regs);
+	unsafe_put_user(0, &sc->v_regs, efault_out);
 #endif /* CONFIG_ALTIVEC */
-	flush_fp_to_thread(tsk);
 	/* copy fpr regs and fpscr */
-	err |= copy_fpr_to_user(&sc->fp_regs, tsk);
+	unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out);
 
 	/*
 	 * Clear the MSR VSX bit to indicate there is no valid state attached
@@ -150,26 +163,27 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	 * VMX data.
 	 */
 	if (tsk->thread.used_vsr && ctx_has_vsx_region) {
-		flush_vsx_to_thread(tsk);
 		v_regs += ELF_NVRREG;
-		err |= copy_vsx_to_user(v_regs, tsk);
+		unsafe_copy_vsx_to_user(v_regs, tsk, efault_out);
 		/* set MSR_VSX in the MSR value in the frame to
 		 * indicate that sc->vs_reg) contains valid data.
 		 */
 		msr |= MSR_VSX;
 	}
 #endif /* CONFIG_VSX */
-	err |= __put_user(&sc->gp_regs, &sc->regs);
-	WARN_ON(!FULL_REGS(regs));
-	err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
-	err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
-	err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
-	err |= __put_user(signr, &sc->signal);
-	err |= __put_user(handler, &sc->handler);
+	unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out);
+	unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out);
+	unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out);
+	unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out);
+	unsafe_put_user(signr, &sc->signal, efault_out);
+	unsafe_put_user(handler, &sc->handler, efault_out);
 	if (set != NULL)
-		err |=  __put_user(set->sig[0], &sc->oldmask);
+		unsafe_put_user(set->sig[0], &sc->oldmask, efault_out);
 
-	return err;
+	return 0;
+
+efault_out:
+	return -EFAULT;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -294,7 +308,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 
 	err |= __put_user(&sc->gp_regs, &sc->regs);
 	err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs);
-	WARN_ON(!FULL_REGS(regs));
 	err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE);
 	err |= __copy_to_user(&sc->gp_regs,
 			      &tsk->thread.ckpt_regs, GP_REGS_SIZE);
@@ -312,14 +325,16 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 /*
  * Restore the sigcontext from the signal frame.
  */
-
-static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
-			      struct sigcontext __user *sc)
+#define unsafe_restore_sigcontext(tsk, set, sig, sc, label) do {	\
+	if (__unsafe_restore_sigcontext(tsk, set, sig, sc))		\
+		goto label;						\
+} while (0)
+static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set,
+						int sig, struct sigcontext __user *sc)
 {
 #ifdef CONFIG_ALTIVEC
 	elf_vrreg_t __user *v_regs;
 #endif
-	unsigned long err = 0;
 	unsigned long save_r13 = 0;
 	unsigned long msr;
 	struct pt_regs *regs = tsk->thread.regs;
@@ -334,27 +349,27 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
 		save_r13 = regs->gpr[13];
 
 	/* copy the GPRs */
-	err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr));
-	err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]);
+	unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr), efault_out);
+	unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out);
 	/* get MSR separately, transfer the LE bit if doing signal return */
-	err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+	unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
 	if (sig)
 		regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
-	err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]);
-	err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]);
-	err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
-	err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
-	err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
+	unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
+	unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
+	unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
+	unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out);
+	unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out);
 	/* Don't allow userspace to set SOFTE */
 	set_trap_norestart(regs);
-	err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
-	err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
-	err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+	unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out);
+	unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out);
+	unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out);
 
 	if (!sig)
 		regs->gpr[13] = save_r13;
 	if (set != NULL)
-		err |=  __get_user(set->sig[0], &sc->oldmask);
+		unsafe_get_user(set->sig[0], &sc->oldmask, efault_out);
 
 	/*
 	 * Force reload of FP/VEC.
@@ -364,29 +379,27 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
 	regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
 
 #ifdef CONFIG_ALTIVEC
-	err |= __get_user(v_regs, &sc->v_regs);
-	if (err)
-		return err;
+	unsafe_get_user(v_regs, &sc->v_regs, efault_out);
 	if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
 		return -EFAULT;
 	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
 	if (v_regs != NULL && (msr & MSR_VEC) != 0) {
-		err |= __copy_from_user(&tsk->thread.vr_state, v_regs,
-					33 * sizeof(vector128));
+		unsafe_copy_from_user(&tsk->thread.vr_state, v_regs,
+				      33 * sizeof(vector128), efault_out);
 		tsk->thread.used_vr = true;
 	} else if (tsk->thread.used_vr) {
 		memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
 	}
 	/* Always get VRSAVE back */
 	if (v_regs != NULL)
-		err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+		unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
 	else
 		tsk->thread.vrsave = 0;
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
 #endif /* CONFIG_ALTIVEC */
 	/* restore floating point */
-	err |= copy_fpr_from_user(tsk, &sc->fp_regs);
+	unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out);
 #ifdef CONFIG_VSX
 	/*
 	 * Get additional VSX data. Update v_regs to point after the
@@ -395,14 +408,17 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
 	 */
 	v_regs += ELF_NVRREG;
 	if ((msr & MSR_VSX) != 0) {
-		err |= copy_vsx_from_user(tsk, v_regs);
+		unsafe_copy_vsx_from_user(tsk, v_regs, efault_out);
 		tsk->thread.used_vsr = true;
 	} else {
 		for (i = 0; i < 32 ; i++)
 			tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
 	}
 #endif
-	return err;
+	return 0;
+
+efault_out:
+	return -EFAULT;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -586,6 +602,12 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
 
 	return err;
 }
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
+static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc,
+				   struct sigcontext __user *tm_sc)
+{
+	return -EINVAL;
+}
 #endif
 
 /*
@@ -655,12 +677,16 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 		ctx_has_vsx_region = 1;
 
 	if (old_ctx != NULL) {
-		if (!access_ok(old_ctx, ctx_size)
-		    || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
-					ctx_has_vsx_region)
-		    || __copy_to_user(&old_ctx->uc_sigmask,
-				      &current->blocked, sizeof(sigset_t)))
+		prepare_setup_sigcontext(current);
+		if (!user_write_access_begin(old_ctx, ctx_size))
 			return -EFAULT;
+
+		unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+					0, ctx_has_vsx_region, efault_out);
+		unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
+				    sizeof(sigset_t), efault_out);
+
+		user_write_access_end();
 	}
 	if (new_ctx == NULL)
 		return 0;
@@ -680,15 +706,25 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	 * We kill the task with a SIGSEGV in this situation.
 	 */
 
-	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
+	if (__get_user_sigset(&set, &new_ctx->uc_sigmask))
 		do_exit(SIGSEGV);
 	set_current_blocked(&set);
-	if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext))
+
+	if (!user_read_access_begin(new_ctx, ctx_size))
+		return -EFAULT;
+	if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+		user_read_access_end();
 		do_exit(SIGSEGV);
+	}
+	user_read_access_end();
 
 	/* This returns like rt_sigreturn */
 	set_thread_flag(TIF_RESTOREALL);
 	return 0;
+
+efault_out:
+	user_write_access_end();
+	return -EFAULT;
 }
 
 
@@ -701,9 +737,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	struct pt_regs *regs = current_pt_regs();
 	struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
 	sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	unsigned long msr;
-#endif
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current->restart_block.fn = do_no_restart_syscall;
@@ -711,52 +745,54 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	if (!access_ok(uc, sizeof(*uc)))
 		goto badframe;
 
-	if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
+	if (__get_user_sigset(&set, &uc->uc_sigmask))
 		goto badframe;
 	set_current_blocked(&set);
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	/*
-	 * If there is a transactional state then throw it away.
-	 * The purpose of a sigreturn is to destroy all traces of the
-	 * signal frame, this includes any transactional state created
-	 * within in. We only check for suspended as we can never be
-	 * active in the kernel, we are active, there is nothing better to
-	 * do than go ahead and Bad Thing later.
-	 * The cause is not important as there will never be a
-	 * recheckpoint so it's not user visible.
-	 */
-	if (MSR_TM_SUSPENDED(mfmsr()))
-		tm_reclaim_current(0);
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM)) {
+		/*
+		 * If there is a transactional state then throw it away.
+		 * The purpose of a sigreturn is to destroy all traces of the
+		 * signal frame, this includes any transactional state created
+		 * within it. We only check for suspended as we can never be
+		 * active in the kernel; if we are active, there is nothing better to
+		 * do than go ahead and Bad Thing later.
+		 * The cause is not important as there will never be a
+		 * recheckpoint so it's not user visible.
+		 */
+		if (MSR_TM_SUSPENDED(mfmsr()))
+			tm_reclaim_current(0);
 
-	/*
-	 * Disable MSR[TS] bit also, so, if there is an exception in the
-	 * code below (as a page fault in copy_ckvsx_to_user()), it does
-	 * not recheckpoint this task if there was a context switch inside
-	 * the exception.
-	 *
-	 * A major page fault can indirectly call schedule(). A reschedule
-	 * process in the middle of an exception can have a side effect
-	 * (Changing the CPU MSR[TS] state), since schedule() is called
-	 * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended
-	 * (switch_to() calls tm_recheckpoint() for the 'new' process). In
-	 * this case, the process continues to be the same in the CPU, but
-	 * the CPU state just changed.
-	 *
-	 * This can cause a TM Bad Thing, since the MSR in the stack will
-	 * have the MSR[TS]=0, and this is what will be used to RFID.
-	 *
-	 * Clearing MSR[TS] state here will avoid a recheckpoint if there
-	 * is any process reschedule in kernel space. The MSR[TS] state
-	 * does not need to be saved also, since it will be replaced with
-	 * the MSR[TS] that came from user context later, at
-	 * restore_tm_sigcontexts.
-	 */
-	regs->msr &= ~MSR_TS_MASK;
+		/*
+		 * Disable MSR[TS] bit also, so, if there is an exception in the
+		 * code below (as a page fault in copy_ckvsx_to_user()), it does
+		 * not recheckpoint this task if there was a context switch inside
+		 * the exception.
+		 *
+		 * A major page fault can indirectly call schedule(). Rescheduling a
+		 * process in the middle of an exception can have a side effect
+		 * (changing the CPU MSR[TS] state), since schedule() is called
+		 * with the CPU MSR[TS] disabled and returns with MSR[TS]=Suspended
+		 * (switch_to() calls tm_recheckpoint() for the 'new' process). In
+		 * this case, the process continues to be the same in the CPU, but
+		 * the CPU state just changed.
+		 *
+		 * This can cause a TM Bad Thing, since the MSR in the stack will
+		 * have the MSR[TS]=0, and this is what will be used to RFID.
+		 *
+		 * Clearing MSR[TS] state here will avoid a recheckpoint if there
+		 * is any process reschedule in kernel space. The MSR[TS] state
+		 * does not need to be saved also, since it will be replaced with
+		 * the MSR[TS] that came from user context later, at
+		 * restore_tm_sigcontexts.
+		 */
+		regs->msr &= ~MSR_TS_MASK;
 
-	if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
-		goto badframe;
-	if (MSR_TM_ACTIVE(msr)) {
+		if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
+			goto badframe;
+	}
+
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && MSR_TM_ACTIVE(msr)) {
 		/* We recheckpoint on return. */
 		struct ucontext __user *uc_transact;
 
@@ -769,9 +805,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
 					   &uc_transact->uc_mcontext))
 			goto badframe;
-	} else
-#endif
-	{
+	} else {
 		/*
 		 * Fall through, for non-TM restore
 		 *
@@ -785,8 +819,13 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		 * causing a TM bad thing.
 		 */
 		current->thread.regs->msr &= ~MSR_TS_MASK;
-		if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
+		if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
 			goto badframe;
+
+		unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
+					  badframe_block);
+
+		user_read_access_end();
 	}
 
 	if (restore_altstack(&uc->uc_stack))
@@ -795,6 +834,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	set_thread_flag(TIF_RESTOREALL);
 	return 0;
 
+badframe_block:
+	user_read_access_end();
 badframe:
 	signal_fault(current, regs, "rt_sigreturn", uc);
 
@@ -809,46 +850,57 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	unsigned long newsp = 0;
 	long err = 0;
 	struct pt_regs *regs = tsk->thread.regs;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
 	unsigned long msr = regs->msr;
-#endif
 
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
-	if (!access_ok(frame, sizeof(*frame)))
-		goto badframe;
 
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
-	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
-	if (err)
+	/*
+	 * prepare_setup_sigcontext() is only needed when unsafe_setup_sigcontext()
+	 * will be called below, and it must run before the uaccess window is opened.
+	 */
+	if (!MSR_TM_ACTIVE(msr))
+		prepare_setup_sigcontext(tsk);
+
+	if (!user_write_access_begin(frame, sizeof(*frame)))
 		goto badframe;
 
+	unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
+	unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+
 	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
+	unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
+
 	if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 		/* The ucontext_t passed to userland points to the second
 		 * ucontext_t (for transactional state) with its uc_link ptr.
 		 */
-		err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
+		unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+
+		user_write_access_end();
+
 		err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
 					    &frame->uc_transact.uc_mcontext,
 					    tsk, ksig->sig, NULL,
 					    (unsigned long)ksig->ka.sa.sa_handler,
 					    msr);
-	} else
+
+		if (!user_write_access_begin(&frame->uc.uc_sigmask,
+					     sizeof(frame->uc.uc_sigmask)))
+			goto badframe;
+
 #endif
-	{
-		err |= __put_user(0, &frame->uc.uc_link);
-		err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
+	} else {
+		unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
+		unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
 					NULL, (unsigned long)ksig->ka.sa.sa_handler,
-					1);
+					1, badframe_block);
 	}
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-	if (err)
-		goto badframe;
+
+	unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
+	user_write_access_end();
 
 	/* Make sure signal handler doesn't get spurious FP exceptions */
 	tsk->thread.fp_state.fpscr = 0;
@@ -863,6 +915,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		regs->nip = (unsigned long) &frame->tramp[0];
 	}
 
+
+	/* Save the siginfo outside of the unsafe block. */
+	if (copy_siginfo_to_user(&frame->info, &ksig->info))
+		goto badframe;
+
 	/* Allocate a dummy caller frame for the signal handler. */
 	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
 	err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
@@ -902,6 +959,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 	return 0;
 
+badframe_block:
+	user_write_access_end();
 badframe:
 	signal_fault(current, regs, "handle_rt_signal64", frame);
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5a4d59a1070d5a3b32f65f7ea9de4c436c08527a..2e05c783440a33c3ed82bc19486736fb1be615db 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -83,7 +83,7 @@ DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
-DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
+static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
 
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
@@ -122,14 +122,14 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata;
  * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to
  * the set its siblings that share the L1-cache.
  */
-DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+static DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
 
 /*
  * On some big-cores system, thread_group_l2_cache_map for each CPU
  * corresponds to the set its siblings within the core that share the
  * L2-cache.
  */
-DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+static DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
 
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
@@ -1057,17 +1057,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 				local_memory_node(numa_cpu_lookup_table[cpu]));
 		}
 #endif
-		/*
-		 * cpu_core_map is now more updated and exists only since
-		 * its been exported for long. It only will have a snapshot
-		 * of cpu_cpu_mask.
-		 */
-		cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
 	}
 
 	/* Init the cpumasks so the boot CPU is related to itself */
 	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
 	cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
+	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
 
 	if (has_coregroup_support())
 		cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
@@ -1078,6 +1073,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 				cpu_smallcore_mask(boot_cpuid));
 	}
 
+	if (cpu_to_chip_id(boot_cpuid) != -1) {
+		int idx = num_possible_cpus() / threads_per_core;
+
+		/*
+		 * All threads of a core belong to the same chip, so
+		 * chip_id_lookup_table only needs one entry per core.
+		 * Assumption: if boot_cpuid doesn't have a chip-id, then no
+		 * other CPU will have one either.
+		 */
+		chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
+		if (chip_id_lookup_table)
+			memset(chip_id_lookup_table, -1, sizeof(int) * idx);
+	}
+
 	if (smp_ops && smp_ops->probe)
 		smp_ops->probe();
 }
@@ -1408,6 +1417,9 @@ static void remove_cpu_from_masks(int cpu)
 			set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
 	}
 
+	for_each_cpu(i, cpu_core_mask(cpu))
+		set_cpus_unrelated(cpu, i, cpu_core_mask);
+
 	if (has_coregroup_support()) {
 		for_each_cpu(i, cpu_coregroup_mask(cpu))
 			set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
@@ -1468,8 +1480,11 @@ static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
 
 static void add_cpu_to_masks(int cpu)
 {
+	struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
 	int first_thread = cpu_first_thread_sibling(cpu);
 	cpumask_var_t mask;
+	int chip_id = -1;
+	bool ret;
 	int i;
 
 	/*
@@ -1485,12 +1500,39 @@ static void add_cpu_to_masks(int cpu)
 	add_cpu_to_smallcore_masks(cpu);
 
 	/* In CPU-hotplug path, hence use GFP_ATOMIC */
-	alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
+	ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
 	update_mask_by_l2(cpu, &mask);
 
 	if (has_coregroup_support())
 		update_coregroup_mask(cpu, &mask);
 
+	if (chip_id_lookup_table && ret)
+		chip_id = cpu_to_chip_id(cpu);
+
+	if (chip_id == -1) {
+		cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
+		goto out;
+	}
+
+	if (shared_caches)
+		submask_fn = cpu_l2_cache_mask;
+
+	/* Update core_mask with all the CPUs that are part of submask */
+	or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);
+
+	/* Skip all CPUs already part of current CPU core mask */
+	cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
+
+	for_each_cpu(i, mask) {
+		if (chip_id == cpu_to_chip_id(i)) {
+			or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
+			cpumask_andnot(mask, mask, submask_fn(i));
+		} else {
+			cpumask_andnot(mask, mask, cpu_core_mask(i));
+		}
+	}
+
+out:
 	free_cpumask_var(mask);
 }
 
@@ -1521,6 +1563,9 @@ void start_secondary(void *unused)
 
 	vdso_getcpu_init();
 #endif
+	set_numa_node(numa_cpu_lookup_table[cpu]);
+	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
 	/* Update topology CPU masks */
 	add_cpu_to_masks(cpu);
 
@@ -1539,9 +1584,6 @@ void start_secondary(void *unused)
 			shared_caches = true;
 	}
 
-	set_numa_node(numa_cpu_lookup_table[cpu]);
-	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
-
 	smp_wmb();
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index b6440657ef92d065930a39d20ab6cc23d2d01ddd..1deb1bf331ddbdb1e0b527cdb87f4e159c70cf48 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -23,90 +23,56 @@
 
 #include <asm/paca.h>
 
-/*
- * Save stack-backtrace addresses into a stack_trace buffer.
- */
-static void save_context_stack(struct stack_trace *trace, unsigned long sp,
-			struct task_struct *tsk, int savesched)
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+		     struct task_struct *task, struct pt_regs *regs)
 {
+	unsigned long sp;
+
+	if (regs && !consume_entry(cookie, regs->nip))
+		return;
+
+	if (regs)
+		sp = regs->gpr[1];
+	else if (task == current)
+		sp = current_stack_frame();
+	else
+		sp = task->thread.ksp;
+
 	for (;;) {
 		unsigned long *stack = (unsigned long *) sp;
 		unsigned long newsp, ip;
 
-		if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
+		if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD))
 			return;
 
 		newsp = stack[0];
 		ip = stack[STACK_FRAME_LR_SAVE];
 
-		if (savesched || !in_sched_functions(ip)) {
-			if (!trace->skip)
-				trace->entries[trace->nr_entries++] = ip;
-			else
-				trace->skip--;
-		}
-
-		if (trace->nr_entries >= trace->max_entries)
+		if (!consume_entry(cookie, ip))
 			return;
 
 		sp = newsp;
 	}
 }
 
-void save_stack_trace(struct stack_trace *trace)
-{
-	unsigned long sp;
-
-	sp = current_stack_frame();
-
-	save_context_stack(trace, sp, current, 1);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
-	unsigned long sp;
-
-	if (!try_get_task_stack(tsk))
-		return;
-
-	if (tsk == current)
-		sp = current_stack_frame();
-	else
-		sp = tsk->thread.ksp;
-
-	save_context_stack(trace, sp, tsk, 0);
-
-	put_task_stack(tsk);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-
-void
-save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
-{
-	save_context_stack(trace, regs->gpr[1], current, 0);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_regs);
-
-#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
 /*
  * This function returns an error if it detects any unreliable features of the
  * stack.  Otherwise it guarantees that the stack trace is reliable.
  *
  * If the task is not 'current', the caller *must* ensure the task is inactive.
  */
-static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
-					   struct stack_trace *trace)
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+			     void *cookie, struct task_struct *task)
 {
 	unsigned long sp;
 	unsigned long newsp;
-	unsigned long stack_page = (unsigned long)task_stack_page(tsk);
+	unsigned long stack_page = (unsigned long)task_stack_page(task);
 	unsigned long stack_end;
 	int graph_idx = 0;
 	bool firstframe;
 
 	stack_end = stack_page + THREAD_SIZE;
-	if (!is_idle_task(tsk)) {
+	if (!is_idle_task(task)) {
 		/*
 		 * For user tasks, this is the SP value loaded on
 		 * kernel entry, see "PACAKSAVE(r13)" in _switch() and
@@ -130,10 +96,10 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
 		stack_end -= STACK_FRAME_OVERHEAD;
 	}
 
-	if (tsk == current)
+	if (task == current)
 		sp = current_stack_frame();
 	else
-		sp = tsk->thread.ksp;
+		sp = task->thread.ksp;
 
 	if (sp < stack_page + sizeof(struct thread_struct) ||
 	    sp > stack_end - STACK_FRAME_MIN_SIZE) {
@@ -182,7 +148,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
 		 * FIXME: IMHO these tests do not belong in
 		 * arch-dependent code, they are generic.
 		 */
-		ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
+		ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack);
 #ifdef CONFIG_KPROBES
 		/*
 		 * Mark stacktraces with kretprobed functions on them
@@ -192,36 +158,12 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
 			return -EINVAL;
 #endif
 
-		if (trace->nr_entries >= trace->max_entries)
-			return -E2BIG;
-		if (!trace->skip)
-			trace->entries[trace->nr_entries++] = ip;
-		else
-			trace->skip--;
+		if (!consume_entry(cookie, ip))
+			return -EINVAL;
 	}
 	return 0;
 }
 
-int save_stack_trace_tsk_reliable(struct task_struct *tsk,
-				  struct stack_trace *trace)
-{
-	int ret;
-
-	/*
-	 * If the task doesn't have a stack (e.g., a zombie), the stack is
-	 * "reliably" empty.
-	 */
-	if (!try_get_task_stack(tsk))
-		return 0;
-
-	ret = __save_stack_trace_tsk_reliable(tsk, trace);
-
-	put_task_stack(tsk);
-
-	return ret;
-}
-#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
-
 #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
 static void handle_backtrace_ipi(struct pt_regs *regs)
 {
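
The stacktrace.c rewrite above replaces the struct stack_trace fill-a-buffer interface with the newer callback style: arch_stack_walk() hands every recovered return address to a consume_entry function and stops as soon as that function returns false. A minimal userspace sketch of this inversion of control follows; the demo_* walker, its fake frame array and the small collecting buffer are invented for illustration only.

	/* Standalone sketch (not kernel code) of the consume-callback stack walk. */
	#include <stdbool.h>
	#include <stdio.h>

	typedef bool (*consume_fn)(void *cookie, unsigned long addr);

	/* Walk a fake call chain, handing each "return address" to the callback. */
	static void demo_stack_walk(consume_fn consume, void *cookie,
				    const unsigned long *frames, int nframes)
	{
		for (int i = 0; i < nframes; i++)
			if (!consume(cookie, frames[i]))
				return;		/* the consumer asked to stop */
	}

	struct demo_buf {
		unsigned long entries[4];
		int nr;
	};

	static bool demo_consume(void *cookie, unsigned long addr)
	{
		struct demo_buf *buf = cookie;

		if (buf->nr >= 4)
			return false;	/* stop the walk instead of overflowing */
		buf->entries[buf->nr++] = addr;
		return true;
	}

	int main(void)
	{
		const unsigned long frames[] = { 0x1000, 0x2000, 0x3000 };
		struct demo_buf buf = { .nr = 0 };

		demo_stack_walk(demo_consume, &buf, frames, 3);
		for (int i = 0; i < buf.nr; i++)
			printf("entry %d: %#lx\n", i, buf.entries[i]);
		return 0;
	}

This is why save_stack_trace(), save_stack_trace_tsk() and the skip/max_entries bookkeeping disappear from the arch code: buffer policy now lives in the generic stacktrace layer and the architecture only produces addresses.
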
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 078608ec2e92180aa7479de688c839e5077d81f2..a552c9e68d7e69d68236e055c825c359863c91be 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -82,16 +82,8 @@ int
 ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
 {
 	if ( (unsigned long)n >= 4096 )
-	{
-		unsigned long __user *buffer = (unsigned long __user *)n;
-		if (!access_ok(buffer, 5*sizeof(unsigned long))
-		    || __get_user(n, buffer)
-		    || __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
-		    || __get_user(outp, ((fd_set  __user * __user *)(buffer+2)))
-		    || __get_user(exp, ((fd_set  __user * __user *)(buffer+3)))
-		    || __get_user(tvp, ((struct __kernel_old_timeval  __user * __user *)(buffer+4))))
-			return -EFAULT;
-	}
+		return sys_old_select((void __user *)n);
+
 	return sys_select(n, inp, outp, exp, tvp);
 }
 #endif
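
Some context on the ppc_select() simplification above: the magic n >= 4096 case exists because the oldest PowerPC select ABI passed a single pointer to a five-word block (n, inp, outp, exp, tvp) rather than five separate arguments, which is the same layout sys_old_select() already unpacks, so the hand-rolled __get_user() sequence can go. A rough userspace illustration of that argument-block layout follows; demo_old_select() is invented for the example and only mirrors the unpacking, not the fd_set handling.

	/* Standalone sketch (not kernel code): the legacy five-word select argument block. */
	#include <stdio.h>

	static void demo_old_select(const unsigned long *buffer)
	{
		unsigned long n  = buffer[0];			/* number of fds */
		const void *inp  = (const void *)buffer[1];	/* fd_set __user * in the kernel */
		const void *outp = (const void *)buffer[2];
		const void *exp  = (const void *)buffer[3];
		const void *tvp  = (const void *)buffer[4];	/* struct __kernel_old_timeval __user * */

		printf("n=%lu inp=%p outp=%p exp=%p tvp=%p\n", n, inp, outp, exp, tvp);
	}

	int main(void)
	{
		unsigned long block[5] = { 3, 0, 0, 0, 0 };	/* n followed by four null pointers */

		demo_old_select(block);
		return 0;
	}
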
diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile
index 9e3be295dbba26d2506603f36ff6c54769ce8973..5476f62eb80f96ab77933e183c4f370c5b24d3d5 100644
--- a/arch/powerpc/kernel/syscalls/Makefile
+++ b/arch/powerpc/kernel/syscalls/Makefile
@@ -6,53 +6,38 @@ _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')	\
 	  $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
 syscall := $(src)/syscall.tbl
-syshdr := $(srctree)/$(src)/syscallhdr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
 
 quiet_cmd_syshdr = SYSHDR  $@
-      cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@'	\
-		   '$(syshdr_abis_$(basetarget))'		\
-		   '$(syshdr_pfx_$(basetarget))'		\
-		   '$(syshdr_offset_$(basetarget))'
+      cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis $(abis) $< $@
 
 quiet_cmd_systbl = SYSTBL  $@
-      cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@'	\
-		   '$(systbl_abis_$(basetarget))'		\
-		   '$(systbl_abi_$(basetarget))'		\
-		   '$(systbl_offset_$(basetarget))'
+      cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
 
-syshdr_abis_unistd_32 := common,nospu,32
+$(uapi)/unistd_32.h: abis := common,nospu,32
 $(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
-syshdr_abis_unistd_64 := common,nospu,64
+$(uapi)/unistd_64.h: abis := common,nospu,64
 $(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
-systbl_abis_syscall_table_32 := common,nospu,32
-systbl_abi_syscall_table_32 := 32
+$(kapi)/syscall_table_32.h: abis := common,nospu,32
 $(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE
 	$(call if_changed,systbl)
 
-systbl_abis_syscall_table_64 := common,nospu,64
-systbl_abi_syscall_table_64 := 64
+$(kapi)/syscall_table_64.h: abis := common,nospu,64
 $(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE
 	$(call if_changed,systbl)
 
-systbl_abis_syscall_table_c32 := common,nospu,32
-systbl_abi_syscall_table_c32 := c32
-$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE
-	$(call if_changed,systbl)
-
-systbl_abis_syscall_table_spu := common,spu
-systbl_abi_syscall_table_spu := spu
+$(kapi)/syscall_table_spu.h: abis := common,spu
 $(kapi)/syscall_table_spu.h: $(syscall) $(systbl) FORCE
 	$(call if_changed,systbl)
 
 uapisyshdr-y		+= unistd_32.h unistd_64.h
 kapisyshdr-y		+= syscall_table_32.h		\
 			   syscall_table_64.h		\
-			   syscall_table_c32.h		\
 			   syscall_table_spu.h
 
 uapisyshdr-y	:= $(addprefix $(uapi)/, $(uapisyshdr-y))
diff --git a/arch/powerpc/kernel/syscalls/syscallhdr.sh b/arch/powerpc/kernel/syscalls/syscallhdr.sh
deleted file mode 100644
index 02d6751f3be36c66c01e16959e9bbaef72defcf6..0000000000000000000000000000000000000000
--- a/arch/powerpc/kernel/syscalls/syscallhdr.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_UAPI_ASM_POWERPC_`basename "$out" | sed \
-	-e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
-	-e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
-	printf "#ifndef %s\n" "${fileguard}"
-	printf "#define %s\n" "${fileguard}"
-	printf "\n"
-
-	nxt=0
-	while read nr abi name entry compat ; do
-		if [ -z "$offset" ]; then
-			printf "#define __NR_%s%s\t%s\n" \
-				"${prefix}" "${name}" "${nr}"
-		else
-			printf "#define __NR_%s%s\t(%s + %s)\n" \
-				"${prefix}" "${name}" "${offset}" "${nr}"
-		fi
-		nxt=$((nr+1))
-	done
-
-	printf "\n"
-	printf "#ifdef __KERNEL__\n"
-	printf "#define __NR_syscalls\t%s\n" "${nxt}"
-	printf "#endif\n"
-	printf "\n"
-	printf "#endif /* %s */\n" "${fileguard}"
-) > "$out"
diff --git a/arch/powerpc/kernel/syscalls/syscalltbl.sh b/arch/powerpc/kernel/syscalls/syscalltbl.sh
deleted file mode 100644
index f7393a7b18aa0cd9ddd58ac176db622378584003..0000000000000000000000000000000000000000
--- a/arch/powerpc/kernel/syscalls/syscalltbl.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-my_abi="$4"
-offset="$5"
-
-emit() {
-	t_nxt="$1"
-	t_nr="$2"
-	t_entry="$3"
-
-	while [ $t_nxt -lt $t_nr ]; do
-		printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}"
-		t_nxt=$((t_nxt+1))
-	done
-	printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}"
-}
-
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
-	nxt=0
-	if [ -z "$offset" ]; then
-		offset=0
-	fi
-
-	while read nr abi name entry compat ; do
-		if [ "$my_abi" = "c32" ] && [ ! -z "$compat" ]; then
-			emit $((nxt+offset)) $((nr+offset)) $compat
-		else
-			emit $((nxt+offset)) $((nr+offset)) $entry
-		fi
-		nxt=$((nr+1))
-	done
-) > "$out"
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index d34276f3c495fe1b1c69eac93451652d21b27000..cb3358886203e9028182c82e577954057c8cb521 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -21,6 +21,7 @@
 #define __SYSCALL(nr, entry)	.long entry
 #endif
 
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
 .globl sys_call_table
 sys_call_table:
 #ifdef CONFIG_PPC64
@@ -30,8 +31,10 @@ sys_call_table:
 #endif
 
 #ifdef CONFIG_COMPAT
+#undef __SYSCALL_WITH_COMPAT
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, compat)
 .globl compat_sys_call_table
 compat_sys_call_table:
 #define compat_sys_sigsuspend	sys_sigsuspend
-#include <asm/syscall_table_c32.h>
+#include <asm/syscall_table_32.h>
 #endif
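
With syscall_table_c32.h gone, both 32-bit tables are generated from the single syscall_table_32.h, and systbl.S picks the entry point by redefining __SYSCALL_WITH_COMPAT before each include. A rough sketch of how one generated line expands into the two tables (the entry is hypothetical and the generated-header format is assumed from the shared syscall table script, not copied from this patch):

	/* hypothetical line from the generated asm/syscall_table_32.h */
	__SYSCALL_WITH_COMPAT(123, sys_example, compat_sys_example)

	/* native table (sys_call_table): pick the native entry */
	#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
	/*	-> __SYSCALL(123, sys_example) */

	/* compat table (compat_sys_call_table): redefined to pick the compat entry */
	#undef __SYSCALL_WITH_COMPAT
	#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, compat)
	/*	-> __SYSCALL(123, compat_sys_example) */
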
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 42761ebec9f75558202fe6ec48599060df8e89d6..ffe9537195aa33d348b027a6e2e4c868c105f0d2 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -68,7 +68,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
 	 */
 
 	/* read the text we want to modify */
-	if (probe_kernel_read_inst(&replaced, (void *)ip))
+	if (copy_inst_from_kernel_nofault(&replaced, (void *)ip))
 		return -EFAULT;
 
 	/* Make sure it is what we expect it to be */
@@ -130,7 +130,7 @@ __ftrace_make_nop(struct module *mod,
 	struct ppc_inst op, pop;
 
 	/* read where this goes */
-	if (probe_kernel_read_inst(&op, (void *)ip)) {
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
 		pr_err("Fetching opcode failed.\n");
 		return -EFAULT;
 	}
@@ -164,7 +164,7 @@ __ftrace_make_nop(struct module *mod,
 	/* When using -mkernel_profile there is no load to jump over */
 	pop = ppc_inst(PPC_INST_NOP);
 
-	if (probe_kernel_read_inst(&op, (void *)(ip - 4))) {
+	if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
 		pr_err("Fetching instruction at %lx failed.\n", ip - 4);
 		return -EFAULT;
 	}
@@ -197,7 +197,7 @@ __ftrace_make_nop(struct module *mod,
 	 * Check what is in the next instruction. We can see ld r2,40(r1), but
 	 * on first pass after boot we will see mflr r0.
 	 */
-	if (probe_kernel_read_inst(&op, (void *)(ip + 4))) {
+	if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) {
 		pr_err("Fetching op failed.\n");
 		return -EFAULT;
 	}
@@ -349,7 +349,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
 			return -1;
 
 	/* New trampoline -- read where this goes */
-	if (probe_kernel_read_inst(&op, (void *)tramp)) {
+	if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) {
 		pr_debug("Fetching opcode failed.\n");
 		return -1;
 	}
@@ -399,7 +399,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
 	struct ppc_inst op;
 
 	/* Read where this goes */
-	if (probe_kernel_read_inst(&op, (void *)ip)) {
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
 		pr_err("Fetching opcode failed.\n");
 		return -EFAULT;
 	}
@@ -526,10 +526,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	struct module *mod = rec->arch.mod;
 
 	/* read where this goes */
-	if (probe_kernel_read_inst(op, ip))
+	if (copy_inst_from_kernel_nofault(op, ip))
 		return -EFAULT;
 
-	if (probe_kernel_read_inst(op + 1, ip + 4))
+	if (copy_inst_from_kernel_nofault(op + 1, ip + 4))
 		return -EFAULT;
 
 	if (!expected_nop_sequence(ip, op[0], op[1])) {
@@ -592,7 +592,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	unsigned long ip = rec->ip;
 
 	/* read where this goes */
-	if (probe_kernel_read_inst(&op, (void *)ip))
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip))
 		return -EFAULT;
 
 	/* It should be pointing to a nop */
@@ -648,7 +648,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
 	}
 
 	/* Make sure we have a nop */
-	if (probe_kernel_read_inst(&op, ip)) {
+	if (copy_inst_from_kernel_nofault(&op, ip)) {
 		pr_err("Unable to read ftrace location %p\n", ip);
 		return -EFAULT;
 	}
@@ -726,7 +726,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 	}
 
 	/* read where this goes */
-	if (probe_kernel_read_inst(&op, (void *)ip)) {
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
 		pr_err("Fetching opcode failed.\n");
 		return -EFAULT;
 	}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index a44a30b0688ca5957a8a1265ab23a0add1ebc13e..b4ab95c9e94a89c2f94cd2ea2b68720580492e99 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -53,7 +53,6 @@
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
 #include <asm/processor.h>
-#include <asm/tm.h>
 #endif
 #include <asm/kexec.h>
 #include <asm/ppc-opcode.h>
@@ -222,7 +221,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
 	/*
 	 * system_reset_excption handles debugger, crash dump, panic, for 0x100
 	 */
-	if (TRAP(regs) == 0x100)
+	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
 		return;
 
 	crash_fadump(regs, "die oops");
@@ -290,7 +289,7 @@ void die(const char *str, struct pt_regs *regs, long err)
 	/*
 	 * system_reset_excption handles debugger, crash dump, panic, for 0x100
 	 */
-	if (TRAP(regs) != 0x100) {
+	if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) {
 		if (debugger(regs))
 			return;
 	}
@@ -405,7 +404,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
 	 * Now test if the interrupt has hit a range that may be using
 	 * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
 	 * problem ranges all run un-relocated. Test real and virt modes
-	 * at the same time by droping the high bit of the nip (virt mode
+	 * at the same time by dropping the high bit of the nip (virt mode
 	 * entry points still have the +0x4000 offset).
 	 */
 	nip &= ~0xc000000000000000ULL;
@@ -864,7 +863,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs)
 	unsigned long ea, msr, msr_mask;
 	bool swap;
 
-	if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
+	if (__get_user(instr, (unsigned int __user *)regs->nip))
 		return;
 
 	/*
@@ -1079,6 +1078,16 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
 	_exception(SIGTRAP, regs, TRAP_UNK, 0);
 }
 
+DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception)
+{
+	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+	       regs->nip, regs->msr, regs->trap);
+
+	_exception(SIGTRAP, regs, TRAP_UNK, 0);
+
+	return 0;
+}
+
 DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
 {
 	if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
@@ -1309,7 +1318,6 @@ static int emulate_instruction(struct pt_regs *regs)
 
 	if (!user_mode(regs))
 		return -EINVAL;
-	CHECK_FULL_REGS(regs);
 
 	if (get_user(instword, (u32 __user *)(regs->nip)))
 		return -EFAULT;
@@ -1406,7 +1414,6 @@ int is_valid_bugaddr(unsigned long addr)
 static int emulate_math(struct pt_regs *regs)
 {
 	int ret;
-	extern int do_mathemu(struct pt_regs *regs);
 
 	ret = do_mathemu(regs);
 	if (ret >= 0)
@@ -1606,15 +1613,6 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
 		bad_page_fault(regs, sig);
 }
 
-DEFINE_INTERRUPT_HANDLER(StackOverflow)
-{
-	pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
-		current->comm, task_pid_nr(current), regs->gpr[1]);
-	debugger(regs);
-	show_regs(regs);
-	panic("kernel stack overflow");
-}
-
 DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
 {
 	die("Kernel stack overflow", regs, SIGSEGV);
@@ -1693,7 +1691,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
 	u8 status;
 	bool hv;
 
-	hv = (TRAP(regs) == 0xf80);
+	hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL);
 	if (hv)
 		value = mfspr(SPRN_HFSCR);
 	else
@@ -2170,11 +2168,14 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
  * in the MSR is 0.  This indicates that SRR0/1 are live, and that
  * we therefore lost state by taking this exception.
  */
-void unrecoverable_exception(struct pt_regs *regs)
+void __noreturn unrecoverable_exception(struct pt_regs *regs)
 {
 	pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
 		 regs->trap, regs->nip, regs->msr);
 	die("Unrecoverable exception", regs, SIGABRT);
+	/* die() should not return */
+	for (;;)
+		;
 }
 
 #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
@@ -2189,10 +2190,11 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
 	return;
 }
 
-DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? */
+DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException)
 {
 	printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
 	WatchdogHandler(regs);
+	return 0;
 }
 #endif
 
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index e8a63713e65542a02d834197e1b5a302e7da07fe..186f69b11e94d2e70518ca9aa761e43aac04a322 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
 	if (addr & 0x03)
 		return -EINVAL;
 
+	if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+	    ppc_inst_prefixed(auprobe->insn) &&
+	    (addr & 0x3f) == 60) {
+		pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n");
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index e839a906fdf230da584b4265f6e7266e2f150200..717f2c9a7573cf395d41c338de1a99fb8d65fbf8 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -18,6 +18,7 @@
 #include <linux/security.h>
 #include <linux/memblock.h>
 #include <linux/syscalls.h>
+#include <linux/time_namespace.h>
 #include <vdso/datapage.h>
 
 #include <asm/syscall.h>
@@ -50,15 +51,21 @@ static union {
 } vdso_data_store __page_aligned_data;
 struct vdso_arch_data *vdso_data = &vdso_data_store.data;
 
+enum vvar_pages {
+	VVAR_DATA_PAGE_OFFSET,
+	VVAR_TIMENS_PAGE_OFFSET,
+	VVAR_NR_PAGES,
+};
+
 static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma,
 		       unsigned long text_size)
 {
 	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
 
-	if (new_size != text_size + PAGE_SIZE)
+	if (new_size != text_size)
 		return -EINVAL;
 
-	current->mm->context.vdso = (void __user *)new_vma->vm_start + PAGE_SIZE;
+	current->mm->context.vdso = (void __user *)new_vma->vm_start;
 
 	return 0;
 }
@@ -73,6 +80,14 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str
 	return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
 }
 
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf);
+
+static struct vm_special_mapping vvar_spec __ro_after_init = {
+	.name = "[vvar]",
+	.fault = vvar_fault,
+};
+
 static struct vm_special_mapping vdso32_spec __ro_after_init = {
 	.name = "[vdso]",
 	.mremap = vdso32_mremap,
@@ -83,17 +98,105 @@ static struct vm_special_mapping vdso64_spec __ro_after_init = {
 	.mremap = vdso64_mremap,
 };
 
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return ((struct vdso_arch_data *)vvar_page)->data;
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+
+	mmap_read_lock(mm);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long size = vma->vm_end - vma->vm_start;
+
+		if (vma_is_special_mapping(vma, &vvar_spec))
+			zap_page_range(vma, vma->vm_start, size);
+	}
+
+	mmap_read_unlock(mm);
+	return 0;
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops.
+	 * For more details see check_vma_flags() and __access_remote_vm().
+	 */
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *timens_page = find_timens_vvar_page(vma);
+	unsigned long pfn;
+
+	switch (vmf->pgoff) {
+	case VVAR_DATA_PAGE_OFFSET:
+		if (timens_page)
+			pfn = page_to_pfn(timens_page);
+		else
+			pfn = virt_to_pfn(vdso_data);
+		break;
+#ifdef CONFIG_TIME_NS
+	case VVAR_TIMENS_PAGE_OFFSET:
+		/*
+		 * If a task belongs to a time namespace then the namespace-
+		 * specific VVAR page is mapped at the VVAR_DATA_PAGE_OFFSET
+		 * and the real VVAR page is mapped at the
+		 * VVAR_TIMENS_PAGE_OFFSET offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+		pfn = virt_to_pfn(vdso_data);
+		break;
+#endif /* CONFIG_TIME_NS */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
 /*
  * This is called from binfmt_elf, we create the special vma for the
  * vDSO and insert it into the mm struct tree
  */
 static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-	struct mm_struct *mm = current->mm;
+	unsigned long vdso_size, vdso_base, mappings_size;
 	struct vm_special_mapping *vdso_spec;
+	unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE;
+	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	unsigned long vdso_size;
-	unsigned long vdso_base;
 
 	if (is_32bit_task()) {
 		vdso_spec = &vdso32_spec;
@@ -110,8 +213,8 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
 		vdso_base = 0;
 	}
 
-	/* Add a page to the vdso size for the data page */
-	vdso_size += PAGE_SIZE;
+	mappings_size = vdso_size + vvar_size;
+	mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK;
 
 	/*
 	 * pick a base address for the vDSO in process space. We try to put it
@@ -119,9 +222,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
 	 * and end up putting it elsewhere.
 	 * Add enough to the size so that the result can be aligned.
 	 */
-	vdso_base = get_unmapped_area(NULL, vdso_base,
-				      vdso_size + ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
-				      0, 0);
+	vdso_base = get_unmapped_area(NULL, vdso_base, mappings_size, 0, 0);
 	if (IS_ERR_VALUE(vdso_base))
 		return vdso_base;
 
@@ -133,7 +234,13 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
 	 * install_special_mapping or the perf counter mmap tracking code
 	 * will fail to recognise it as a vDSO.
 	 */
-	mm->context.vdso = (void __user *)vdso_base + PAGE_SIZE;
+	mm->context.vdso = (void __user *)vdso_base + vvar_size;
+
+	vma = _install_special_mapping(mm, vdso_base, vvar_size,
+				       VM_READ | VM_MAYREAD | VM_IO |
+				       VM_DONTDUMP | VM_PFNMAP, &vvar_spec);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
 
 	/*
 	 * our vma flags don't have VM_WRITE so by default, the process isn't
@@ -145,9 +252,12 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
 	 * It's fine to use that for setting breakpoints in the vDSO code
 	 * pages though.
 	 */
-	vma = _install_special_mapping(mm, vdso_base, vdso_size,
+	vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size,
 				       VM_READ | VM_EXEC | VM_MAYREAD |
 				       VM_MAYWRITE | VM_MAYEXEC, vdso_spec);
+	if (IS_ERR(vma))
+		do_munmap(mm, vdso_base, vvar_size, NULL);
+
 	return PTR_ERR_OR_ZERO(vma);
 }
 
@@ -249,10 +359,8 @@ static struct page ** __init vdso_setup_pages(void *start, void *end)
 	if (!pagelist)
 		panic("%s: Cannot allocate page list for VDSO", __func__);
 
-	pagelist[0] = virt_to_page(vdso_data);
-
 	for (i = 0; i < pages; i++)
-		pagelist[i + 1] = virt_to_page(start + i * PAGE_SIZE);
+		pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
 
 	return pagelist;
 }
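
The vdso.c changes split the data pages out of the [vdso] text mapping into a separate [vvar] mapping that is populated on fault. A rough sketch of the resulting per-process layout (illustrative, following the enum and _install_special_mapping() calls above; not literal /proc/<pid>/maps output):

	vdso_base              -> [vvar]  VVAR_NR_PAGES * PAGE_SIZE, VM_PFNMAP, read-only
	    pgoff 0: vdso_data, or the time-namespace page when one is active
	    pgoff 1: the real vdso_data when a timens page sits at pgoff 0
	vdso_base + vvar_size  -> [vdso]  text; mm->context.vdso points here

The linker-script updates that follow move _vdso_datapage to ". - 2 * PAGE_SIZE" to match the two-page [vvar] area in front of the text.
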
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index a4b806b0d618a7db6306cb443f10d293be1e28d7..58e0099f70f42e7c07a3e7c337f1ef2a55c3239b 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -17,7 +17,7 @@ ENTRY(_start)
 
 SECTIONS
 {
-	PROVIDE(_vdso_datapage = . - PAGE_SIZE);
+	PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE);
 	. = SIZEOF_HEADERS;
 
 	.hash          	: { *(.hash) }			:text
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
index 2f3c359cacd3a850a373141f37a46a9238fbd58d..0288cad428b039da38c4e9bab23a781d79bb5eed 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -17,7 +17,7 @@ ENTRY(_start)
 
 SECTIONS
 {
-	PROVIDE(_vdso_datapage = . - PAGE_SIZE);
+	PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE);
 	. = SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 801dc28fdcca58d448eee70d6a2e28ded5fbfdcf..f5a52f444e3604b26a90b22fc6cc7621a4bed36f 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -67,9 +67,7 @@ _GLOBAL(load_up_altivec)
 #ifdef CONFIG_PPC32
 	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
 	oris	r9,r9,MSR_VEC@h
-#ifdef CONFIG_VMAP_STACK
 	tovirt(r5, r5)
-#endif
 #else
 	ld	r4,PACACURRENT(r13)
 	addi	r5,r4,THREAD		/* Get THREAD */
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index c9a889880214eec54114b9f5c4d5b49c26ac7a1a..0196d0c211aca76648a138f9406512c5060a8bed 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -24,6 +24,7 @@
 #include <asm/smp.h>
 #include <asm/setjmp.h>
 #include <asm/debug.h>
+#include <asm/interrupt.h>
 
 /*
  * The primary CPU waits a while for all secondary CPUs to enter. This is to
@@ -336,7 +337,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	 * If we came in via system reset, wait a while for the secondary
 	 * CPUs to enter.
 	 */
-	if (TRAP(regs) == 0x100)
+	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
 		mdelay(PRIMARY_TIMEOUT);
 
 	crash_kexec_prepare_cpus(crashing_cpu);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index e452158a18d771358b70d49095eb39bd720b1579..c3e31fef0be1c8a373bf88d7f398f52787735754 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/pkeys.h>
 
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -133,6 +134,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
 	else
 		kvmppc_mmu_flush_icache(pfn);
 
+	rflags |= pte_to_hpte_pkey_bits(0, HPTE_USE_KERNEL_KEY);
 	rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
 
 	/*
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 13bad6bf4c9589f827f867663d20a3575d4090c4..4a532410e1282936f9685e9ea5874cea78e03f5d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -803,7 +803,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
 		vcpu->arch.dawrx1 = value2;
 		return H_SUCCESS;
 	case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
-		/* KVM does not support mflags=2 (AIL=2) */
+		/*
+		 * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
+		 * Keep this in sync with kvmppc_filter_lpcr_hv().
+		 */
 		if (mflags != 0 && mflags != 3)
 			return H_UNSUPPORTED_FLAG_START;
 		return H_TOO_HARD;
@@ -1635,6 +1638,41 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+/*
+ * Enforce limits on guest LPCR values based on hardware availability,
+ * guest configuration, and possibly hypervisor support and security
+ * concerns.
+ */
+unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
+{
+	/* LPCR_TC only applies to HPT guests */
+	if (kvm_is_radix(kvm))
+		lpcr &= ~LPCR_TC;
+
+	/* On POWER8 and above, userspace can modify AIL */
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		lpcr &= ~LPCR_AIL;
+	if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
+		lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+
+	/*
+	 * On POWER9, allow userspace to enable large decrementer for the
+	 * guest, whether or not the host has it enabled.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		lpcr &= ~LPCR_LD;
+
+	return lpcr;
+}
+
+static void verify_lpcr(struct kvm *kvm, unsigned long lpcr)
+{
+	if (lpcr != kvmppc_filter_lpcr_hv(kvm, lpcr)) {
+		WARN_ONCE(1, "lpcr 0x%lx differs from filtered 0x%lx\n",
+			  lpcr, kvmppc_filter_lpcr_hv(kvm, lpcr));
+	}
+}
+
 static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
 		bool preserve_top32)
 {
@@ -1643,6 +1681,23 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
 	u64 mask;
 
 	spin_lock(&vc->lock);
+
+	/*
+	 * Userspace can only modify
+	 * DPFD (default prefetch depth), ILE (interrupt little-endian),
+	 * TC (translation control), AIL (alternate interrupt location),
+	 * LD (large decrementer).
+	 * These are subject to restrictions from kvmppc_filter_lpcr_hv().
+	 */
+	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD;
+
+	/* Broken 32-bit version of LPCR must not clear top bits */
+	if (preserve_top32)
+		mask &= 0xFFFFFFFF;
+
+	new_lpcr = kvmppc_filter_lpcr_hv(kvm,
+			(vc->lpcr & ~mask) | (new_lpcr & mask));
+
 	/*
 	 * If ILE (interrupt little-endian) has changed, update the
 	 * MSR_LE bit in the intr_msr for each vcpu in this vcore.
@@ -1661,25 +1716,8 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
 		}
 	}
 
-	/*
-	 * Userspace can only modify DPFD (default prefetch depth),
-	 * ILE (interrupt little-endian) and TC (translation control).
-	 * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.).
-	 */
-	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
-	if (cpu_has_feature(CPU_FTR_ARCH_207S))
-		mask |= LPCR_AIL;
-	/*
-	 * On POWER9, allow userspace to enable large decrementer for the
-	 * guest, whether or not the host has it enabled.
-	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		mask |= LPCR_LD;
+	vc->lpcr = new_lpcr;
 
-	/* Broken 32-bit version of LPCR must not clear top bits */
-	if (preserve_top32)
-		mask &= 0xFFFFFFFF;
-	vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
 	spin_unlock(&vc->lock);
 }
 
@@ -3728,7 +3766,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	vcpu->arch.dec_expires = dec + tb;
 	vcpu->cpu = -1;
 	vcpu->arch.thread_cpu = -1;
+	/* Save guest CTRL register, set runlatch to 1 */
 	vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+	if (!(vcpu->arch.ctrl & 1))
+		mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
 
 	vcpu->arch.iamr = mfspr(SPRN_IAMR);
 	vcpu->arch.pspb = mfspr(SPRN_PSPB);
@@ -3749,7 +3790,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_DSCR, host_dscr);
 	mtspr(SPRN_TIDR, host_tidr);
 	mtspr(SPRN_IAMR, host_iamr);
-	mtspr(SPRN_PSPB, 0);
 
 	if (host_amr != vcpu->arch.amr)
 		mtspr(SPRN_AMR, host_amr);
@@ -4641,8 +4681,10 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
 		struct kvmppc_vcore *vc = kvm->arch.vcores[i];
 		if (!vc)
 			continue;
+
 		spin_lock(&vc->lock);
 		vc->lpcr = (vc->lpcr & ~mask) | lpcr;
+		verify_lpcr(kvm, vc->lpcr);
 		spin_unlock(&vc->lock);
 		if (++cores_done >= kvm->arch.online_vcores)
 			break;
@@ -4970,6 +5012,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 		kvmppc_setup_partition_table(kvm);
 	}
 
+	verify_lpcr(kvm, lpcr);
 	kvm->arch.lpcr = lpcr;
 
 	/* Initialization for future HPT resizes */
@@ -5369,8 +5412,10 @@ static unsigned int default_hcall_list[] = {
 	H_READ,
 	H_PROTECT,
 	H_BULK_REMOVE,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
 	H_GET_TCE,
 	H_PUT_TCE,
+#endif
 	H_SET_DABR,
 	H_SET_XDABR,
 	H_CEDE,
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 158d309b42a387e9adff5eac7861c793136dd76c..7a0e33a9c980df7f9af49a558df216ab89d7f385 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -662,6 +662,9 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 
 void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
 {
+	/* Guest must always run with ME enabled, HV disabled. */
+	msr = (msr | MSR_ME) & ~MSR_HV;
+
 	/*
 	 * Check for illegal transactional state bit combination
 	 * and if we find it, force the TS field to a safe state.
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 0cd0e7aad588b2db32a8c9899af9617bbf9b5791..60724f6744219dcf1ad6c8bf479be05b8b926f9f 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -132,8 +132,33 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
 	}
 }
 
+/*
+ * This can result in some L0 HV register state being leaked to an L1
+ * hypervisor when the hv_guest_state is copied back to the guest after
+ * being modified here.
+ *
+ * There is no known problem with such a leak, and in many cases these
+ * register settings could be derived by the guest by observing behaviour
+ * and timing, interrupts, etc., but it is an issue to consider.
+ */
 static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 {
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	u64 mask;
+
+	/*
+	 * Don't let L1 change LPCR bits for the L2 except these:
+	 */
+	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+		LPCR_LPES | LPCR_MER;
+
+	/*
+	 * Additional filtering is required depending on hardware
+	 * and configuration.
+	 */
+	hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
+			(vc->lpcr & ~mask) | (hr->lpcr & mask));
+
 	/*
 	 * Don't let L1 enable features for L2 which we've disabled for L1,
 	 * but preserve the interrupt cause field.
@@ -271,8 +296,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	u64 hv_ptr, regs_ptr;
 	u64 hdec_exp;
 	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
-	u64 mask;
-	unsigned long lpcr;
 
 	if (vcpu->kvm->arch.l1_ptcr == 0)
 		return H_NOT_AVAILABLE;
@@ -320,10 +343,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	vcpu->arch.nested = l2;
 	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
 	vcpu->arch.regs = l2_regs;
-	vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
-	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
-		LPCR_LPES | LPCR_MER;
-	lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
+
+	/* Guest must always run with ME enabled, HV disabled. */
+	vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
+
 	sanitise_hv_regs(vcpu, &l2_hv);
 	restore_hv_regs(vcpu, &l2_hv);
 
@@ -335,7 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 			r = RESUME_HOST;
 			break;
 		}
-		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
+		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr);
 	} while (is_kvmppc_resume_guest(r));
 
 	/* save L2 state for return */
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 88da2764c1bb9c63d87a795bedaa5caf08593e75..7af7c70f14680e2e3530c1d174b26454519dd38c 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -673,8 +673,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 }
 
 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
-		      unsigned long pte_index, unsigned long avpn,
-		      unsigned long va)
+		      unsigned long pte_index, unsigned long avpn)
 {
 	struct kvm *kvm = vcpu->kvm;
 	__be64 *hpte;
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index d4efc182662a8cecf0b358fa844bd50c15570d76..f2c690ee75d1a483fe3678172fae10970b27160d 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
 endif
 
-obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o test_code-patching.o
+obj-y += alloc.o code-patching.o feature-fixups.o pmem.o test_code-patching.o
 
 ifndef CONFIG_KASAN
 obj-y	+=	string.o memcmp_$(BITS).o
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
index b895166afc8283b8f4dbbc753a17d331f10cff1b..f3999cbb2fcc4c8a2191de5424fdf1ea850c65a1 100644
--- a/arch/powerpc/lib/checksum_wrappers.c
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -16,16 +16,12 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
 {
 	__wsum csum;
 
-	might_sleep();
-
-	if (unlikely(!access_ok(src, len)))
+	if (unlikely(!user_read_access_begin(src, len)))
 		return 0;
 
-	allow_read_from_user(src, len);
-
 	csum = csum_partial_copy_generic((void __force *)src, dst, len);
 
-	prevent_read_from_user(src, len);
+	user_read_access_end();
 	return csum;
 }
 EXPORT_SYMBOL(csum_and_copy_from_user);
@@ -34,15 +30,12 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len)
 {
 	__wsum csum;
 
-	might_sleep();
-	if (unlikely(!access_ok(dst, len)))
+	if (unlikely(!user_write_access_begin(dst, len)))
 		return 0;
 
-	allow_write_to_user(dst, len);
-
 	csum = csum_partial_copy_generic(src, (void __force *)dst, len);
 
-	prevent_write_to_user(dst, len);
+	user_write_access_end();
 	return csum;
 }
 EXPORT_SYMBOL(csum_and_copy_to_user);
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 2333625b5e315006ac9d5b97477957fc2cf76aff..870b30d9be2f85ca00c73fd7baf27804386bae41 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -21,10 +21,15 @@
 static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr,
 			       struct ppc_inst *patch_addr)
 {
-	if (!ppc_inst_prefixed(instr))
-		__put_user_asm_goto(ppc_inst_val(instr), patch_addr, failed, "stw");
-	else
-		__put_user_asm_goto(ppc_inst_as_u64(instr), patch_addr, failed, "std");
+	if (!ppc_inst_prefixed(instr)) {
+		u32 val = ppc_inst_val(instr);
+
+		__put_kernel_nofault(patch_addr, &val, u32, failed);
+	} else {
+		u64 val = ppc_inst_as_ulong(instr);
+
+		__put_kernel_nofault(patch_addr, &val, u64, failed);
+	}
 
 	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
 							    "r" (exec_addr));
diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c
deleted file mode 100644
index 9cc17eb62462a64efcb4f9f26f91765a832467f0..0000000000000000000000000000000000000000
--- a/arch/powerpc/lib/inst.c
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  Copyright 2020, IBM Corporation.
- */
-
-#include <linux/uaccess.h>
-#include <asm/disassemble.h>
-#include <asm/inst.h>
-#include <asm/ppc-opcode.h>
-
-#ifdef CONFIG_PPC64
-int probe_user_read_inst(struct ppc_inst *inst,
-			 struct ppc_inst __user *nip)
-{
-	unsigned int val, suffix;
-	int err;
-
-	err = copy_from_user_nofault(&val, nip, sizeof(val));
-	if (err)
-		return err;
-	if (get_op(val) == OP_PREFIX) {
-		err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4);
-		*inst = ppc_inst_prefix(val, suffix);
-	} else {
-		*inst = ppc_inst(val);
-	}
-	return err;
-}
-
-int probe_kernel_read_inst(struct ppc_inst *inst,
-			   struct ppc_inst *src)
-{
-	unsigned int val, suffix;
-	int err;
-
-	err = copy_from_kernel_nofault(&val, src, sizeof(val));
-	if (err)
-		return err;
-	if (get_op(val) == OP_PREFIX) {
-		err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4);
-		*inst = ppc_inst_prefix(val, suffix);
-	} else {
-		*inst = ppc_inst(val);
-	}
-	return err;
-}
-#else /* !CONFIG_PPC64 */
-int probe_user_read_inst(struct ppc_inst *inst,
-			 struct ppc_inst __user *nip)
-{
-	unsigned int val;
-	int err;
-
-	err = copy_from_user_nofault(&val, nip, sizeof(val));
-	if (!err)
-		*inst = ppc_inst(val);
-
-	return err;
-}
-
-int probe_kernel_read_inst(struct ppc_inst *inst,
-			   struct ppc_inst *src)
-{
-	unsigned int val;
-	int err;
-
-	err = copy_from_kernel_nofault(&val, src, sizeof(val));
-	if (!err)
-		*inst = ppc_inst(val);
-
-	return err;
-}
-#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c6aebc149d14129ca17a23a1d289bb8cde827131..45bda25207557796cdd76f7b0d6e8eeb0815a489 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1401,10 +1401,6 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 		break;
 	}
 
-	/* Following cases refer to regs->gpr[], so we need all regs */
-	if (!FULL_REGS(regs))
-		return -1;
-
 	rd = (word >> 21) & 0x1f;
 	ra = (word >> 16) & 0x1f;
 	rb = (word >> 11) & 0x1f;
@@ -3086,15 +3082,6 @@ NOKPROBE_SYMBOL(analyse_instr);
  */
 static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs)
 {
-#ifdef CONFIG_PPC32
-	/*
-	 * Check if we will touch kernel stack overflow
-	 */
-	if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) {
-		printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n");
-		return -EINVAL;
-	}
-#endif /* CONFIG_PPC32 */
 	/*
 	 * Check if we already set since that means we'll
 	 * lose the previous value.
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
index 30b4b69c694198dc557d5262d5dba7cd0308ec30..327165f26ca6a66f732e5ac2af0c59904054f1de 100644
--- a/arch/powerpc/math-emu/math.c
+++ b/arch/powerpc/math-emu/math.c
@@ -225,7 +225,7 @@ record_exception(struct pt_regs *regs, int eflag)
 int
 do_mathemu(struct pt_regs *regs)
 {
-	void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0;
+	void *op0 = NULL, *op1 = NULL, *op2 = NULL, *op3 = NULL;
 	unsigned long pc = regs->nip;
 	signed short sdisp;
 	u32 insn = 0;
@@ -234,7 +234,7 @@ do_mathemu(struct pt_regs *regs)
 	int type = 0;
 	int eflag, trap;
 
-	if (get_user(insn, (u32 *)pc))
+	if (get_user(insn, (u32 __user *)pc))
 		return -EFAULT;
 
 	switch (insn >> 26) {
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3b4e9e4e25eaa2837fc11fe7ad9f2bd4211c902e..c3df3a8501d4803aade1525ce3a4fc7c7fa78f63 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -8,7 +8,8 @@ ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
 obj-y				:= fault.o mem.o pgtable.o mmap.o maccess.o \
 				   init_$(BITS).o pgtable_$(BITS).o \
 				   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
-				   init-common.o mmu_context.o drmem.o
+				   init-common.o mmu_context.o drmem.o \
+				   cacheflush.o
 obj-$(CONFIG_PPC_MMU_NOHASH)	+= nohash/
 obj-$(CONFIG_PPC_BOOK3S_32)	+= book3s32/
 obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64/
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
index 446d9de88ce4c1d3a0acd63d7656fbb152ef5f8e..7f0c8a78ba0c08f38eba1cc2fb04a88a89c07be5 100644
--- a/arch/powerpc/mm/book3s32/Makefile
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -9,3 +9,4 @@ endif
 obj-y += mmu.o mmu_context.o
 obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
 obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
+obj-$(CONFIG_PPC_KUEP) += kuep.o
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index 0e6dc830c38bf6cb1b78e41e04502356254299c8..fb4233a5bdf7d791650cb79a15ca99e2e557ba2b 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -140,10 +140,6 @@ _GLOBAL(hash_page)
 	bne-	.Lretry			/* retry if someone got there first */
 
 	mfsrin	r3,r4			/* get segment reg for segment */
-#ifndef CONFIG_VMAP_STACK
-	mfctr	r0
-	stw	r0,_CTR(r11)
-#endif
 	bl	create_hpte		/* add the hash table entry */
 
 #ifdef CONFIG_SMP
@@ -152,17 +148,7 @@ _GLOBAL(hash_page)
 	li	r0,0
 	stw	r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
 #endif
-
-#ifdef CONFIG_VMAP_STACK
 	b	fast_hash_page_return
-#else
-	/* Return from the exception */
-	lwz	r5,_CTR(r11)
-	mtctr	r5
-	lwz	r0,GPR0(r11)
-	lwz	r8,GPR8(r11)
-	b	fast_exception_return
-#endif
 
 #ifdef CONFIG_SMP
 .Lhash_page_out:
diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c
new file mode 100644
index 0000000000000000000000000000000000000000..8ed1b8634839736bd9e8c4a7359c245ee87c9bb2
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/kuep.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <asm/kup.h>
+#include <asm/reg.h>
+#include <asm/task_size_32.h>
+#include <asm/mmu.h>
+
+#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do {		\
+	if (TASK_SIZE > ((n) << 28))			\
+		mtsr(val1, (n) << 28);			\
+	if (TASK_SIZE > (((n) + 1) << 28))		\
+		mtsr(val2, ((n) + 1) << 28);		\
+	val1 = (val1 + 0x222) & 0xf0ffffff;		\
+	val2 = (val2 + 0x222) & 0xf0ffffff;		\
+} while (0)
+
+static __always_inline void kuep_update(u32 val)
+{
+	int val1 = val;
+	int val2 = (val + 0x111) & 0xf0ffffff;
+
+	KUEP_UPDATE_TWO_USER_SEGMENTS(0);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(2);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(4);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(6);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(8);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(10);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(12);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(14);
+}
+
+void kuep_lock(void)
+{
+	kuep_update(mfsr(0) | SR_NX);
+}
+
+void kuep_unlock(void)
+{
+	kuep_update(mfsr(0) & ~SR_NX);
+}
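
kuep_lock() and kuep_unlock() toggle the No-Execute bit across all user segment registers, deriving each successive register value arithmetically rather than reading every register back. For a single segment the operation amounts to the sketch below (illustrative only; it reuses the mfsr()/mtsr()/SR_NX helpers as they are used in this file and is not part of the patch):

	static inline void set_segment_nx(int segment, bool nx)
	{
		u32 ea = (u32)segment << 28;	/* each SR covers a 256MB segment */
		u32 sr = mfsr(ea);

		if (nx)
			sr |= SR_NX;
		else
			sr &= ~SR_NX;

		mtsr(sr, ea);
	}
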
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index d7eb266a3f7ad83840229cc4bfd95810ec1d4550..159930351d9f960b456279beaffa45ce7418a84d 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -162,7 +162,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 	unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
 
 
-	if (debug_pagealloc_enabled() || __map_without_bats) {
+	if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) {
 		pr_debug_once("Read-Write memory mapped without BATs\n");
 		if (base >= border)
 			return base;
@@ -184,17 +184,10 @@ static bool is_module_segment(unsigned long addr)
 {
 	if (!IS_ENABLED(CONFIG_MODULES))
 		return false;
-#ifdef MODULES_VADDR
 	if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
 		return false;
 	if (addr > ALIGN(MODULES_END, SZ_256M) - 1)
 		return false;
-#else
-	if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
-		return false;
-	if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1)
-		return false;
-#endif
 	return true;
 }
 
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 567e0c6b3978e79dde3305f2ee1e2bd2663ef1e8..ad5eff097d319b35db6aaa0d96b3512e74a5a4bb 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/mm.h>
+#include <linux/stop_machine.h>
 
 #include <asm/sections.h>
 #include <asm/mmu.h>
@@ -400,10 +401,103 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
+
+struct change_memory_parms {
+	unsigned long start, end, newpp;
+	unsigned int step, nr_cpus, master_cpu;
+	atomic_t cpu_counter;
+};
+
+// We'd rather this was on the stack but it has to be in the RMO
+static struct change_memory_parms chmem_parms;
+
+// And therefore we need a lock to protect it from concurrent use
+static DEFINE_MUTEX(chmem_lock);
+
+static void change_memory_range(unsigned long start, unsigned long end,
+				unsigned int step, unsigned long newpp)
+{
+	unsigned long idx;
+
+	pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
+		 start, end, newpp, step);
+
+	for (idx = start; idx < end; idx += step)
+		/* Not sure if we can do much with the return value */
+		mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
+							mmu_kernel_ssize);
+}
+
+static int notrace chmem_secondary_loop(struct change_memory_parms *parms)
+{
+	unsigned long msr, tmp, flags;
+	int *p;
+
+	p = &parms->cpu_counter.counter;
+
+	local_irq_save(flags);
+	hard_irq_disable();
+
+	asm volatile (
+	// Switch to real mode and leave interrupts off
+	"mfmsr	%[msr]			;"
+	"li	%[tmp], %[MSR_IR_DR]	;"
+	"andc	%[tmp], %[msr], %[tmp]	;"
+	"mtmsrd %[tmp]			;"
+
+	// Tell the master we are in real mode
+	"1:				"
+	"lwarx	%[tmp], 0, %[p]		;"
+	"addic	%[tmp], %[tmp], -1	;"
+	"stwcx.	%[tmp], 0, %[p]		;"
+	"bne-	1b			;"
+
+	// Spin until the counter goes to zero
+	"2:				;"
+	"lwz	%[tmp], 0(%[p])		;"
+	"cmpwi	%[tmp], 0		;"
+	"bne-	2b			;"
+
+	// Switch back to virtual mode
+	"mtmsrd %[msr]			;"
+
+	: // outputs
+	  [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p)
+	: // inputs
+	  [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR)
+	: // clobbers
+	  "cc", "xer"
+	);
+
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+static int change_memory_range_fn(void *data)
+{
+	struct change_memory_parms *parms = data;
+
+	if (parms->master_cpu != smp_processor_id())
+		return chmem_secondary_loop(parms);
+
+	// Wait for all but one CPU (this one) to call-in
+	while (atomic_read(&parms->cpu_counter) > 1)
+		barrier();
+
+	change_memory_range(parms->start, parms->end, parms->step, parms->newpp);
+
+	mb();
+
+	// Signal the other CPUs that we're done
+	atomic_dec(&parms->cpu_counter);
+
+	return 0;
+}
+
 static bool hash__change_memory_range(unsigned long start, unsigned long end,
 				      unsigned long newpp)
 {
-	unsigned long idx;
 	unsigned int step, shift;
 
 	shift = mmu_psize_defs[mmu_linear_psize].shift;
@@ -415,25 +509,43 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end,
 	if (start >= end)
 		return false;
 
-	pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
-		 start, end, newpp, step);
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+		mutex_lock(&chmem_lock);
 
-	for (idx = start; idx < end; idx += step)
-		/* Not sure if we can do much with the return value */
-		mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
-							mmu_kernel_ssize);
+		chmem_parms.start = start;
+		chmem_parms.end = end;
+		chmem_parms.step = step;
+		chmem_parms.newpp = newpp;
+		chmem_parms.master_cpu = smp_processor_id();
+
+		cpus_read_lock();
+
+		atomic_set(&chmem_parms.cpu_counter, num_online_cpus());
+
+		// Ensure state is consistent before we call the other CPUs
+		mb();
+
+		stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms,
+					cpu_online_mask);
+
+		cpus_read_unlock();
+		mutex_unlock(&chmem_lock);
+	} else
+		change_memory_range(start, end, step, newpp);
 
 	return true;
 }
 
 void hash__mark_rodata_ro(void)
 {
-	unsigned long start, end;
+	unsigned long start, end, pp;
 
 	start = (unsigned long)_stext;
 	end = (unsigned long)__init_begin;
 
-	WARN_ON(!hash__change_memory_range(start, end, PP_RXXX));
+	pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY);
+
+	WARN_ON(!hash__change_memory_range(start, end, pp));
 }
 
 void hash__mark_initmem_nx(void)
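
Under an LPAR the bolted HPTE update is now performed while every other CPU waits in real mode, coordinated through stop_machine_cpuslocked() and the cpu_counter rendezvous. The sketch below restates the protocol of change_memory_range_fn() and the inline assembly in chmem_secondary_loop() as plain C, purely for readability; it is not the code that runs:

	static void rendezvous_sketch(struct change_memory_parms *parms, bool is_master)
	{
		if (!is_master) {
			/* secondary: drop to real mode, then announce arrival */
			atomic_dec(&parms->cpu_counter);
			while (atomic_read(&parms->cpu_counter) != 0)
				cpu_relax();	/* spin until the master is done */
			/* return to virtual mode */
		} else {
			/* master: wait until only this CPU is left uncounted */
			while (atomic_read(&parms->cpu_counter) > 1)
				cpu_relax();
			change_memory_range(parms->start, parms->end,
					    parms->step, parms->newpp);
			/* counter reaches 0, releasing the secondaries */
			atomic_dec(&parms->cpu_counter);
		}
	}
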
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 581b20a2feaf610af74327f1fabbc2dccb0b080a..96d9aa1640073cc191ed495fad014807f5c32124 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -338,7 +338,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 		      int psize, int ssize)
 {
-	unsigned long vaddr;
+	unsigned long vaddr, time_limit;
 	unsigned int step, shift;
 	int rc;
 	int ret = 0;
@@ -351,8 +351,19 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 
 	/* Unmap the full range specificied */
 	vaddr = ALIGN_DOWN(vstart, step);
+	time_limit = jiffies + HZ;
+
 	for (;vaddr < vend; vaddr += step) {
 		rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize);
+
+		/*
+		 * For a large number of mappings, introduce a cond_resched()
+		 * to prevent softlockup warnings.
+		 */
+		if (time_after(jiffies, time_limit)) {
+			cond_resched();
+			time_limit = jiffies + HZ;
+		}
 		if (rc == -ENOENT) {
 			ret = -ENOENT;
 			continue;
@@ -1145,7 +1156,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 
 	/* page is dirty */
 	if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) {
-		if (trap == 0x400) {
+		if (trap == INTERRUPT_INST_STORAGE) {
 			flush_dcache_icache_page(page);
 			set_bit(PG_dcache_clean, &page->flags);
 		} else
@@ -1545,10 +1556,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
 	if (user_mode(regs) || (region_id == USER_REGION_ID))
 		access &= ~_PAGE_PRIVILEGED;
 
-	if (regs->trap == 0x400)
+	if (TRAP(regs) == INTERRUPT_INST_STORAGE)
 		access |= _PAGE_EXEC;
 
-	err = hash_page_mm(mm, ea, access, regs->trap, flags);
+	err = hash_page_mm(mm, ea, access, TRAP(regs), flags);
 	if (unlikely(err < 0)) {
 		// failed to instert a hash PTE due to an hypervisor error
 		if (user_mode(regs)) {
@@ -1572,10 +1583,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
 DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
 {
 	unsigned long dsisr = regs->dsisr;
-	long err;
 
-	if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT)))
-		goto page_fault;
+	if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
+		hash__do_page_fault(regs);
+		return 0;
+	}
 
 	/*
 	 * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
@@ -1595,13 +1607,10 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
 		return 0;
 	}
 
-	err = __do_hash_fault(regs);
-	if (err) {
-page_fault:
-		err = hash__do_page_fault(regs);
-	}
+	if (__do_hash_fault(regs))
+		hash__do_page_fault(regs);
 
-	return err;
+	return 0;
 }
 
 #ifdef CONFIG_PPC_MM_SLICES
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index 0c8557220ae28a328a44dbc80d47b7f86be08f60..c10fc8a72fb37aed5c93a3edd095e4b3d70739da 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -119,7 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm)
 		/* This is fork. Copy hash_context details from current->mm */
 		memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
 #ifdef CONFIG_PPC_SUBPAGE_PROT
-		/* inherit subpage prot detalis if we have one. */
+		/* inherit subpage prot details if we have one. */
 		if (current->mm->context.hash_context->spt) {
 			mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
 								GFP_KERNEL);
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 15dcc5ad91c558c8e9007960bfe075568298264e..a2d9ad1387094778146a058953dccf67de33bf04 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -301,19 +301,6 @@ void setup_kuap(bool disabled)
 }
 #endif
 
-static inline void update_current_thread_amr(u64 value)
-{
-	current->thread.regs->amr = value;
-}
-
-static inline void update_current_thread_iamr(u64 value)
-{
-	if (!likely(pkey_execute_disable_supported))
-		return;
-
-	current->thread.regs->iamr = value;
-}
-
 #ifdef CONFIG_PPC_MEM_KEYS
 void pkey_mm_init(struct mm_struct *mm)
 {
@@ -328,7 +315,7 @@ static inline void init_amr(int pkey, u8 init_bits)
 	u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
 	u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey));
 
-	update_current_thread_amr(old_amr | new_amr_bits);
+	current->thread.regs->amr = old_amr | new_amr_bits;
 }
 
 static inline void init_iamr(int pkey, u8 init_bits)
@@ -336,7 +323,10 @@ static inline void init_iamr(int pkey, u8 init_bits)
 	u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey));
 	u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey));
 
-	update_current_thread_iamr(old_iamr | new_iamr_bits);
+	if (!likely(pkey_execute_disable_supported))
+		return;
+
+	current->thread.regs->iamr = old_iamr | new_iamr_bits;
 }
 
 /*
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 98f0b243c1ab21e0953645dae57a5bf33b8ac80b..50d536ecc89b1aa95b139268f51abf1a430689ee 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -108,7 +108,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 
 set_the_pte:
 	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
-	smp_wmb();
+	asm volatile("ptesync": : :"memory");
 	return 0;
 }
 
@@ -168,7 +168,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa,
 
 set_the_pte:
 	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
-	smp_wmb();
+	asm volatile("ptesync": : :"memory");
 	return 0;
 }
 
@@ -180,8 +180,8 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void radix__change_memory_range(unsigned long start, unsigned long end,
-				unsigned long clear)
+static void radix__change_memory_range(unsigned long start, unsigned long end,
+				       unsigned long clear)
 {
 	unsigned long idx;
 	pgd_t *pgdp;
@@ -1058,7 +1058,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 		 * Book3S does not require a TLB flush when relaxing access
 		 * restrictions when the address space is not attached to a
 		 * NMMU, because the core MMU will reload the pte after taking
-		 * an access fault, which is defined by the architectue.
+		 * an access fault, which is defined by the architecture.
 		 */
 	}
 	/* See ptesync comment in radix__set_pte_at */
diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
new file mode 100644
index 0000000000000000000000000000000000000000..63363787e0007607ae4ee10ed9f0b39df2ec6fc7
--- /dev/null
+++ b/arch/powerpc/mm/cacheflush.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/highmem.h>
+#include <linux/kprobes.h>
+
+/**
+ * flush_coherent_icache() - if a CPU has a coherent icache, flush it
+ * Return true if the cache was flushed, false otherwise
+ */
+static inline bool flush_coherent_icache(void)
+{
+	/*
+	 * For a snooping icache, we still need a dummy icbi to purge all the
+	 * prefetched instructions from the ifetch buffers. We also need a sync
+	 * before the icbi to order the actual stores to memory that might
+	 * have modified instructions with the icbi.
+	 */
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+		mb(); /* sync */
+		icbi((void *)PAGE_OFFSET);
+		mb(); /* sync */
+		isync();
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static void invalidate_icache_range(unsigned long start, unsigned long stop)
+{
+	unsigned long shift = l1_icache_shift();
+	unsigned long bytes = l1_icache_bytes();
+	char *addr = (char *)(start & ~(bytes - 1));
+	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+	unsigned long i;
+
+	for (i = 0; i < size >> shift; i++, addr += bytes)
+		icbi(addr);
+
+	mb(); /* sync */
+	isync();
+}
+
+/**
+ * flush_icache_range: Write any modified data cache blocks out to memory
+ * and invalidate the corresponding blocks in the instruction cache
+ *
+ * Generic code will call this after writing memory, before executing from it.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+void flush_icache_range(unsigned long start, unsigned long stop)
+{
+	if (flush_coherent_icache())
+		return;
+
+	clean_dcache_range(start, stop);
+
+	if (IS_ENABLED(CONFIG_44x)) {
+		/*
+		 * Flash invalidate on 44x because we are passed kmapped
+		 * addresses and this doesn't work for userspace pages due to
+		 * the virtually tagged icache.
+		 */
+		iccci((void *)start);
+		mb(); /* sync */
+		isync();
+	} else
+		invalidate_icache_range(start, stop);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#ifdef CONFIG_HIGHMEM
+/**
+ * flush_dcache_icache_phys() - Flush a page by its physical address
+ * @physaddr: the physical address of the page
+ */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+	unsigned long bytes = l1_dcache_bytes();
+	unsigned long nb = PAGE_SIZE / bytes;
+	unsigned long addr = physaddr & PAGE_MASK;
+	unsigned long msr, msr0;
+	unsigned long loop1 = addr, loop2 = addr;
+
+	msr0 = mfmsr();
+	msr = msr0 & ~MSR_DR;
+	/*
+	 * This must remain as ASM to prevent potential memory accesses
+	 * while the data MMU is disabled
+	 */
+	asm volatile(
+		"   mtctr %2;\n"
+		"   mtmsr %3;\n"
+		"   isync;\n"
+		"0: dcbst   0, %0;\n"
+		"   addi    %0, %0, %4;\n"
+		"   bdnz    0b;\n"
+		"   sync;\n"
+		"   mtctr %2;\n"
+		"1: icbi    0, %1;\n"
+		"   addi    %1, %1, %4;\n"
+		"   bdnz    1b;\n"
+		"   sync;\n"
+		"   mtmsr %5;\n"
+		"   isync;\n"
+		: "+&r" (loop1), "+&r" (loop2)
+		: "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
+		: "ctr", "memory");
+}
+NOKPROBE_SYMBOL(flush_dcache_icache_phys)
+#else
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+}
+#endif
+
+/**
+ * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * @p: the address of the page to flush
+ */
+static void __flush_dcache_icache(void *p)
+{
+	unsigned long addr = (unsigned long)p & PAGE_MASK;
+
+	clean_dcache_range(addr, addr + PAGE_SIZE);
+
+	/*
+	 * We don't flush the icache on 44x. Those have a virtual icache and we
+	 * don't have access to the virtual address here (it's not the page
+	 * vaddr but where it's mapped in user space). The flushing of the
+	 * icache on these is handled elsewhere, when a change in the address
+	 * space occurs, before returning to user space.
+	 */
+
+	if (mmu_has_feature(MMU_FTR_TYPE_44x))
+		return;
+
+	invalidate_icache_range(addr, addr + PAGE_SIZE);
+}
+
+static void flush_dcache_icache_hugepage(struct page *page)
+{
+	int i;
+	int nr = compound_nr(page);
+
+	if (!PageHighMem(page)) {
+		for (i = 0; i < nr; i++)
+			__flush_dcache_icache(lowmem_page_address(page + i));
+	} else {
+		for (i = 0; i < nr; i++) {
+			void *start = kmap_local_page(page + i);
+
+			__flush_dcache_icache(start);
+			kunmap_local(start);
+		}
+	}
+}
+
+void flush_dcache_icache_page(struct page *page)
+{
+	if (flush_coherent_icache())
+		return;
+
+	if (PageCompound(page))
+		return flush_dcache_icache_hugepage(page);
+
+	if (!PageHighMem(page)) {
+		__flush_dcache_icache(lowmem_page_address(page));
+	} else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
+		void *start = kmap_local_page(page);
+
+		__flush_dcache_icache(start);
+		kunmap_local(start);
+	} else {
+		flush_dcache_icache_phys(page_to_phys(page));
+	}
+}
+EXPORT_SYMBOL(flush_dcache_icache_page);
+
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
+{
+	clear_page(page);
+
+	/*
+	 * We shouldn't have to do this, but some versions of glibc
+	 * require it (ld.so assumes zero filled pages are icache clean)
+	 * - Anton
+	 */
+	flush_dcache_page(pg);
+}
+EXPORT_SYMBOL(clear_user_page);
+
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+		    struct page *pg)
+{
+	copy_page(vto, vfrom);
+
+	/*
+	 * We should be able to use the following optimisation, however
+	 * there are two problems.
+	 * Firstly a bug in some versions of binutils meant PLT sections
+	 * were not marked executable.
+	 * Secondly the first word in the GOT section is blrl, used
+	 * to establish the GOT address. Until recently the GOT was
+	 * not marked executable.
+	 * - Anton
+	 */
+#if 0
+	if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
+		return;
+#endif
+
+	flush_dcache_page(pg);
+}
+
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
+			     unsigned long addr, int len)
+{
+	void *maddr;
+
+	maddr = kmap_local_page(page) + (addr & ~PAGE_MASK);
+	flush_icache_range((unsigned long)maddr, (unsigned long)maddr + len);
+	kunmap_local(maddr);
+}
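
As the comment on flush_icache_range() notes, it is the interface generic code calls after writing instructions and before executing them. A minimal, hypothetical caller (the helper name is invented for illustration and is not part of the patch):

	static void install_insns(void *dst, const void *src, size_t len)
	{
		memcpy(dst, src, len);				/* write the new instructions */
		flush_icache_range((unsigned long)dst,
				   (unsigned long)dst + len);	/* make them visible to instruction fetch */
	}
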
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index bb368257b55cb432eedc1486c2ab0b60f2fc1e90..34f641d4a2fe90f6f17604c7684abad102208230 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -32,6 +32,8 @@
 #include <linux/context_tracking.h>
 #include <linux/hugetlb.h>
 #include <linux/uaccess.h>
+#include <linux/kfence.h>
+#include <linux/pkeys.h>
 
 #include <asm/firmware.h>
 #include <asm/interrupt.h>
@@ -87,7 +89,6 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
 	return __bad_area(regs, address, SEGV_MAPERR);
 }
 
-#ifdef CONFIG_PPC_MEM_KEYS
 static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
 				    struct vm_area_struct *vma)
 {
@@ -127,7 +128,6 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
 
 	return 0;
 }
-#endif
 
 static noinline int bad_access(struct pt_regs *regs, unsigned long address)
 {
@@ -197,7 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
 static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 			     unsigned long address, bool is_write)
 {
-	int is_exec = TRAP(regs) == 0x400;
+	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
 
 	/* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
 	if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
@@ -234,7 +234,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 	return false;
 }
 
-#ifdef CONFIG_PPC_MEM_KEYS
 static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
 			      struct vm_area_struct *vma)
 {
@@ -248,7 +247,6 @@ static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
 
 	return false;
 }
-#endif
 
 static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma)
 {
@@ -393,7 +391,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 	struct vm_area_struct * vma;
 	struct mm_struct *mm = current->mm;
 	unsigned int flags = FAULT_FLAG_DEFAULT;
- 	int is_exec = TRAP(regs) == 0x400;
+	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
 	int is_user = user_mode(regs);
 	int is_write = page_fault_is_write(error_code);
 	vm_fault_t fault, major = 0;
@@ -418,8 +416,12 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 	 * take a page fault to a kernel address or a page fault to a user
 	 * address outside of dedicated places
 	 */
-	if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write)))
+	if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) {
+		if (kfence_handle_page_fault(address, is_write, regs))
+			return 0;
+
 		return SIGSEGV;
+	}
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -492,11 +494,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 			return bad_area(regs, address);
 	}
 
-#ifdef CONFIG_PPC_MEM_KEYS
 	if (unlikely(access_pkey_error(is_write, is_exec,
 				       (error_code & DSISR_KEYFAULT), vma)))
 		return bad_access_pkey(regs, address, vma);
-#endif /* CONFIG_PPC_MEM_KEYS */
 
 	if (unlikely(access_error(is_write, is_exec, vma)))
 		return bad_access(regs, address);
@@ -539,39 +539,25 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 }
 NOKPROBE_SYMBOL(___do_page_fault);
 
-static long __do_page_fault(struct pt_regs *regs)
+static __always_inline void __do_page_fault(struct pt_regs *regs)
 {
-	const struct exception_table_entry *entry;
 	long err;
 
 	err = ___do_page_fault(regs, regs->dar, regs->dsisr);
-	if (likely(!err))
-		return err;
-
-	entry = search_exception_tables(regs->nip);
-	if (likely(entry)) {
-		instruction_pointer_set(regs, extable_fixup(entry));
-		return 0;
-	} else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
-		__bad_page_fault(regs, err);
-		return 0;
-	} else {
-		/* 32 and 64e handle the bad page fault in asm */
-		return err;
-	}
+	if (unlikely(err))
+		bad_page_fault(regs, err);
 }
-NOKPROBE_SYMBOL(__do_page_fault);
 
-DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
+DEFINE_INTERRUPT_HANDLER(do_page_fault)
 {
-	return __do_page_fault(regs);
+	__do_page_fault(regs);
 }
 
 #ifdef CONFIG_PPC_BOOK3S_64
 /* Same as do_page_fault but interrupt entry has already run in do_hash_fault */
-long hash__do_page_fault(struct pt_regs *regs)
+void hash__do_page_fault(struct pt_regs *regs)
 {
-	return __do_page_fault(regs);
+	__do_page_fault(regs);
 }
 NOKPROBE_SYMBOL(hash__do_page_fault);
 #endif
@@ -581,27 +567,27 @@ NOKPROBE_SYMBOL(hash__do_page_fault);
  * It is called from the DSI and ISI handlers in head.S and from some
  * of the procedures in traps.c.
  */
-void __bad_page_fault(struct pt_regs *regs, int sig)
+static void __bad_page_fault(struct pt_regs *regs, int sig)
 {
 	int is_write = page_fault_is_write(regs->dsisr);
 
 	/* kernel has accessed a bad area */
 
 	switch (TRAP(regs)) {
-	case 0x300:
-	case 0x380:
-	case 0xe00:
+	case INTERRUPT_DATA_STORAGE:
+	case INTERRUPT_DATA_SEGMENT:
+	case INTERRUPT_H_DATA_STORAGE:
 		pr_alert("BUG: %s on %s at 0x%08lx\n",
 			 regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
 			 "Unable to handle kernel data access",
 			 is_write ? "write" : "read", regs->dar);
 		break;
-	case 0x400:
-	case 0x480:
+	case INTERRUPT_INST_STORAGE:
+	case INTERRUPT_INST_SEGMENT:
 		pr_alert("BUG: Unable to handle kernel instruction fetch%s",
 			 regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n");
 		break;
-	case 0x600:
+	case INTERRUPT_ALIGNMENT:
 		pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n",
 			 regs->dar);
 		break;
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 02c7db4087cbc764086f3453f113b908a035c362..3d690be48e845e303f448e4da8d03ecb7d251321 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -97,6 +97,9 @@ static void __init MMU_setup(void)
 	if (IS_ENABLED(CONFIG_PPC_8xx))
 		return;
 
+	if (IS_ENABLED(CONFIG_KFENCE))
+		__map_without_ltlbs = 1;
+
 	if (debug_pagealloc_enabled())
 		__map_without_ltlbs = 1;
 
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
index fa9a7a718fc670c8415213edba075a1c9e65f671..a3c30a8840768a6edc8d39d7c7cf6cb6a7bbe7f2 100644
--- a/arch/powerpc/mm/maccess.c
+++ b/arch/powerpc/mm/maccess.c
@@ -3,7 +3,28 @@
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
 
+#include <asm/disassemble.h>
+#include <asm/inst.h>
+#include <asm/ppc-opcode.h>
+
 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
 {
 	return is_kernel_addr((unsigned long)unsafe_src);
 }
+
+int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src)
+{
+	unsigned int val, suffix;
+	int err;
+
+	err = copy_from_kernel_nofault(&val, src, sizeof(val));
+	if (err)
+		return err;
+	if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
+		err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4);
+		*inst = ppc_inst_prefix(val, suffix);
+	} else {
+		*inst = ppc_inst(val);
+	}
+	return err;
+}
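
As a usage illustration (assumed caller, not code from this patch),
copy_inst_from_kernel_nofault() is the safe way to read a possibly prefixed
instruction from kernel text, for example in a trap or probe handler; the
pr_debug() reporting below is only an example.

	struct ppc_inst insn;

	/* Sketch: read the instruction at regs->nip without risking a recursive fault. */
	if (!copy_inst_from_kernel_nofault(&insn, (struct ppc_inst *)regs->nip))
		pr_debug("insn at 0x%lx: 0x%x\n", regs->nip, ppc_inst_val(insn));
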
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 4e8ce6d85232375c09fdf18b401cb19cdf858605..6564b4d813245116d1c5151338c0442163fde097 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -12,49 +12,18 @@
  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  */
 
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/highmem.h>
-#include <linux/initrd.h>
-#include <linux/pagemap.h>
 #include <linux/suspend.h>
-#include <linux/hugetlb.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/memremap.h>
 #include <linux/dma-direct.h>
-#include <linux/kprobes.h>
 
-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/mmu.h>
-#include <asm/smp.h>
 #include <asm/machdep.h>
-#include <asm/btext.h>
-#include <asm/tlb.h>
-#include <asm/sections.h>
-#include <asm/sparsemem.h>
-#include <asm/vdso.h>
-#include <asm/fixmap.h>
-#include <asm/swiotlb.h>
 #include <asm/rtas.h>
 #include <asm/kasan.h>
 #include <asm/svm.h>
-#include <asm/mmzone.h>
 
 #include <mm/mmu_decl.h>
 
-static DEFINE_MUTEX(linear_mapping_mutex);
 unsigned long long memory_limit;
 bool init_mem_is_free;
 
@@ -72,6 +41,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 EXPORT_SYMBOL(phys_mem_access_prot);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+static DEFINE_MUTEX(linear_mapping_mutex);
 
 #ifdef CONFIG_NUMA
 int memory_add_physaddr_to_nid(u64 start)
@@ -340,257 +310,6 @@ void free_initmem(void)
 	free_initmem_default(POISON_FREE_INITMEM);
 }
 
-/**
- * flush_coherent_icache() - if a CPU has a coherent icache, flush it
- * @addr: The base address to use (can be any valid address, the whole cache will be flushed)
- * Return true if the cache was flushed, false otherwise
- */
-static inline bool flush_coherent_icache(unsigned long addr)
-{
-	/*
-	 * For a snooping icache, we still need a dummy icbi to purge all the
-	 * prefetched instructions from the ifetch buffers. We also need a sync
-	 * before the icbi to order the the actual stores to memory that might
-	 * have modified instructions with the icbi.
-	 */
-	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
-		mb(); /* sync */
-		allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES);
-		icbi((void *)addr);
-		prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES);
-		mb(); /* sync */
-		isync();
-		return true;
-	}
-
-	return false;
-}
-
-/**
- * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
- * @start: the start address
- * @stop: the stop address (exclusive)
- */
-static void invalidate_icache_range(unsigned long start, unsigned long stop)
-{
-	unsigned long shift = l1_icache_shift();
-	unsigned long bytes = l1_icache_bytes();
-	char *addr = (char *)(start & ~(bytes - 1));
-	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
-	unsigned long i;
-
-	for (i = 0; i < size >> shift; i++, addr += bytes)
-		icbi(addr);
-
-	mb(); /* sync */
-	isync();
-}
-
-/**
- * flush_icache_range: Write any modified data cache blocks out to memory
- * and invalidate the corresponding blocks in the instruction cache
- *
- * Generic code will call this after writing memory, before executing from it.
- *
- * @start: the start address
- * @stop: the stop address (exclusive)
- */
-void flush_icache_range(unsigned long start, unsigned long stop)
-{
-	if (flush_coherent_icache(start))
-		return;
-
-	clean_dcache_range(start, stop);
-
-	if (IS_ENABLED(CONFIG_44x)) {
-		/*
-		 * Flash invalidate on 44x because we are passed kmapped
-		 * addresses and this doesn't work for userspace pages due to
-		 * the virtually tagged icache.
-		 */
-		iccci((void *)start);
-		mb(); /* sync */
-		isync();
-	} else
-		invalidate_icache_range(start, stop);
-}
-EXPORT_SYMBOL(flush_icache_range);
-
-#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
-/**
- * flush_dcache_icache_phys() - Flush a page by it's physical address
- * @physaddr: the physical address of the page
- */
-static void flush_dcache_icache_phys(unsigned long physaddr)
-{
-	unsigned long bytes = l1_dcache_bytes();
-	unsigned long nb = PAGE_SIZE / bytes;
-	unsigned long addr = physaddr & PAGE_MASK;
-	unsigned long msr, msr0;
-	unsigned long loop1 = addr, loop2 = addr;
-
-	msr0 = mfmsr();
-	msr = msr0 & ~MSR_DR;
-	/*
-	 * This must remain as ASM to prevent potential memory accesses
-	 * while the data MMU is disabled
-	 */
-	asm volatile(
-		"   mtctr %2;\n"
-		"   mtmsr %3;\n"
-		"   isync;\n"
-		"0: dcbst   0, %0;\n"
-		"   addi    %0, %0, %4;\n"
-		"   bdnz    0b;\n"
-		"   sync;\n"
-		"   mtctr %2;\n"
-		"1: icbi    0, %1;\n"
-		"   addi    %1, %1, %4;\n"
-		"   bdnz    1b;\n"
-		"   sync;\n"
-		"   mtmsr %5;\n"
-		"   isync;\n"
-		: "+&r" (loop1), "+&r" (loop2)
-		: "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
-		: "ctr", "memory");
-}
-NOKPROBE_SYMBOL(flush_dcache_icache_phys)
-#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
-
-/*
- * This is called when a page has been modified by the kernel.
- * It just marks the page as not i-cache clean.  We do the i-cache
- * flush later when the page is given to a user process, if necessary.
- */
-void flush_dcache_page(struct page *page)
-{
-	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
-		return;
-	/* avoid an atomic op if possible */
-	if (test_bit(PG_dcache_clean, &page->flags))
-		clear_bit(PG_dcache_clean, &page->flags);
-}
-EXPORT_SYMBOL(flush_dcache_page);
-
-static void flush_dcache_icache_hugepage(struct page *page)
-{
-	int i;
-	void *start;
-
-	BUG_ON(!PageCompound(page));
-
-	for (i = 0; i < compound_nr(page); i++) {
-		if (!PageHighMem(page)) {
-			__flush_dcache_icache(page_address(page+i));
-		} else {
-			start = kmap_atomic(page+i);
-			__flush_dcache_icache(start);
-			kunmap_atomic(start);
-		}
-	}
-}
-
-void flush_dcache_icache_page(struct page *page)
-{
-
-	if (PageCompound(page))
-		return flush_dcache_icache_hugepage(page);
-
-#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64)
-	/* On 8xx there is no need to kmap since highmem is not supported */
-	__flush_dcache_icache(page_address(page));
-#else
-	if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
-		void *start = kmap_atomic(page);
-		__flush_dcache_icache(start);
-		kunmap_atomic(start);
-	} else {
-		unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;
-
-		if (flush_coherent_icache(addr))
-			return;
-		flush_dcache_icache_phys(addr);
-	}
-#endif
-}
-EXPORT_SYMBOL(flush_dcache_icache_page);
-
-/**
- * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- *
- * @page: the address of the page to flush
- */
-void __flush_dcache_icache(void *p)
-{
-	unsigned long addr = (unsigned long)p;
-
-	if (flush_coherent_icache(addr))
-		return;
-
-	clean_dcache_range(addr, addr + PAGE_SIZE);
-
-	/*
-	 * We don't flush the icache on 44x. Those have a virtual icache and we
-	 * don't have access to the virtual address here (it's not the page
-	 * vaddr but where it's mapped in user space). The flushing of the
-	 * icache on these is handled elsewhere, when a change in the address
-	 * space occurs, before returning to user space.
-	 */
-
-	if (mmu_has_feature(MMU_FTR_TYPE_44x))
-		return;
-
-	invalidate_icache_range(addr, addr + PAGE_SIZE);
-}
-
-void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
-{
-	clear_page(page);
-
-	/*
-	 * We shouldn't have to do this, but some versions of glibc
-	 * require it (ld.so assumes zero filled pages are icache clean)
-	 * - Anton
-	 */
-	flush_dcache_page(pg);
-}
-EXPORT_SYMBOL(clear_user_page);
-
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-		    struct page *pg)
-{
-	copy_page(vto, vfrom);
-
-	/*
-	 * We should be able to use the following optimisation, however
-	 * there are two problems.
-	 * Firstly a bug in some versions of binutils meant PLT sections
-	 * were not marked executable.
-	 * Secondly the first word in the GOT section is blrl, used
-	 * to establish the GOT address. Until recently the GOT was
-	 * not marked executable.
-	 * - Anton
-	 */
-#if 0
-	if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
-		return;
-#endif
-
-	flush_dcache_page(pg);
-}
-
-void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
-			     unsigned long addr, int len)
-{
-	unsigned long maddr;
-
-	maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
-	flush_icache_range(maddr, maddr + len);
-	kunmap(page);
-}
-
 /*
  * System memory should not be in /proc/iomem but various tools expect it
  * (eg kdump).
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 18f20da0d3483fd7627f2c12197519dce4d9e71c..a857af401738fdf883a49501818af9691a4cd139 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -43,24 +43,26 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
 		/*
 		 * This full barrier orders the store to the cpumask above vs
-		 * a subsequent operation which allows this CPU to begin loading
-		 * translations for next.
+		 * a subsequent load which allows this CPU/MMU to begin loading
+		 * translations for 'next' from page table PTEs into the TLB.
 		 *
-		 * When using the radix MMU that operation is the load of the
+		 * When using the radix MMU, that operation is the load of the
 		 * MMU context id, which is then moved to SPRN_PID.
 		 *
 		 * For the hash MMU it is either the first load from slb_cache
-		 * in switch_slb(), and/or the store of paca->mm_ctx_id in
-		 * copy_mm_to_paca().
+		 * in switch_slb() to preload the SLBs, or the load of
+		 * get_user_context which loads the context for the VSID hash
+		 * to insert a new SLB, in the SLB fault handler.
 		 *
 		 * On the other side, the barrier is in mm/tlb-radix.c for
-		 * radix which orders earlier stores to clear the PTEs vs
-		 * the load of mm_cpumask. And pte_xchg which does the same
-		 * thing for hash.
+		 * radix which orders earlier stores to clear the PTEs before
+		 * the load of mm_cpumask to check which CPU TLBs should be
+		 * flushed. For hash, pte_xchg to clear the PTE includes the
+		 * barrier.
 		 *
-		 * This full barrier is needed by membarrier when switching
-		 * between processes after store to rq->curr, before user-space
-		 * memory accesses.
+		 * This full barrier is also needed by membarrier when
+		 * switching between processes after store to rq->curr, before
+		 * user-space memory accesses.
 		 */
 		smp_mb();
 
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 998810e685620dbeffe84b6ca2dc2c49a5fe3019..7dac910c0b217fcfc8b4f3f091d0006adab02778 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -185,3 +185,8 @@ void ptdump_check_wx(void);
 #else
 static inline void ptdump_check_wx(void) { }
 #endif
+
+static inline bool debug_pagealloc_enabled_or_kfence(void)
+{
+	return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled();
+}
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 19a3eec1d8c52b026ee79ce0a47e1b5cb70f5d6f..71bfdbedacee81af869887f8d5329f7a35e2b099 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -149,7 +149,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
 	unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
 	unsigned long sinittext = __pa(_sinittext);
-	bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled();
+	bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled_or_kfence();
 	unsigned long boundary = strict_boundary ? sinittext : etext8;
 	unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
 
@@ -161,7 +161,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 		return 0;
 
 	mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true);
-	if (debug_pagealloc_enabled()) {
+	if (debug_pagealloc_enabled_or_kfence()) {
 		top = boundary;
 	} else {
 		mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true);
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index c2dec3a68d4c422e330f407c01348b5564c4b17f..8e60af32e51e14ea100348cea1e245b756088003 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -2,8 +2,4 @@
 #
 # Arch-specific network modules
 #
-ifdef CONFIG_PPC64
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
-else
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
-endif
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index d0a67a1bbaf1884d0f7f2574cb71d39ae4e8f2e2..99fad093f43ec105775d1983edfa85a37e0d1f5c 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -26,6 +26,9 @@
 /* Long jump; (unconditional 'branch') */
 #define PPC_JMP(dest)		EMIT(PPC_INST_BRANCH |			      \
 				     (((dest) - (ctx->idx * 4)) & 0x03fffffc))
+/* bl; (unconditional 'branch' with link) to absolute address */
+#define PPC_BL_ABS(dest)	EMIT(PPC_INST_BL |			      \
+				     (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc))
 /* "cond" here covers BO:BI fields. */
 #define PPC_BCC_SHORT(cond, dest)	EMIT(PPC_INST_BRANCH_COND |	      \
 					     (((cond) & 0x3ff) << 16) |	      \
@@ -42,6 +45,10 @@
 				EMIT(PPC_RAW_ORI(d, d, IMM_L(i)));	      \
 		} } while(0)
 
+#ifdef CONFIG_PPC32
+#define PPC_EX32(r, i)		EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0))
+#endif
+
 #define PPC_LI64(d, i)		do {					      \
 		if ((long)(i) >= -2147483648 &&				      \
 				(long)(i) < 2147483648)			      \
@@ -108,6 +115,63 @@ static inline bool is_nearbranch(int offset)
 #define COND_LT		(CR0_LT | COND_CMP_TRUE)
 #define COND_LE		(CR0_GT | COND_CMP_FALSE)
 
+#define SEEN_FUNC	0x20000000 /* might call external helpers */
+#define SEEN_STACK	0x40000000 /* uses BPF stack */
+#define SEEN_TAILCALL	0x80000000 /* uses tail calls */
+
+#define SEEN_VREG_MASK	0x1ff80000 /* Volatile registers r3-r12 */
+#define SEEN_NVREG_MASK	0x0003ffff /* Non volatile registers r14-r31 */
+
+#ifdef CONFIG_PPC64
+extern const int b2p[MAX_BPF_JIT_REG + 2];
+#else
+extern const int b2p[MAX_BPF_JIT_REG + 1];
+#endif
+
+struct codegen_context {
+	/*
+	 * This is used to track register usage as well
+	 * as calls to external helpers.
+	 * - register usage is tracked with corresponding
+	 *   bits (r3-r31)
+	 * - rest of the bits can be used to track other
+	 *   things -- for now, we use bits 0 to 2
+	 *   encoded in SEEN_* macros above
+	 */
+	unsigned int seen;
+	unsigned int idx;
+	unsigned int stack_size;
+	int b2p[ARRAY_SIZE(b2p)];
+};
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+	smp_wmb();	/* smp write barrier */
+	flush_icache_range((unsigned long)start, (unsigned long)end);
+}
+
+static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
+{
+	return ctx->seen & (1 << (31 - i));
+}
+
+static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
+{
+	ctx->seen |= 1 << (31 - i);
+}
+
+static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
+{
+	ctx->seen &= ~(1 << (31 - i));
+}
+
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+		       u32 *addrs, bool extra_pass);
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_realloc_regs(struct codegen_context *ctx);
+
 #endif
 
 #endif
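
A brief illustration of the register-tracking scheme introduced above: GPR i maps to
bit (31 - i) of ctx->seen, so the prologue and epilogue only save and restore the
non-volatile registers a program actually touches. The snippet below is a sketch,
not code from this patch.

	/* Sketch: mark r14 as used when emitting code that clobbers it... */
	bpf_set_seen_register(ctx, 14);		/* sets bit 31 - 14 = 17 in ctx->seen */
	EMIT(PPC_RAW_LI(14, 0));

	/* ...so the prologue/epilogue can test for it later. */
	if (bpf_is_seen_register(ctx, 14))
		/* emit a store/load of r14 to/from the BPF stack frame */;
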
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
deleted file mode 100644
index 448dfd4d98e184773968fc7a4b732bbe0877f520..0000000000000000000000000000000000000000
--- a/arch/powerpc/net/bpf_jit32.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * bpf_jit32.h: BPF JIT compiler for PPC
- *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
- *
- * Split from bpf_jit.h
- */
-#ifndef _BPF_JIT32_H
-#define _BPF_JIT32_H
-
-#include <asm/asm-compat.h>
-#include "bpf_jit.h"
-
-#ifdef CONFIG_PPC64
-#define BPF_PPC_STACK_R3_OFF	48
-#define BPF_PPC_STACK_LOCALS	32
-#define BPF_PPC_STACK_BASIC	(48+64)
-#define BPF_PPC_STACK_SAVE	(18*8)
-#define BPF_PPC_STACKFRAME	(BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
-				 BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME	(48+64)
-#else
-#define BPF_PPC_STACK_R3_OFF	24
-#define BPF_PPC_STACK_LOCALS	16
-#define BPF_PPC_STACK_BASIC	(24+32)
-#define BPF_PPC_STACK_SAVE	(18*4)
-#define BPF_PPC_STACKFRAME	(BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
-				 BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME	(24+32)
-#endif
-
-#define REG_SZ         (BITS_PER_LONG/8)
-
-/*
- * Generated code register usage:
- *
- * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with:
- *
- * skb		r3	(Entry parameter)
- * A register	r4
- * X register	r5
- * addr param	r6
- * r7-r10	scratch
- * skb->data	r14
- * skb headlen	r15	(skb->len - skb->data_len)
- * m[0]		r16
- * m[...]	...
- * m[15]	r31
- */
-#define r_skb		3
-#define r_ret		3
-#define r_A		4
-#define r_X		5
-#define r_addr		6
-#define r_scratch1	7
-#define r_scratch2	8
-#define r_D		14
-#define r_HL		15
-#define r_M		16
-
-#ifndef __ASSEMBLY__
-
-/*
- * Assembly helpers from arch/powerpc/net/bpf_jit.S:
- */
-#define DECLARE_LOAD_FUNC(func)	\
-	extern u8 func[], func##_negative_offset[], func##_positive_offset[]
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-DECLARE_LOAD_FUNC(sk_load_byte_msh);
-
-#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i));   \
-		else {	EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));	      \
-			EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0)
-
-#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i));     \
-		else {	EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));			\
-			EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0)
-
-#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i));   \
-		else {	EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));			\
-			EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0)
-
-#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i));   \
-		else {	EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));			\
-			EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0)
-
-#ifdef CONFIG_PPC64
-#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0)
-#else
-#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0)
-#endif
-
-#ifdef CONFIG_SMP
-#ifdef CONFIG_PPC64
-#define PPC_BPF_LOAD_CPU(r)		\
-	do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2);	\
-		PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index));	\
-	} while (0)
-#else
-#define PPC_BPF_LOAD_CPU(r)     \
-	do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4);		\
-		PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu));		\
-	} while(0)
-#endif
-#else
-#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0)
-#endif
-
-#define PPC_LHBRX_OFFS(r, base, i) \
-		do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0)
-#ifdef __LITTLE_ENDIAN__
-#define PPC_NTOHS_OFFS(r, base, i)	PPC_LHBRX_OFFS(r, base, i)
-#else
-#define PPC_NTOHS_OFFS(r, base, i)	PPC_LHZ_OFFS(r, base, i)
-#endif
-
-#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0)
-#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0)
-
-#define SEEN_DATAREF 0x10000 /* might call external helpers */
-#define SEEN_XREG    0x20000 /* X reg is used */
-#define SEEN_MEM     0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
-			      * storage */
-#define SEEN_MEM_MSK 0x0ffff
-
-struct codegen_context {
-	unsigned int seen;
-	unsigned int idx;
-	int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
-};
-
-#endif
-
-#endif
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 2e33c6673ff9571b25187919f65e923bc7c27e20..7b713edfa7e2617a9681a135233f078377b90e43 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -39,7 +39,7 @@
 #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
 
 /* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 2] = {
 	/* function return value */
 	[BPF_REG_0] = 8,
 	/* function arguments */
@@ -86,25 +86,6 @@ static const int b2p[] = {
 				} while(0)
 #define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0)
 
-#define SEEN_FUNC	0x1000 /* might call external helpers */
-#define SEEN_STACK	0x2000 /* uses BPF stack */
-#define SEEN_TAILCALL	0x4000 /* uses tail calls */
-
-struct codegen_context {
-	/*
-	 * This is used to track register usage as well
-	 * as calls to external helpers.
-	 * - register usage is tracked with corresponding
-	 *   bits (r3-r10 and r27-r31)
-	 * - rest of the bits can be used to track other
-	 *   things -- for now, we use bits 16 to 23
-	 *   encoded in SEEN_* macros above
-	 */
-	unsigned int seen;
-	unsigned int idx;
-	unsigned int stack_size;
-};
-
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff --git a/arch/powerpc/net/bpf_jit_asm.S b/arch/powerpc/net/bpf_jit_asm.S
deleted file mode 100644
index 2f5030d8383f9d3ff5a944db98c0c25444f5ed10..0000000000000000000000000000000000000000
--- a/arch/powerpc/net/bpf_jit_asm.S
+++ /dev/null
@@ -1,226 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* bpf_jit.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/asm-compat.h>
-#include "bpf_jit32.h"
-
-/*
- * All of these routines are called directly from generated code,
- * whose register usage is:
- *
- * r3		skb
- * r4,r5	A,X
- * r6		*** address parameter to helper ***
- * r7-r10	scratch
- * r14		skb->data
- * r15		skb headlen
- * r16-31	M[]
- */
-
-/*
- * To consider: These helpers are so small it could be better to just
- * generate them inline.  Inline code can do the simple headlen check
- * then branch directly to slow_path_XXX if required.  (In fact, could
- * load a spare GPR with the address of slow_path_generic and pass size
- * as an argument, making the call site a mtlr, li and bllr.)
- */
-	.globl	sk_load_word
-sk_load_word:
-	PPC_LCMPI	r_addr, 0
-	blt	bpf_slow_path_word_neg
-	.globl	sk_load_word_positive_offset
-sk_load_word_positive_offset:
-	/* Are we accessing past headlen? */
-	subi	r_scratch1, r_HL, 4
-	PPC_LCMP	r_scratch1, r_addr
-	blt	bpf_slow_path_word
-	/* Nope, just hitting the header.  cr0 here is eq or gt! */
-#ifdef __LITTLE_ENDIAN__
-	lwbrx	r_A, r_D, r_addr
-#else
-	lwzx	r_A, r_D, r_addr
-#endif
-	blr	/* Return success, cr0 != LT */
-
-	.globl	sk_load_half
-sk_load_half:
-	PPC_LCMPI	r_addr, 0
-	blt	bpf_slow_path_half_neg
-	.globl	sk_load_half_positive_offset
-sk_load_half_positive_offset:
-	subi	r_scratch1, r_HL, 2
-	PPC_LCMP	r_scratch1, r_addr
-	blt	bpf_slow_path_half
-#ifdef __LITTLE_ENDIAN__
-	lhbrx	r_A, r_D, r_addr
-#else
-	lhzx	r_A, r_D, r_addr
-#endif
-	blr
-
-	.globl	sk_load_byte
-sk_load_byte:
-	PPC_LCMPI	r_addr, 0
-	blt	bpf_slow_path_byte_neg
-	.globl	sk_load_byte_positive_offset
-sk_load_byte_positive_offset:
-	PPC_LCMP	r_HL, r_addr
-	ble	bpf_slow_path_byte
-	lbzx	r_A, r_D, r_addr
-	blr
-
-/*
- * BPF_LDX | BPF_B | BPF_MSH: ldxb  4*([offset]&0xf)
- * r_addr is the offset value
- */
-	.globl sk_load_byte_msh
-sk_load_byte_msh:
-	PPC_LCMPI	r_addr, 0
-	blt	bpf_slow_path_byte_msh_neg
-	.globl sk_load_byte_msh_positive_offset
-sk_load_byte_msh_positive_offset:
-	PPC_LCMP	r_HL, r_addr
-	ble	bpf_slow_path_byte_msh
-	lbzx	r_X, r_D, r_addr
-	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
-	blr
-
-/* Call out to skb_copy_bits:
- * We'll need to back up our volatile regs first; we have
- * local variable space at r1+(BPF_PPC_STACK_BASIC).
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE)				\
-	mflr	r0;						\
-	PPC_STL	r0, PPC_LR_STKOFF(r1);					\
-	/* R3 goes in parameter space of caller's frame */	\
-	PPC_STL	r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);		\
-	PPC_STL	r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);		\
-	PPC_STL	r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);		\
-	addi	r5, r1, BPF_PPC_STACK_BASIC+(2*REG_SZ);		\
-	PPC_STLU	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
-	/* R3 = r_skb, as passed */				\
-	mr	r4, r_addr;					\
-	li	r6, SIZE;					\
-	bl	skb_copy_bits;					\
-	nop;							\
-	/* R3 = 0 on success */					\
-	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
-	PPC_LL	r0, PPC_LR_STKOFF(r1);					\
-	PPC_LL	r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);		\
-	PPC_LL	r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);		\
-	mtlr	r0;						\
-	PPC_LCMPI	r3, 0;						\
-	blt	bpf_error;	/* cr0 = LT */			\
-	PPC_LL	r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);		\
-	/* Great success! */
-
-bpf_slow_path_word:
-	bpf_slow_path_common(4)
-	/* Data value is on stack, and cr0 != LT */
-	lwz	r_A, BPF_PPC_STACK_BASIC+(2*REG_SZ)(r1)
-	blr
-
-bpf_slow_path_half:
-	bpf_slow_path_common(2)
-	lhz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
-	blr
-
-bpf_slow_path_byte:
-	bpf_slow_path_common(1)
-	lbz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
-	blr
-
-bpf_slow_path_byte_msh:
-	bpf_slow_path_common(1)
-	lbz	r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
-	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
-	blr
-
-/* Call out to bpf_internal_load_pointer_neg_helper:
- * We'll need to back up our volatile regs first; we have
- * local variable space at r1+(BPF_PPC_STACK_BASIC).
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define sk_negative_common(SIZE)				\
-	mflr	r0;						\
-	PPC_STL	r0, PPC_LR_STKOFF(r1);					\
-	/* R3 goes in parameter space of caller's frame */	\
-	PPC_STL	r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);		\
-	PPC_STL	r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);		\
-	PPC_STL	r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);		\
-	PPC_STLU	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
-	/* R3 = r_skb, as passed */				\
-	mr	r4, r_addr;					\
-	li	r5, SIZE;					\
-	bl	bpf_internal_load_pointer_neg_helper;		\
-	nop;							\
-	/* R3 != 0 on success */				\
-	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
-	PPC_LL	r0, PPC_LR_STKOFF(r1);					\
-	PPC_LL	r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1);		\
-	PPC_LL	r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1);		\
-	mtlr	r0;						\
-	PPC_LCMPLI	r3, 0;						\
-	beq	bpf_error_slow;	/* cr0 = EQ */			\
-	mr	r_addr, r3;					\
-	PPC_LL	r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1);		\
-	/* Great success! */
-
-bpf_slow_path_word_neg:
-	lis     r_scratch1,-32	/* SKF_LL_OFF */
-	PPC_LCMP	r_addr, r_scratch1	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	.globl	sk_load_word_negative_offset
-sk_load_word_negative_offset:
-	sk_negative_common(4)
-	lwz	r_A, 0(r_addr)
-	blr
-
-bpf_slow_path_half_neg:
-	lis     r_scratch1,-32	/* SKF_LL_OFF */
-	PPC_LCMP	r_addr, r_scratch1	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	.globl	sk_load_half_negative_offset
-sk_load_half_negative_offset:
-	sk_negative_common(2)
-	lhz	r_A, 0(r_addr)
-	blr
-
-bpf_slow_path_byte_neg:
-	lis     r_scratch1,-32	/* SKF_LL_OFF */
-	PPC_LCMP	r_addr, r_scratch1	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	.globl	sk_load_byte_negative_offset
-sk_load_byte_negative_offset:
-	sk_negative_common(1)
-	lbz	r_A, 0(r_addr)
-	blr
-
-bpf_slow_path_byte_msh_neg:
-	lis     r_scratch1,-32	/* SKF_LL_OFF */
-	PPC_LCMP	r_addr, r_scratch1	/* addr < SKF_* */
-	blt	bpf_error	/* cr0 = LT */
-	.globl	sk_load_byte_msh_negative_offset
-sk_load_byte_msh_negative_offset:
-	sk_negative_common(1)
-	lbz	r_X, 0(r_addr)
-	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
-	blr
-
-bpf_error_slow:
-	/* fabricate a cr0 = lt */
-	li	r_scratch1, -1
-	PPC_LCMPI	r_scratch1, 0
-bpf_error:
-	/* Entered with cr0 = lt */
-	li	r3, 0
-	/* Generated code will 'blt epilogue', returning 0. */
-	blr
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index e809cb5a16316bc9f9015c3f8df4b4e9277f4434..798ac4350a82c564e6e2e944b67b2fdc61af16e6 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -1,10 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/* bpf_jit_comp.c: BPF JIT compiler
+/*
+ * eBPF JIT compiler
  *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ *		  IBM Corporation
  *
- * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com)
- * Ported to ppc32 by Denis Kirjanov <kda@linux-powerpc.org>
+ * Based on the powerpc classic BPF JIT compiler by Matt Evans
  */
 #include <linux/moduleloader.h>
 #include <asm/cacheflush.h>
@@ -12,639 +13,204 @@
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
 
-#include "bpf_jit32.h"
+#include "bpf_jit.h"
 
-static inline void bpf_flush_icache(void *start, void *end)
+static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
 {
-	smp_wmb();
-	flush_icache_range((unsigned long)start, (unsigned long)end);
+	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
 }
 
-static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
-				   struct codegen_context *ctx)
+/* Fix the branch target addresses for subprog calls */
+static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
+				       struct codegen_context *ctx, u32 *addrs)
 {
-	int i;
-	const struct sock_filter *filter = fp->insns;
-
-	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
-		/* Make stackframe */
-		if (ctx->seen & SEEN_DATAREF) {
-			/* If we call any helpers (for loads), save LR */
-			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
-			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
-
-			/* Back up non-volatile regs. */
-			PPC_BPF_STL(r_D, 1, -(REG_SZ*(32-r_D)));
-			PPC_BPF_STL(r_HL, 1, -(REG_SZ*(32-r_HL)));
-		}
-		if (ctx->seen & SEEN_MEM) {
-			/*
-			 * Conditionally save regs r15-r31 as some will be used
-			 * for M[] data.
-			 */
-			for (i = r_M; i < (r_M+16); i++) {
-				if (ctx->seen & (1 << (i-r_M)))
-					PPC_BPF_STL(i, 1, -(REG_SZ*(32-i)));
-			}
-		}
-		PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME);
-	}
-
-	if (ctx->seen & SEEN_DATAREF) {
-		/*
-		 * If this filter needs to access skb data,
-		 * prepare r_D and r_HL:
-		 *  r_HL = skb->len - skb->data_len
-		 *  r_D	 = skb->data
-		 */
-		PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
-							 data_len));
-		PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
-		EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1));
-		PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
-	}
+	const struct bpf_insn *insn = fp->insnsi;
+	bool func_addr_fixed;
+	u64 func_addr;
+	u32 tmp_idx;
+	int i, ret;
 
-	if (ctx->seen & SEEN_XREG) {
+	for (i = 0; i < fp->len; i++) {
 		/*
-		 * TODO: Could also detect whether first instr. sets X and
-		 * avoid this (as below, with A).
+		 * During the extra pass, only the branch target addresses for
+		 * the subprog calls need to be fixed. All other instructions
+		 * can be left untouched.
+		 *
+		 * The JITed image length does not change because we already
+		 * ensure that the JITed instruction sequences for these calls
+		 * are of fixed length by padding them with NOPs.
 		 */
-		EMIT(PPC_RAW_LI(r_X, 0));
-	}
-
-	/* make sure we dont leak kernel information to user */
-	if (bpf_needs_clear_a(&filter[0]))
-		EMIT(PPC_RAW_LI(r_A, 0));
-}
-
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
-{
-	int i;
-
-	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
-		EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME));
-		if (ctx->seen & SEEN_DATAREF) {
-			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
-			EMIT(PPC_RAW_MTLR(0));
-			PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D)));
-			PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL)));
-		}
-		if (ctx->seen & SEEN_MEM) {
-			/* Restore any saved non-vol registers */
-			for (i = r_M; i < (r_M+16); i++) {
-				if (ctx->seen & (1 << (i-r_M)))
-					PPC_BPF_LL(i, 1, -(REG_SZ*(32-i)));
-			}
-		}
-	}
-	/* The RETs have left a return value in R3. */
-
-	EMIT(PPC_RAW_BLR());
-}
-
-#define CHOOSE_LOAD_FUNC(K, func) \
-	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
-/* Assemble the body code between the prologue & epilogue. */
-static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
-			      struct codegen_context *ctx,
-			      unsigned int *addrs)
-{
-	const struct sock_filter *filter = fp->insns;
-	int flen = fp->len;
-	u8 *func;
-	unsigned int true_cond;
-	int i;
-
-	/* Start of epilogue code */
-	unsigned int exit_addr = addrs[flen];
-
-	for (i = 0; i < flen; i++) {
-		unsigned int K = filter[i].k;
-		u16 code = bpf_anc_helper(&filter[i]);
+		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
+		    insn[i].src_reg == BPF_PSEUDO_CALL) {
+			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
+						    &func_addr,
+						    &func_addr_fixed);
+			if (ret < 0)
+				return ret;
 
-		/*
-		 * addrs[] maps a BPF bytecode address into a real offset from
-		 * the start of the body code.
-		 */
-		addrs[i] = ctx->idx * 4;
-
-		switch (code) {
-			/*** ALU ops ***/
-		case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
-			ctx->seen |= SEEN_XREG;
-			EMIT(PPC_RAW_ADD(r_A, r_A, r_X));
-			break;
-		case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
-			if (!K)
-				break;
-			EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K)));
-			if (K >= 32768)
-				EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K)));
-			break;
-		case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
-			ctx->seen |= SEEN_XREG;
-			EMIT(PPC_RAW_SUB(r_A, r_A, r_X));
-			break;
-		case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
-			if (!K)
-				break;
-			EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K)));
-			if (K >= 32768)
-				EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K)));
-			break;
-		case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
-			ctx->seen |= SEEN_XREG;
-			EMIT(PPC_RAW_MULW(r_A, r_A, r_X));
-			break;
-		case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
-			if (K < 32768)
-				EMIT(PPC_RAW_MULI(r_A, r_A, K));
-			else {
-				PPC_LI32(r_scratch1, K);
-				EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1));
-			}
-			break;
-		case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */
-		case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
-			ctx->seen |= SEEN_XREG;
-			EMIT(PPC_RAW_CMPWI(r_X, 0));
-			if (ctx->pc_ret0 != -1) {
-				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
-			} else {
-				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
-				EMIT(PPC_RAW_LI(r_ret, 0));
-				PPC_JMP(exit_addr);
-			}
-			if (code == (BPF_ALU | BPF_MOD | BPF_X)) {
-				EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X));
-				EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1));
-				EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1));
-			} else {
-				EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X));
-			}
-			break;
-		case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */
-			PPC_LI32(r_scratch2, K);
-			EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2));
-			EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1));
-			EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1));
-			break;
-		case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
-			if (K == 1)
-				break;
-			PPC_LI32(r_scratch1, K);
-			EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1));
-			break;
-		case BPF_ALU | BPF_AND | BPF_X:
-			ctx->seen |= SEEN_XREG;
-			EMIT(PPC_RAW_AND(r_A, r_A, r_X));
-			break;
-		case BPF_ALU | BPF_AND | BPF_K:
-			if (!IMM_H(K))
-				EMIT(PPC_RAW_ANDI(r_A, r_A, K));
-			else {
-				PPC_LI32(r_scratch1, K);
-				EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1));
-			}
-			break;
-		case BPF_ALU | BPF_OR | BPF_X:
-			ctx->seen |= SEEN_XREG;
-			EMIT(PPC_RAW_OR(r_A, r_A, r_X));
-			break;
-		case BPF_ALU | BPF_OR | BPF_K:
-			if (IMM_L(K))
-				EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K)));
-			if (K >= 65536)
-				EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K)));
-			break;
-		case BPF_ANC | SKF_AD_ALU_XOR_X:
-		case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */
-			ctx->seen |= SEEN_XREG;
-			EMIT(PPC_RAW_XOR(r_A, r_A, r_X));
-			break;
-		case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
-			if (IMM_L(K))
-				EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K)));
-			if (K >= 65536)
-				EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K)));
-			break;
-		case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
-			ctx->seen |= SEEN_XREG;
-			EMIT(PPC_RAW_SLW(r_A, r_A, r_X));
-			break;
-		case BPF_ALU | BPF_LSH | BPF_K:
-			if (K == 0)
-				break;
-			else
-				EMIT(PPC_RAW_SLWI(r_A, r_A, K));
-			break;
-		case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
-			ctx->seen |= SEEN_XREG;
-			EMIT(PPC_RAW_SRW(r_A, r_A, r_X));
-			break;
-		case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
-			if (K == 0)
-				break;
-			else
-				EMIT(PPC_RAW_SRWI(r_A, r_A, K));
-			break;
-		case BPF_ALU | BPF_NEG:
-			EMIT(PPC_RAW_NEG(r_A, r_A));
-			break;
-		case BPF_RET | BPF_K:
-			PPC_LI32(r_ret, K);
-			if (!K) {
-				if (ctx->pc_ret0 == -1)
-					ctx->pc_ret0 = i;
-			}
-			/*
-			 * If this isn't the very last instruction, branch to
-			 * the epilogue if we've stuff to clean up.  Otherwise,
-			 * if there's nothing to tidy, just return.  If we /are/
-			 * the last instruction, we're about to fall through to
-			 * the epilogue to return.
-			 */
-			if (i != flen - 1) {
-				/*
-				 * Note: 'seen' is properly valid only on pass
-				 * #2.	Both parts of this conditional are the
-				 * same instruction size though, meaning the
-				 * first pass will still correctly determine the
-				 * code size/addresses.
-				 */
-				if (ctx->seen)
-					PPC_JMP(exit_addr);
-				else
-					EMIT(PPC_RAW_BLR());
-			}
-			break;
-		case BPF_RET | BPF_A:
-			EMIT(PPC_RAW_MR(r_ret, r_A));
-			if (i != flen - 1) {
-				if (ctx->seen)
-					PPC_JMP(exit_addr);
-				else
-					EMIT(PPC_RAW_BLR());
-			}
-			break;
-		case BPF_MISC | BPF_TAX: /* X = A */
-			EMIT(PPC_RAW_MR(r_X, r_A));
-			break;
-		case BPF_MISC | BPF_TXA: /* A = X */
-			ctx->seen |= SEEN_XREG;
-			EMIT(PPC_RAW_MR(r_A, r_X));
-			break;
-
-			/*** Constant loads/M[] access ***/
-		case BPF_LD | BPF_IMM: /* A = K */
-			PPC_LI32(r_A, K);
-			break;
-		case BPF_LDX | BPF_IMM: /* X = K */
-			PPC_LI32(r_X, K);
-			break;
-		case BPF_LD | BPF_MEM: /* A = mem[K] */
-			EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf)));
-			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
-			break;
-		case BPF_LDX | BPF_MEM: /* X = mem[K] */
-			EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf)));
-			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
-			break;
-		case BPF_ST: /* mem[K] = A */
-			EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A));
-			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
-			break;
-		case BPF_STX: /* mem[K] = X */
-			EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X));
-			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
-			break;
-		case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
-			BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4);
-			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
-			break;
-		case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
-			PPC_LWZ_OFFS(r_A, r_skb, K);
-			break;
-		case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
-			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
-			break;
-
-			/*** Ancillary info loads ***/
-		case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
-			BUILD_BUG_ON(sizeof_field(struct sk_buff,
-						  protocol) != 2);
-			PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-							    protocol));
-			break;
-		case BPF_ANC | SKF_AD_IFINDEX:
-		case BPF_ANC | SKF_AD_HATYPE:
-			BUILD_BUG_ON(sizeof_field(struct net_device,
-						ifindex) != 4);
-			BUILD_BUG_ON(sizeof_field(struct net_device,
-						type) != 2);
-			PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
-								dev));
-			EMIT(PPC_RAW_CMPDI(r_scratch1, 0));
-			if (ctx->pc_ret0 != -1) {
-				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
-			} else {
-				/* Exit, returning 0; first pass hits here. */
-				PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12);
-				EMIT(PPC_RAW_LI(r_ret, 0));
-				PPC_JMP(exit_addr);
-			}
-			if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
-				PPC_LWZ_OFFS(r_A, r_scratch1,
-				     offsetof(struct net_device, ifindex));
-			} else {
-				PPC_LHZ_OFFS(r_A, r_scratch1,
-				     offsetof(struct net_device, type));
-			}
-
-			break;
-		case BPF_ANC | SKF_AD_MARK:
-			BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
-			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-							  mark));
-			break;
-		case BPF_ANC | SKF_AD_RXHASH:
-			BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
-			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-							  hash));
-			break;
-		case BPF_ANC | SKF_AD_VLAN_TAG:
-			BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
-
-			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-							  vlan_tci));
-			break;
-		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
-			PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET());
-			if (PKT_VLAN_PRESENT_BIT)
-				EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT));
-			if (PKT_VLAN_PRESENT_BIT < 7)
-				EMIT(PPC_RAW_ANDI(r_A, r_A, 1));
-			break;
-		case BPF_ANC | SKF_AD_QUEUE:
-			BUILD_BUG_ON(sizeof_field(struct sk_buff,
-						  queue_mapping) != 2);
-			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
-							  queue_mapping));
-			break;
-		case BPF_ANC | SKF_AD_PKTTYPE:
-			PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET());
-			EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX));
-			EMIT(PPC_RAW_SRWI(r_A, r_A, 5));
-			break;
-		case BPF_ANC | SKF_AD_CPU:
-			PPC_BPF_LOAD_CPU(r_A);
-			break;
-			/*** Absolute loads from packet header/data ***/
-		case BPF_LD | BPF_W | BPF_ABS:
-			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
-			goto common_load;
-		case BPF_LD | BPF_H | BPF_ABS:
-			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
-			goto common_load;
-		case BPF_LD | BPF_B | BPF_ABS:
-			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
-		common_load:
-			/* Load from [K]. */
-			ctx->seen |= SEEN_DATAREF;
-			PPC_FUNC_ADDR(r_scratch1, func);
-			EMIT(PPC_RAW_MTLR(r_scratch1));
-			PPC_LI32(r_addr, K);
-			EMIT(PPC_RAW_BLRL());
 			/*
-			 * Helper returns 'lt' condition on error, and an
-			 * appropriate return value in r3
+			 * Save ctx->idx as this would currently point to the
+			 * end of the JITed image and set it to the offset of
+			 * the instruction sequence corresponding to the
+			 * subprog call temporarily.
 			 */
-			PPC_BCC(COND_LT, exit_addr);
-			break;
-
-			/*** Indirect loads from packet header/data ***/
-		case BPF_LD | BPF_W | BPF_IND:
-			func = sk_load_word;
-			goto common_load_ind;
-		case BPF_LD | BPF_H | BPF_IND:
-			func = sk_load_half;
-			goto common_load_ind;
-		case BPF_LD | BPF_B | BPF_IND:
-			func = sk_load_byte;
-		common_load_ind:
+			tmp_idx = ctx->idx;
+			ctx->idx = addrs[i] / 4;
+			bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+
 			/*
-			 * Load from [X + K].  Negative offsets are tested for
-			 * in the helper functions.
-			 */
-			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
-			PPC_FUNC_ADDR(r_scratch1, func);
-			EMIT(PPC_RAW_MTLR(r_scratch1));
-			EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K)));
-			if (K >= 32768)
-				EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K)));
-			EMIT(PPC_RAW_BLRL());
-			/* If error, cr0.LT set */
-			PPC_BCC(COND_LT, exit_addr);
-			break;
-
-		case BPF_LDX | BPF_B | BPF_MSH:
-			func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
-			goto common_load;
-			break;
-
-			/*** Jump and branches ***/
-		case BPF_JMP | BPF_JA:
-			if (K != 0)
-				PPC_JMP(addrs[i + 1 + K]);
-			break;
-
-		case BPF_JMP | BPF_JGT | BPF_K:
-		case BPF_JMP | BPF_JGT | BPF_X:
-			true_cond = COND_GT;
-			goto cond_branch;
-		case BPF_JMP | BPF_JGE | BPF_K:
-		case BPF_JMP | BPF_JGE | BPF_X:
-			true_cond = COND_GE;
-			goto cond_branch;
-		case BPF_JMP | BPF_JEQ | BPF_K:
-		case BPF_JMP | BPF_JEQ | BPF_X:
-			true_cond = COND_EQ;
-			goto cond_branch;
-		case BPF_JMP | BPF_JSET | BPF_K:
-		case BPF_JMP | BPF_JSET | BPF_X:
-			true_cond = COND_NE;
-		cond_branch:
-			/* same targets, can avoid doing the test :) */
-			if (filter[i].jt == filter[i].jf) {
-				if (filter[i].jt > 0)
-					PPC_JMP(addrs[i + 1 + filter[i].jt]);
-				break;
-			}
-
-			switch (code) {
-			case BPF_JMP | BPF_JGT | BPF_X:
-			case BPF_JMP | BPF_JGE | BPF_X:
-			case BPF_JMP | BPF_JEQ | BPF_X:
-				ctx->seen |= SEEN_XREG;
-				EMIT(PPC_RAW_CMPLW(r_A, r_X));
-				break;
-			case BPF_JMP | BPF_JSET | BPF_X:
-				ctx->seen |= SEEN_XREG;
-				EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X));
-				break;
-			case BPF_JMP | BPF_JEQ | BPF_K:
-			case BPF_JMP | BPF_JGT | BPF_K:
-			case BPF_JMP | BPF_JGE | BPF_K:
-				if (K < 32768)
-					EMIT(PPC_RAW_CMPLWI(r_A, K));
-				else {
-					PPC_LI32(r_scratch1, K);
-					EMIT(PPC_RAW_CMPLW(r_A, r_scratch1));
-				}
-				break;
-			case BPF_JMP | BPF_JSET | BPF_K:
-				if (K < 32768)
-					/* PPC_ANDI is /only/ dot-form */
-					EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K));
-				else {
-					PPC_LI32(r_scratch1, K);
-					EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A,
-						    r_scratch1));
-				}
-				break;
-			}
-			/* Sometimes branches are constructed "backward", with
-			 * the false path being the branch and true path being
-			 * a fallthrough to the next instruction.
+			 * Restore ctx->idx here. This is safe as the length
+			 * of the JITed sequence remains unchanged.
 			 */
-			if (filter[i].jt == 0)
-				/* Swap the sense of the branch */
-				PPC_BCC(true_cond ^ COND_CMP_TRUE,
-					addrs[i + 1 + filter[i].jf]);
-			else {
-				PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]);
-				if (filter[i].jf != 0)
-					PPC_JMP(addrs[i + 1 + filter[i].jf]);
-			}
-			break;
-		default:
-			/* The filter contains something cruel & unusual.
-			 * We don't handle it, but also there shouldn't be
-			 * anything missing from our list.
-			 */
-			if (printk_ratelimit())
-				pr_err("BPF filter opcode %04x (@%d) unsupported\n",
-				       filter[i].code, i);
-			return -ENOTSUPP;
+			ctx->idx = tmp_idx;
 		}
-
 	}
-	/* Set end-of-body-code address for exit. */
-	addrs[i] = ctx->idx * 4;
 
 	return 0;
 }
 
-void bpf_jit_compile(struct bpf_prog *fp)
+struct powerpc64_jit_data {
+	struct bpf_binary_header *header;
+	u32 *addrs;
+	u8 *image;
+	u32 proglen;
+	struct codegen_context ctx;
+};
+
+bool bpf_jit_needs_zext(void)
+{
+	return true;
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
-	unsigned int proglen;
-	unsigned int alloclen;
-	u32 *image = NULL;
+	u32 proglen;
+	u32 alloclen;
+	u8 *image = NULL;
 	u32 *code_base;
-	unsigned int *addrs;
+	u32 *addrs;
+	struct powerpc64_jit_data *jit_data;
 	struct codegen_context cgctx;
 	int pass;
-	int flen = fp->len;
+	int flen;
+	struct bpf_binary_header *bpf_hdr;
+	struct bpf_prog *org_fp = fp;
+	struct bpf_prog *tmp_fp;
+	bool bpf_blinded = false;
+	bool extra_pass = false;
+
+	if (!fp->jit_requested)
+		return org_fp;
+
+	tmp_fp = bpf_jit_blind_constants(org_fp);
+	if (IS_ERR(tmp_fp))
+		return org_fp;
+
+	if (tmp_fp != org_fp) {
+		bpf_blinded = true;
+		fp = tmp_fp;
+	}
+
+	jit_data = fp->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			fp = org_fp;
+			goto out;
+		}
+		fp->aux->jit_data = jit_data;
+	}
 
-	if (!bpf_jit_enable)
-		return;
+	flen = fp->len;
+	addrs = jit_data->addrs;
+	if (addrs) {
+		cgctx = jit_data->ctx;
+		image = jit_data->image;
+		bpf_hdr = jit_data->header;
+		proglen = jit_data->proglen;
+		alloclen = proglen + FUNCTION_DESCR_SIZE;
+		extra_pass = true;
+		goto skip_init_ctx;
+	}
 
 	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
-	if (addrs == NULL)
-		return;
+	if (addrs == NULL) {
+		fp = org_fp;
+		goto out_addrs;
+	}
 
-	/*
-	 * There are multiple assembly passes as the generated code will change
-	 * size as it settles down, figuring out the max branch offsets/exit
-	 * paths required.
-	 *
-	 * The range of standard conditional branches is +/- 32Kbytes.	Since
-	 * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to
-	 * finish with 8 bytes/instruction.  Not feasible, so long jumps are
-	 * used, distinct from short branches.
-	 *
-	 * Current:
-	 *
-	 * For now, both branch types assemble to 2 words (short branches padded
-	 * with a NOP); this is less efficient, but assembly will always complete
-	 * after exactly 3 passes:
-	 *
-	 * First pass: No code buffer; Program is "faux-generated" -- no code
-	 * emitted but maximum size of output determined (and addrs[] filled
-	 * in).	 Also, we note whether we use M[], whether we use skb data, etc.
-	 * All generation choices assumed to be 'worst-case', e.g. branches all
-	 * far (2 instructions), return path code reduction not available, etc.
-	 *
-	 * Second pass: Code buffer allocated with size determined previously.
-	 * Prologue generated to support features we have seen used.  Exit paths
-	 * determined and addrs[] is filled in again, as code may be slightly
-	 * smaller as a result.
-	 *
-	 * Third pass: Code generated 'for real', and branch destinations
-	 * determined from now-accurate addrs[] map.
-	 *
-	 * Ideal:
-	 *
-	 * If we optimise this, near branches will be shorter.	On the
-	 * first assembly pass, we should err on the side of caution and
-	 * generate the biggest code.  On subsequent passes, branches will be
-	 * generated short or long and code size will reduce.  With smaller
-	 * code, more branches may fall into the short category, and code will
-	 * reduce more.
-	 *
-	 * Finally, if we see one pass generate code the same size as the
-	 * previous pass we have converged and should now generate code for
-	 * real.  Allocating at the end will also save the memory that would
-	 * otherwise be wasted by the (small) current code shrinkage.
-	 * Preferably, we should do a small number of passes (e.g. 5) and if we
-	 * haven't converged by then, get impatient and force code to generate
-	 * as-is, even if the odd branch would be left long.  The chances of a
-	 * long jump are tiny with all but the most enormous of BPF filter
-	 * inputs, so we should usually converge on the third pass.
-	 */
+	memset(&cgctx, 0, sizeof(struct codegen_context));
+	memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p));
+
+	/* Make sure that the stack is quadword aligned. */
+	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
-	cgctx.idx = 0;
-	cgctx.seen = 0;
-	cgctx.pc_ret0 = -1;
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
 		/* We hit something illegal or unsupported. */
-		goto out;
+		fp = org_fp;
+		goto out_addrs;
+	}
+
+	/*
+	 * If we have seen a tail call, we need a second pass.
+	 * This is because bpf_jit_emit_common_epilogue() is called
+	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
+	 */
+	if (cgctx.seen & SEEN_TAILCALL) {
+		cgctx.idx = 0;
+		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+			fp = org_fp;
+			goto out_addrs;
+		}
+	}
 
+	bpf_jit_realloc_regs(&cgctx);
 	/*
 	 * Pretend to build prologue, given the features we've seen.  This will
 	 * update ctgtx.idx as it pretends to output instructions, then we can
 	 * calculate total size from idx.
 	 */
-	bpf_jit_build_prologue(fp, 0, &cgctx);
+	bpf_jit_build_prologue(0, &cgctx);
 	bpf_jit_build_epilogue(0, &cgctx);
 
 	proglen = cgctx.idx * 4;
 	alloclen = proglen + FUNCTION_DESCR_SIZE;
-	image = module_alloc(alloclen);
-	if (!image)
-		goto out;
 
-	code_base = image + (FUNCTION_DESCR_SIZE/4);
+	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
+	if (!bpf_hdr) {
+		fp = org_fp;
+		goto out_addrs;
+	}
+
+skip_init_ctx:
+	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
+
+	if (extra_pass) {
+		/*
+		 * Do not touch the prologue and epilogue as they will remain
+		 * unchanged. Only fix the branch target address for subprog
+		 * calls in the body.
+		 *
+		 * This does not change the offsets and lengths of the subprog
+		 * call instruction sequences and hence, the size of the JITed
+		 * image as well.
+		 */
+		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
+
+		/* There is no need to perform the usual passes. */
+		goto skip_codegen_passes;
+	}
 
 	/* Code generation passes 1-2 */
 	for (pass = 1; pass < 3; pass++) {
 		/* Now build the prologue, body code & epilogue for real. */
 		cgctx.idx = 0;
-		bpf_jit_build_prologue(fp, code_base, &cgctx);
-		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+		bpf_jit_build_prologue(code_base, &cgctx);
+		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
 		bpf_jit_build_epilogue(code_base, &cgctx);
 
 		if (bpf_jit_enable > 1)
@@ -652,15 +218,15 @@ void bpf_jit_compile(struct bpf_prog *fp)
 				proglen - (cgctx.idx * 4), cgctx.seen);
 	}
 
+skip_codegen_passes:
 	if (bpf_jit_enable > 1)
-		/* Note that we output the base address of the code_base
+		/*
+		 * Note that we output the base address of the code_base
 		 * rather than image, since opcodes are in code_base.
 		 */
 		bpf_jit_dump(flen, proglen, pass, code_base);
 
-	bpf_flush_icache(code_base, code_base + (proglen/4));
-
-#ifdef CONFIG_PPC64
+#ifdef PPC64_ELF_ABI_v1
 	/* Function descriptor nastiness: Address + TOC */
 	((u64 *)image)[0] = (u64)code_base;
 	((u64 *)image)[1] = local_paca->kernel_toc;
@@ -668,16 +234,38 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
 	fp->bpf_func = (void *)image;
 	fp->jited = 1;
+	fp->jited_len = alloclen;
+
+	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+	if (!fp->is_func || extra_pass) {
+		bpf_prog_fill_jited_linfo(fp, addrs);
+out_addrs:
+		kfree(addrs);
+		kfree(jit_data);
+		fp->aux->jit_data = NULL;
+	} else {
+		jit_data->addrs = addrs;
+		jit_data->ctx = cgctx;
+		jit_data->proglen = proglen;
+		jit_data->image = image;
+		jit_data->header = bpf_hdr;
+	}
 
 out:
-	kfree(addrs);
-	return;
+	if (bpf_blinded)
+		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
+
+	return fp;
 }
 
+/* Overriding bpf_jit_free() as we don't set images read-only. */
 void bpf_jit_free(struct bpf_prog *fp)
 {
+	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+	struct bpf_binary_header *bpf_hdr = (void *)addr;
+
 	if (fp->jited)
-		module_memfree(fp->bpf_func);
+		bpf_jit_binary_free(bpf_hdr);
 
 	bpf_prog_unlock_free(fp);
 }
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
new file mode 100644
index 0000000000000000000000000000000000000000..bbb16099e8c7fa384922f768b5ca3734670cc28b
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -0,0 +1,1100 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * eBPF JIT compiler for PPC32
+ *
+ * Copyright 2020 Christophe Leroy <christophe.leroy@csgroup.eu>
+ *		  CS GROUP France
+ *
+ * Based on PPC64 eBPF JIT compiler by Naveen N. Rao
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <asm/asm-compat.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
+
+#include "bpf_jit.h"
+
+/*
+ * Stack layout:
+ *
+ *		[	prev sp		] <-------------
+ *		[   nv gpr save area	] 16 * 4	|
+ * fp (r31) -->	[   ebpf stack space	] up to 512	|
+ *		[     frame header	] 16		|
+ * sp (r1) --->	[    stack pointer	] --------------
+ */
+
+/* for gpr non volatile registers r17 to r31 (15) + tail call */
+#define BPF_PPC_STACK_SAVE	(15 * 4 + 4)
+/* stack frame, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME(ctx)	(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size)
+
+/* BPF register usage */
+#define TMP_REG	(MAX_BPF_JIT_REG + 0)
+
+/* BPF to ppc register mappings */
+const int b2p[MAX_BPF_JIT_REG + 1] = {
+	/* function return value */
+	[BPF_REG_0] = 12,
+	/* function arguments */
+	[BPF_REG_1] = 4,
+	[BPF_REG_2] = 6,
+	[BPF_REG_3] = 8,
+	[BPF_REG_4] = 10,
+	[BPF_REG_5] = 22,
+	/* non volatile registers */
+	[BPF_REG_6] = 24,
+	[BPF_REG_7] = 26,
+	[BPF_REG_8] = 28,
+	[BPF_REG_9] = 30,
+	/* frame pointer aka BPF_REG_10 */
+	[BPF_REG_FP] = 18,
+	/* eBPF jit internal registers */
+	[BPF_REG_AX] = 20,
+	[TMP_REG] = 31,		/* 32 bits */
+};
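+
+/*
+ * Each 64-bit eBPF register lives in a pair of adjacent GPRs: b2p[reg]
+ * holds the low 32 bits and b2p[reg] - 1 the high 32 bits, which is why
+ * the table above only uses even GPRs and why the body code derives
+ * dst_reg_h/src_reg_h as dst_reg - 1/src_reg - 1. TMP_REG (r31) is the
+ * exception and is used as a single 32-bit scratch register.
+ */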
+
+static int bpf_to_ppc(struct codegen_context *ctx, int reg)
+{
+	return ctx->b2p[reg];
+}
+
+/* PPC NVR range -- update this if we ever use NVRs below r17 */
+#define BPF_PPC_NVR_MIN		17
+#define BPF_PPC_TC		16
+
+static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
+{
+	if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC)
+		return BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg);
+
+	WARN(true, "BPF JIT is asking about unknown registers, will crash the stack");
+	/* Use the hole we have left for alignment */
+	return BPF_PPC_STACKFRAME(ctx) - 4;
+}
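+
+/*
+ * Non-volatile GPRs are saved as 4-byte slots at the top of the frame,
+ * r31 in the highest slot, hence the BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg)
+ * offset above; the tail call count occupies the slot of pseudo-register
+ * BPF_PPC_TC (16), just below r17's slot.
+ */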
+
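+/*
+ * When the program makes no function calls, remap BPF registers that were
+ * initially assigned to non-volatile GPRs onto volatile GPRs the program
+ * does not use, so the prologue/epilogue need not save and restore them.
+ * The 0xaaaaaaaa masks pick out the even (low-word) GPR of each register
+ * pair in the 'seen' bitmap, assuming its usual 1 << (31 - reg) encoding.
+ */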
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+	if (ctx->seen & SEEN_FUNC)
+		return;
+
+	while (ctx->seen & SEEN_NVREG_MASK &&
+	      (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+		int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab));
+		int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));
+		int i;
+
+		for (i = BPF_REG_0; i <= TMP_REG; i++) {
+			if (ctx->b2p[i] != old)
+				continue;
+			ctx->b2p[i] = new;
+			bpf_set_seen_register(ctx, new);
+			bpf_clear_seen_register(ctx, old);
+			if (i != TMP_REG) {
+				bpf_set_seen_register(ctx, new - 1);
+				bpf_clear_seen_register(ctx, old - 1);
+			}
+			break;
+		}
+	}
+}
+
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+{
+	int i;
+
+	/* First arg comes in as a 32-bit pointer. */
+	EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3));
+	EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0));
+	EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx)));
+
+	/*
+	 * Initialize tail_call_cnt in stack frame if we do tail calls.
+	 * Otherwise, put in NOPs so that it can be skipped when we are
+	 * invoked through a tail call.
+	 */
+	if (ctx->seen & SEEN_TAILCALL) {
+		EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+	} else {
+		EMIT(PPC_RAW_NOP());
+	}
+
+#define BPF_TAILCALL_PROLOGUE_SIZE	16
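+
+/*
+ * These 16 bytes cover the mr/li/stwu/stw-or-nop sequence emitted above:
+ * bpf_jit_emit_tail_call() skips them by branching to the target program's
+ * bpf_func + BPF_TAILCALL_PROLOGUE_SIZE after moving the context pointer
+ * back into r3, so the callee keeps the caller's stack frame and
+ * tail_call_cnt.
+ */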
+
+	/*
+	 * We need a stack frame, but we don't necessarily need to
+	 * save/restore LR unless we call other functions
+	 */
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_MFLR(__REG_R0));
+
+	/*
+	 * Back up non-volatile regs -- registers r18-r31
+	 */
+	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+		if (bpf_is_seen_register(ctx, i))
+			EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+
+	/* If needed, retrieve arguments 9 and 10, i.e. the 5th 64-bit arg. */
+	if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
+		EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx) + 8));
+		EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx) + 12));
+	}
+
+	/* Setup frame pointer to point to the bpf stack area */
+	if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) {
+		EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0));
+		EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1,
+				  STACK_FRAME_MIN_SIZE + ctx->stack_size));
+	}
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+}
+
+static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
+{
+	int i;
+
+	/* Restore NVRs */
+	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+		if (bpf_is_seen_register(ctx, i))
+			EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+	EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0)));
+
+	bpf_jit_emit_common_epilogue(image, ctx);
+
+	/* Tear down our stack frame */
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+
+	EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx)));
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_MTLR(__REG_R0));
+
+	EMIT(PPC_RAW_BLR());
+}
+
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
+{
+	s32 rel = (s32)func - (s32)(image + ctx->idx);
+
+	if (image && rel < 0x2000000 && rel >= -0x2000000) {
+		PPC_BL_ABS(func);
+	} else {
+		/* Load function address into r0 */
+		EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func)));
+		EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func)));
+		EMIT(PPC_RAW_MTLR(__REG_R0));
+		EMIT(PPC_RAW_BLRL());
+	}
+}
+
+static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+{
+	/*
+	 * By now, the eBPF program has already set up parameters in r3-r8:
+	 * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+	 * r5-r6/BPF_REG_2 - pointer to bpf_array
+	 * r7-r8/BPF_REG_3 - index in bpf_array
+	 */
+	int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2);
+	int b2p_index = bpf_to_ppc(ctx, BPF_REG_3);
+
+	/*
+	 * if (index >= array->map.max_entries)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+	EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0));
+	EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+	PPC_BCC(COND_GE, out);
+
+	/*
+	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT));
+	/* tail_call_cnt++; */
+	EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1));
+	PPC_BCC(COND_GT, out);
+
+	/* prog = array->ptrs[index]; */
+	EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29));
+	EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array));
+	EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs)));
+	EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+
+	/*
+	 * if (prog == NULL)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_CMPLWI(__REG_R3, 0));
+	PPC_BCC(COND_EQ, out);
+
+	/* goto *(prog->bpf_func + prologue_size); */
+	EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func)));
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+
+	EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE));
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_MTLR(__REG_R0));
+
+	EMIT(PPC_RAW_MTCTR(__REG_R3));
+
+	EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1)));
+
+	/* Restore NVRs before branching to the next program */
+	bpf_jit_emit_common_epilogue(image, ctx);
+
+	EMIT(PPC_RAW_BCTR());
+	/* out: */
+}
+
+/* Assemble the body code between the prologue & epilogue */
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+		       u32 *addrs, bool extra_pass)
+{
+	const struct bpf_insn *insn = fp->insnsi;
+	int flen = fp->len;
+	int i, ret;
+
+	/* Start of epilogue code - will only be valid 2nd pass onwards */
+	u32 exit_addr = addrs[flen];
+
+	for (i = 0; i < flen; i++) {
+		u32 code = insn[i].code;
+		u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg);
+		u32 dst_reg_h = dst_reg - 1;
+		u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
+		u32 src_reg_h = src_reg - 1;
+		u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG);
+		s16 off = insn[i].off;
+		s32 imm = insn[i].imm;
+		bool func_addr_fixed;
+		u64 func_addr;
+		u32 true_cond;
+
+		/*
+		 * addrs[] maps a BPF bytecode address into a real offset from
+		 * the start of the body code.
+		 */
+		addrs[i] = ctx->idx * 4;
+
+		/*
+		 * As an optimization, we note down which registers
+		 * are used so that we can only save/restore those in our
+		 * prologue and epilogue. We do this here regardless of whether
+		 * the actual BPF instruction uses src/dst registers or not
+		 * (for instance, BPF_CALL does not use them). The expectation
+		 * is that those instructions will have src_reg/dst_reg set to
+		 * 0. Even otherwise, we just lose some prologue/epilogue
+		 * optimization but everything else should work without
+		 * any issues.
+		 */
+		if (dst_reg >= 3 && dst_reg < 32) {
+			bpf_set_seen_register(ctx, dst_reg);
+			bpf_set_seen_register(ctx, dst_reg_h);
+		}
+
+		if (src_reg >= 3 && src_reg < 32) {
+			bpf_set_seen_register(ctx, src_reg);
+			bpf_set_seen_register(ctx, src_reg_h);
+		}
+
+		switch (code) {
+		/*
+		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
+		 */
+		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
+			EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
+			EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, src_reg));
+			EMIT(PPC_RAW_ADDE(dst_reg_h, dst_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
+			EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
+			EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, dst_reg));
+			EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, dst_reg_h));
+			break;
+		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
+			imm = -imm;
+			fallthrough;
+		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
+			if (IMM_HA(imm) & 0xffff)
+				EMIT(PPC_RAW_ADDIS(dst_reg, dst_reg, IMM_HA(imm)));
+			if (IMM_L(imm))
+				EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
+			break;
+		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
+			imm = -imm;
+			fallthrough;
+		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
+			if (!imm)
+				break;
+
+			if (imm >= -32768 && imm < 32768) {
+				EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm));
+			} else {
+				PPC_LI32(__REG_R0, imm);
+				EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0));
+			}
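+			/*
+			 * addic/addc set CA from the low word addition; addze
+			 * adds just that carry to the high word, while addme
+			 * adds CA - 1 to account for the sign extension of a
+			 * negative immediate.
+			 */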
+			if (imm >= 0)
+				EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h));
+			else
+				EMIT(PPC_RAW_ADDME(dst_reg_h, dst_reg_h));
+			break;
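+		/*
+		 * 64-bit multiply from 32-bit halves: with dst = dh:dl and
+		 * src = sh:sl, the result's low word is low32(dl * sl) and
+		 * its high word is low32(dl * sh) + low32(dh * sl) +
+		 * high32(dl * sl), which is what the mulw/mulhwu sequence
+		 * below accumulates (dh * sh only affects bits above 63).
+		 */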
+		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h));
+			EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg));
+			EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg));
+			EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
+			EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0));
+			EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg));
+			break;
+		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
+			EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
+			if (imm >= -32768 && imm < 32768) {
+				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm));
+			} else {
+				PPC_LI32(__REG_R0, imm);
+				EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0));
+			}
+			break;
+		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
+			if (!imm) {
+				PPC_LI32(dst_reg, 0);
+				PPC_LI32(dst_reg_h, 0);
+				break;
+			}
+			if (imm == 1)
+				break;
+			if (imm == -1) {
+				EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0));
+				EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h));
+				break;
+			}
+			bpf_set_seen_register(ctx, tmp_reg);
+			PPC_LI32(tmp_reg, imm);
+			EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg));
+			if (imm < 0)
+				EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg));
+			EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg));
+			EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg));
+			EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0));
+			break;
+		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
+			EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
+			EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg));
+			EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0));
+			EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0));
+			break;
+		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
+			return -EOPNOTSUPP;
+		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
+			return -EOPNOTSUPP;
+		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
+			if (!imm)
+				return -EINVAL;
+			if (imm == 1)
+				break;
+
+			PPC_LI32(__REG_R0, imm);
+			EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0));
+			break;
+		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
+			if (!imm)
+				return -EINVAL;
+
+			if (!is_power_of_2((u32)imm)) {
+				bpf_set_seen_register(ctx, tmp_reg);
+				PPC_LI32(tmp_reg, imm);
+				EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg));
+				EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0));
+				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0));
+				break;
+			}
+			if (imm == 1)
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			else
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2((u32)imm), 31));
+
+			break;
+		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
+			if (!imm)
+				return -EINVAL;
+			if (imm < 0)
+				imm = -imm;
+			if (!is_power_of_2(imm))
+				return -EOPNOTSUPP;
+			if (imm == 1)
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			else
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31));
+			EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			break;
+		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
+			if (!imm)
+				return -EINVAL;
+			if (!is_power_of_2(abs(imm)))
+				return -EOPNOTSUPP;
+
+			if (imm < 0) {
+				EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0));
+				EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h));
+				imm = -imm;
+			}
+			if (imm == 1)
+				break;
+			imm = ilog2(imm);
+			EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31));
+			EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1));
+			EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm));
+			break;
+		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
+			EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
+			break;
+		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+			EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0));
+			EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h));
+			break;
+
+		/*
+		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
+		 */
+		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+			EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
+			EMIT(PPC_RAW_AND(dst_reg_h, dst_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
+			EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+			if (imm >= 0)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			fallthrough;
+		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
+			if (!IMM_H(imm)) {
+				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
+			} else if (!IMM_L(imm)) {
+				EMIT(PPC_RAW_ANDIS(dst_reg, dst_reg, IMM_H(imm)));
+			} else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(imm) - 1)) - 1))) {
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0,
+						    32 - fls(imm), 32 - ffs(imm)));
+			} else {
+				PPC_LI32(__REG_R0, imm);
+				EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0));
+			}
+			break;
+		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
+			EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
+			/* Sign-extended */
+			if (imm < 0)
+				EMIT(PPC_RAW_LI(dst_reg_h, -1));
+			fallthrough;
+		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
+			if (IMM_L(imm))
+				EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
+			if (IMM_H(imm))
+				EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
+			break;
+		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
+			if (dst_reg == src_reg) {
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			} else {
+				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
+				EMIT(PPC_RAW_XOR(dst_reg_h, dst_reg_h, src_reg_h));
+			}
+			break;
+		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
+			if (dst_reg == src_reg)
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			else
+				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
+			if (imm < 0)
+				EMIT(PPC_RAW_NOR(dst_reg_h, dst_reg_h, dst_reg_h));
+			fallthrough;
+		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
+			if (IMM_L(imm))
+				EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
+			if (IMM_H(imm))
+				EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
+			break;
+		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
+			EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
+			break;
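+		/*
+		 * The 64-bit LSH/RSH by a register below rely on slw/srw using
+		 * only the low 6 bits of the shift amount and returning 0 when
+		 * that amount is 32 or more: the subfic (32 - n) term handles
+		 * the cross word for n < 32 and the addi (n + 32, i.e. n - 32
+		 * modulo 64) term handles n > 32, so a single instruction
+		 * sequence covers n = 0..63.
+		 */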
+		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+			EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg));
+			EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+			EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0));
+			EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg));
+			EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0));
+			EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
+			EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg));
+			break;
+		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
+			if (!imm)
+				break;
+			EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
+			break;
+		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
+			if (imm < 0)
+				return -EINVAL;
+			if (!imm)
+				break;
+			if (imm < 32) {
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm));
+				EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31));
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, imm, 0, 31 - imm));
+				break;
+			}
+			if (imm < 64)
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg, imm, 0, 31 - imm));
+			else
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			EMIT(PPC_RAW_LI(dst_reg, 0));
+			break;
+		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
+			EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+			EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+			EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+			EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
+			EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
+			EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+			break;
+		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
+			if (!imm)
+				break;
+			EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
+			break;
+		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
+			if (imm < 0)
+				return -EINVAL;
+			if (!imm)
+				break;
+			if (imm < 32) {
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31));
+				EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1));
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, 32 - imm, imm, 31));
+				break;
+			}
+			if (imm < 64)
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg_h, 64 - imm, imm - 32, 31));
+			else
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			break;
+		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
+			EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+			EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+			EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
+			EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
+			EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26));
+			EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg));
+			EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg));
+			EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+			break;
+		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
+			if (!imm)
+				break;
+			EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
+			break;
+		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
+			if (imm < 0)
+				return -EINVAL;
+			if (!imm)
+				break;
+			if (imm < 32) {
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31));
+				EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm));
+				break;
+			}
+			if (imm < 64)
+				EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, imm - 32));
+			else
+				EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, 31));
+			EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31));
+			break;
+
+		/*
+		 * MOV
+		 */
+		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+			if (dst_reg == src_reg)
+				break;
+			EMIT(PPC_RAW_MR(dst_reg, src_reg));
+			EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
+			/* special mov32 for zext */
+			if (imm == 1)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			else if (dst_reg != src_reg)
+				EMIT(PPC_RAW_MR(dst_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
+			PPC_LI32(dst_reg, imm);
+			PPC_EX32(dst_reg_h, imm);
+			break;
+		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
+			PPC_LI32(dst_reg, imm);
+			break;
+
+		/*
+		 * BPF_FROM_BE/LE
+		 */
+		case BPF_ALU | BPF_END | BPF_FROM_LE:
+			switch (imm) {
+			case 16:
+				/* Copy 16 bits to upper part */
+				EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg, 16, 0, 15));
+				/* Rotate 8 bits right & mask */
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31));
+				break;
+			case 32:
+				/*
+				 * Rotate word left by 8 bits:
+				 * 2 bytes are already in their final position
+				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
+				 */
+				EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31));
+				/* Rotate 24 bits and insert byte 1 */
+				EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7));
+				/* Rotate 24 bits and insert byte 3 */
+				EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23));
+				EMIT(PPC_RAW_MR(dst_reg, __REG_R0));
+				break;
+			case 64:
+				bpf_set_seen_register(ctx, tmp_reg);
+				EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31));
+				EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31));
+				/* Rotate 24 bits and insert byte 1 */
+				EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7));
+				EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7));
+				/* Rotate 24 bits and insert byte 3 */
+				EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23));
+				EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23));
+				EMIT(PPC_RAW_MR(dst_reg, __REG_R0));
+				EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg));
+				break;
+			}
+			break;
+		case BPF_ALU | BPF_END | BPF_FROM_BE:
+			switch (imm) {
+			case 16:
+				/* zero-extend 16 bits into 32 bits */
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 16, 31));
+				break;
+			case 32:
+			case 64:
+				/* nop */
+				break;
+			}
+			break;
+
+		/*
+		 * BPF_ST(X)
+		 */
+		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
+			EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
+			break;
+		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+			PPC_LI32(__REG_R0, imm);
+			EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
+			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
+			break;
+		case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+			PPC_LI32(__REG_R0, imm);
+			EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+			EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
+			break;
+		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+			PPC_LI32(__REG_R0, imm);
+			EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
+			EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off));
+			EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4));
+			break;
+		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+			PPC_LI32(__REG_R0, imm);
+			EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4));
+			PPC_EX32(__REG_R0, imm);
+			EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off));
+			break;
+
+		/*
+		 * BPF_STX XADD (atomic_add)
+		 */
+		case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */
+			bpf_set_seen_register(ctx, tmp_reg);
+			/* Get offset into TMP_REG */
+			EMIT(PPC_RAW_LI(tmp_reg, off));
+			/* load value from memory into r0 */
+			EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0));
+			/* add value from src_reg into this */
+			EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg));
+			/* store result back */
+			EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg));
+			/* we're done if this succeeded */
+			PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4);
+			break;
+
+		case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
+			return -EOPNOTSUPP;
+
+		/*
+		 * BPF_LDX
+		 */
+		case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+			EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+			if (!fp->aux->verifier_zext)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			break;
+		case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+			EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+			if (!fp->aux->verifier_zext)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			break;
+		case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+			if (!fp->aux->verifier_zext)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			break;
+		case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+			EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+			break;
+
+		/*
+		 * Doubleword load
+		 * 16 byte instruction that uses two 'struct bpf_insn'
+		 */
+		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+			PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
+			PPC_LI32(dst_reg, (u32)insn[i].imm);
+			/* Adjust for two bpf instructions */
+			addrs[++i] = ctx->idx * 4;
+			break;
+
+		/*
+		 * Return/Exit
+		 */
+		case BPF_JMP | BPF_EXIT:
+			/*
+			 * If this isn't the very last instruction, branch to
+			 * the epilogue. If we _are_ the last instruction,
+			 * we'll just fall through to the epilogue.
+			 */
+			if (i != flen - 1)
+				PPC_JMP(exit_addr);
+			/* else fall through to the epilogue */
+			break;
+
+		/*
+		 * Call kernel helper or bpf function
+		 */
+		case BPF_JMP | BPF_CALL:
+			ctx->seen |= SEEN_FUNC;
+
+			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+						    &func_addr, &func_addr_fixed);
+			if (ret < 0)
+				return ret;
+
+			if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
+				EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8));
+				EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12));
+			}
+
+			bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+
+			EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3));
+			EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4));
+			break;
+
+		/*
+		 * Jumps and branches
+		 */
+		case BPF_JMP | BPF_JA:
+			PPC_JMP(addrs[i + 1 + off]);
+			break;
+
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSGT | BPF_K:
+		case BPF_JMP | BPF_JSGT | BPF_X:
+		case BPF_JMP32 | BPF_JGT | BPF_K:
+		case BPF_JMP32 | BPF_JGT | BPF_X:
+		case BPF_JMP32 | BPF_JSGT | BPF_K:
+		case BPF_JMP32 | BPF_JSGT | BPF_X:
+			true_cond = COND_GT;
+			goto cond_branch;
+		case BPF_JMP | BPF_JLT | BPF_K:
+		case BPF_JMP | BPF_JLT | BPF_X:
+		case BPF_JMP | BPF_JSLT | BPF_K:
+		case BPF_JMP | BPF_JSLT | BPF_X:
+		case BPF_JMP32 | BPF_JLT | BPF_K:
+		case BPF_JMP32 | BPF_JLT | BPF_X:
+		case BPF_JMP32 | BPF_JSLT | BPF_K:
+		case BPF_JMP32 | BPF_JSLT | BPF_X:
+			true_cond = COND_LT;
+			goto cond_branch;
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JSGE | BPF_K:
+		case BPF_JMP | BPF_JSGE | BPF_X:
+		case BPF_JMP32 | BPF_JGE | BPF_K:
+		case BPF_JMP32 | BPF_JGE | BPF_X:
+		case BPF_JMP32 | BPF_JSGE | BPF_K:
+		case BPF_JMP32 | BPF_JSGE | BPF_X:
+			true_cond = COND_GE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JLE | BPF_K:
+		case BPF_JMP | BPF_JLE | BPF_X:
+		case BPF_JMP | BPF_JSLE | BPF_K:
+		case BPF_JMP | BPF_JSLE | BPF_X:
+		case BPF_JMP32 | BPF_JLE | BPF_K:
+		case BPF_JMP32 | BPF_JLE | BPF_X:
+		case BPF_JMP32 | BPF_JSLE | BPF_K:
+		case BPF_JMP32 | BPF_JSLE | BPF_X:
+			true_cond = COND_LE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP32 | BPF_JEQ | BPF_K:
+		case BPF_JMP32 | BPF_JEQ | BPF_X:
+			true_cond = COND_EQ;
+			goto cond_branch;
+		case BPF_JMP | BPF_JNE | BPF_K:
+		case BPF_JMP | BPF_JNE | BPF_X:
+		case BPF_JMP32 | BPF_JNE | BPF_K:
+		case BPF_JMP32 | BPF_JNE | BPF_X:
+			true_cond = COND_NE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
+		case BPF_JMP32 | BPF_JSET | BPF_K:
+		case BPF_JMP32 | BPF_JSET | BPF_X:
+			true_cond = COND_NE;
+			/* fallthrough; */
+
+cond_branch:
+			switch (code) {
+			case BPF_JMP | BPF_JGT | BPF_X:
+			case BPF_JMP | BPF_JLT | BPF_X:
+			case BPF_JMP | BPF_JGE | BPF_X:
+			case BPF_JMP | BPF_JLE | BPF_X:
+			case BPF_JMP | BPF_JEQ | BPF_X:
+			case BPF_JMP | BPF_JNE | BPF_X:
+				/* unsigned comparison */
+				EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h));
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+				EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+				break;
+			case BPF_JMP32 | BPF_JGT | BPF_X:
+			case BPF_JMP32 | BPF_JLT | BPF_X:
+			case BPF_JMP32 | BPF_JGE | BPF_X:
+			case BPF_JMP32 | BPF_JLE | BPF_X:
+			case BPF_JMP32 | BPF_JEQ | BPF_X:
+			case BPF_JMP32 | BPF_JNE | BPF_X:
+				/* unsigned comparison */
+				EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JSGT | BPF_X:
+			case BPF_JMP | BPF_JSLT | BPF_X:
+			case BPF_JMP | BPF_JSGE | BPF_X:
+			case BPF_JMP | BPF_JSLE | BPF_X:
+				/* signed comparison */
+				EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h));
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+				EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+				break;
+			case BPF_JMP32 | BPF_JSGT | BPF_X:
+			case BPF_JMP32 | BPF_JSLT | BPF_X:
+			case BPF_JMP32 | BPF_JSGE | BPF_X:
+			case BPF_JMP32 | BPF_JSLE | BPF_X:
+				/* signed comparison */
+				EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JSET | BPF_X:
+				EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h));
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+				EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg));
+				break;
+			case BPF_JMP32 | BPF_JSET | BPF_X:
+				EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JNE | BPF_K:
+			case BPF_JMP | BPF_JEQ | BPF_K:
+			case BPF_JMP | BPF_JGT | BPF_K:
+			case BPF_JMP | BPF_JLT | BPF_K:
+			case BPF_JMP | BPF_JGE | BPF_K:
+			case BPF_JMP | BPF_JLE | BPF_K:
+				/*
+				 * Need sign-extended load, so only positive
+				 * values can be used as imm in cmplwi
+				 */
+				if (imm >= 0 && imm < 32768) {
+					EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0));
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+				} else {
+					/* sign-extending load ... but unsigned comparison */
+					PPC_EX32(__REG_R0, imm);
+					EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0));
+					PPC_LI32(__REG_R0, imm);
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+				}
+				break;
+			case BPF_JMP32 | BPF_JNE | BPF_K:
+			case BPF_JMP32 | BPF_JEQ | BPF_K:
+			case BPF_JMP32 | BPF_JGT | BPF_K:
+			case BPF_JMP32 | BPF_JLT | BPF_K:
+			case BPF_JMP32 | BPF_JGE | BPF_K:
+			case BPF_JMP32 | BPF_JLE | BPF_K:
+				if (imm >= 0 && imm < 65536) {
+					EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+				} else {
+					PPC_LI32(__REG_R0, imm);
+					EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+				}
+				break;
+			case BPF_JMP | BPF_JSGT | BPF_K:
+			case BPF_JMP | BPF_JSLT | BPF_K:
+			case BPF_JMP | BPF_JSGE | BPF_K:
+			case BPF_JMP | BPF_JSLE | BPF_K:
+				if (imm >= 0 && imm < 65536) {
+					EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+				} else {
+					/* sign-extending load */
+					EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+					PPC_LI32(__REG_R0, imm);
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+				}
+				break;
+			case BPF_JMP32 | BPF_JSGT | BPF_K:
+			case BPF_JMP32 | BPF_JSLT | BPF_K:
+			case BPF_JMP32 | BPF_JSGE | BPF_K:
+			case BPF_JMP32 | BPF_JSLE | BPF_K:
+				/*
+				 * signed comparison, so any 16-bit value
+				 * can be used in cmpwi
+				 */
+				if (imm >= -32768 && imm < 32768) {
+					EMIT(PPC_RAW_CMPWI(dst_reg, imm));
+				} else {
+					/* sign-extending load */
+					PPC_LI32(__REG_R0, imm);
+					EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0));
+				}
+				break;
+			case BPF_JMP | BPF_JSET | BPF_K:
+				/* andi does not sign-extend the immediate */
+				if (imm >= 0 && imm < 32768) {
+					/* PPC_ANDI is _only/always_ dot-form */
+					EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm));
+				} else {
+					PPC_LI32(__REG_R0, imm);
+					if (imm < 0) {
+						EMIT(PPC_RAW_CMPWI(dst_reg_h, 0));
+						PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					}
+					EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0));
+				}
+				break;
+			case BPF_JMP32 | BPF_JSET | BPF_K:
+				/* andi does not sign-extend the immediate */
+				if (imm >= 0 && imm < 32768) {
+					/* PPC_ANDI is _only/always_ dot-form */
+					EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm));
+				} else {
+					PPC_LI32(__REG_R0, imm);
+					EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0));
+				}
+				break;
+			}
+			PPC_BCC(true_cond, addrs[i + 1 + off]);
+			break;
+
+		/*
+		 * Tail call
+		 */
+		case BPF_JMP | BPF_TAIL_CALL:
+			ctx->seen |= SEEN_TAILCALL;
+			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+			break;
+
+		default:
+			/*
+			 * The filter contains something cruel & unusual.
+			 * We don't handle it, but also there shouldn't be
+			 * anything missing from our list.
+			 */
+			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i);
+			return -EOPNOTSUPP;
+		}
+		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext &&
+		    !insn_is_zext(&insn[i + 1]) &&
+		    !(BPF_OP(code) == BPF_END && imm == 64))
+			EMIT(PPC_RAW_LI(dst_reg_h, 0));
+	}
+
+	/* Set end-of-body-code address for exit. */
+	addrs[i] = ctx->idx * 4;
+
+	return 0;
+}
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index aaf1a887f653b81f6a6641aa3f5906ac1458daf6..57a8c1153851a05d9b40607a84bc11c96159dc47 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -18,27 +18,6 @@
 
 #include "bpf_jit64.h"
 
-static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
-{
-	memset32(area, BREAKPOINT_INSTRUCTION, size/4);
-}
-
-static inline void bpf_flush_icache(void *start, void *end)
-{
-	smp_wmb();
-	flush_icache_range((unsigned long)start, (unsigned long)end);
-}
-
-static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
-{
-	return (ctx->seen & (1 << (31 - b2p[i])));
-}
-
-static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
-{
-	ctx->seen |= (1 << (31 - b2p[i]));
-}
-
 static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
 {
 	/*
@@ -47,7 +26,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
 	 * - the bpf program uses its stack area
 	 * The latter condition is deduced from the usage of BPF_REG_FP
 	 */
-	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
+	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]);
 }
 
 /*
@@ -85,7 +64,11 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 	BUG();
 }
 
-static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+}
+
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
 	int i;
 
@@ -124,11 +107,11 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 	 * in the protected zone below the previous stack frame
 	 */
 	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-		if (bpf_is_seen_register(ctx, i))
+		if (bpf_is_seen_register(ctx, b2p[i]))
 			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 
 	/* Setup frame pointer to point to the bpf stack area */
-	if (bpf_is_seen_register(ctx, BPF_REG_FP))
+	if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP]))
 		EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1,
 				STACK_FRAME_MIN_SIZE + ctx->stack_size));
 }
@@ -139,7 +122,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
 
 	/* Restore NVRs */
 	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-		if (bpf_is_seen_register(ctx, i))
+		if (bpf_is_seen_register(ctx, b2p[i]))
 			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 
 	/* Tear down our stack frame */
@@ -152,7 +135,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
 	}
 }
 
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 {
 	bpf_jit_emit_common_epilogue(image, ctx);
 
@@ -187,8 +170,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
 	EMIT(PPC_RAW_BLRL());
 }
 
-static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
-				       u64 func)
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
 {
 	unsigned int i, ctx_idx = ctx->idx;
 
@@ -289,9 +271,8 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 }
 
 /* Assemble the body code between the prologue & epilogue */
-static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
-			      struct codegen_context *ctx,
-			      u32 *addrs, bool extra_pass)
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+		       u32 *addrs, bool extra_pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
@@ -330,9 +311,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 		 * any issues.
 		 */
 		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
-			bpf_set_seen_register(ctx, insn[i].dst_reg);
+			bpf_set_seen_register(ctx, dst_reg);
 		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
-			bpf_set_seen_register(ctx, insn[i].src_reg);
+			bpf_set_seen_register(ctx, src_reg);
 
 		switch (code) {
 		/*
@@ -1026,249 +1007,3 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 
 	return 0;
 }
-
-/* Fix the branch target addresses for subprog calls */
-static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
-				       struct codegen_context *ctx, u32 *addrs)
-{
-	const struct bpf_insn *insn = fp->insnsi;
-	bool func_addr_fixed;
-	u64 func_addr;
-	u32 tmp_idx;
-	int i, ret;
-
-	for (i = 0; i < fp->len; i++) {
-		/*
-		 * During the extra pass, only the branch target addresses for
-		 * the subprog calls need to be fixed. All other instructions
-		 * can left untouched.
-		 *
-		 * The JITed image length does not change because we already
-		 * ensure that the JITed instruction sequence for these calls
-		 * are of fixed length by padding them with NOPs.
-		 */
-		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
-		    insn[i].src_reg == BPF_PSEUDO_CALL) {
-			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
-						    &func_addr,
-						    &func_addr_fixed);
-			if (ret < 0)
-				return ret;
-
-			/*
-			 * Save ctx->idx as this would currently point to the
-			 * end of the JITed image and set it to the offset of
-			 * the instruction sequence corresponding to the
-			 * subprog call temporarily.
-			 */
-			tmp_idx = ctx->idx;
-			ctx->idx = addrs[i] / 4;
-			bpf_jit_emit_func_call_rel(image, ctx, func_addr);
-
-			/*
-			 * Restore ctx->idx here. This is safe as the length
-			 * of the JITed sequence remains unchanged.
-			 */
-			ctx->idx = tmp_idx;
-		}
-	}
-
-	return 0;
-}
-
-struct powerpc64_jit_data {
-	struct bpf_binary_header *header;
-	u32 *addrs;
-	u8 *image;
-	u32 proglen;
-	struct codegen_context ctx;
-};
-
-bool bpf_jit_needs_zext(void)
-{
-	return true;
-}
-
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
-{
-	u32 proglen;
-	u32 alloclen;
-	u8 *image = NULL;
-	u32 *code_base;
-	u32 *addrs;
-	struct powerpc64_jit_data *jit_data;
-	struct codegen_context cgctx;
-	int pass;
-	int flen;
-	struct bpf_binary_header *bpf_hdr;
-	struct bpf_prog *org_fp = fp;
-	struct bpf_prog *tmp_fp;
-	bool bpf_blinded = false;
-	bool extra_pass = false;
-
-	if (!fp->jit_requested)
-		return org_fp;
-
-	tmp_fp = bpf_jit_blind_constants(org_fp);
-	if (IS_ERR(tmp_fp))
-		return org_fp;
-
-	if (tmp_fp != org_fp) {
-		bpf_blinded = true;
-		fp = tmp_fp;
-	}
-
-	jit_data = fp->aux->jit_data;
-	if (!jit_data) {
-		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
-		if (!jit_data) {
-			fp = org_fp;
-			goto out;
-		}
-		fp->aux->jit_data = jit_data;
-	}
-
-	flen = fp->len;
-	addrs = jit_data->addrs;
-	if (addrs) {
-		cgctx = jit_data->ctx;
-		image = jit_data->image;
-		bpf_hdr = jit_data->header;
-		proglen = jit_data->proglen;
-		alloclen = proglen + FUNCTION_DESCR_SIZE;
-		extra_pass = true;
-		goto skip_init_ctx;
-	}
-
-	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
-	if (addrs == NULL) {
-		fp = org_fp;
-		goto out_addrs;
-	}
-
-	memset(&cgctx, 0, sizeof(struct codegen_context));
-
-	/* Make sure that the stack is quadword aligned. */
-	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
-
-	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
-		/* We hit something illegal or unsupported. */
-		fp = org_fp;
-		goto out_addrs;
-	}
-
-	/*
-	 * If we have seen a tail call, we need a second pass.
-	 * This is because bpf_jit_emit_common_epilogue() is called
-	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
-	 */
-	if (cgctx.seen & SEEN_TAILCALL) {
-		cgctx.idx = 0;
-		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
-			fp = org_fp;
-			goto out_addrs;
-		}
-	}
-
-	/*
-	 * Pretend to build prologue, given the features we've seen.  This will
-	 * update ctgtx.idx as it pretends to output instructions, then we can
-	 * calculate total size from idx.
-	 */
-	bpf_jit_build_prologue(0, &cgctx);
-	bpf_jit_build_epilogue(0, &cgctx);
-
-	proglen = cgctx.idx * 4;
-	alloclen = proglen + FUNCTION_DESCR_SIZE;
-
-	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
-			bpf_jit_fill_ill_insns);
-	if (!bpf_hdr) {
-		fp = org_fp;
-		goto out_addrs;
-	}
-
-skip_init_ctx:
-	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
-
-	if (extra_pass) {
-		/*
-		 * Do not touch the prologue and epilogue as they will remain
-		 * unchanged. Only fix the branch target address for subprog
-		 * calls in the body.
-		 *
-		 * This does not change the offsets and lengths of the subprog
-		 * call instruction sequences and hence, the size of the JITed
-		 * image as well.
-		 */
-		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
-
-		/* There is no need to perform the usual passes. */
-		goto skip_codegen_passes;
-	}
-
-	/* Code generation passes 1-2 */
-	for (pass = 1; pass < 3; pass++) {
-		/* Now build the prologue, body code & epilogue for real. */
-		cgctx.idx = 0;
-		bpf_jit_build_prologue(code_base, &cgctx);
-		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
-		bpf_jit_build_epilogue(code_base, &cgctx);
-
-		if (bpf_jit_enable > 1)
-			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
-				proglen - (cgctx.idx * 4), cgctx.seen);
-	}
-
-skip_codegen_passes:
-	if (bpf_jit_enable > 1)
-		/*
-		 * Note that we output the base address of the code_base
-		 * rather than image, since opcodes are in code_base.
-		 */
-		bpf_jit_dump(flen, proglen, pass, code_base);
-
-#ifdef PPC64_ELF_ABI_v1
-	/* Function descriptor nastiness: Address + TOC */
-	((u64 *)image)[0] = (u64)code_base;
-	((u64 *)image)[1] = local_paca->kernel_toc;
-#endif
-
-	fp->bpf_func = (void *)image;
-	fp->jited = 1;
-	fp->jited_len = alloclen;
-
-	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
-	if (!fp->is_func || extra_pass) {
-		bpf_prog_fill_jited_linfo(fp, addrs);
-out_addrs:
-		kfree(addrs);
-		kfree(jit_data);
-		fp->aux->jit_data = NULL;
-	} else {
-		jit_data->addrs = addrs;
-		jit_data->ctx = cgctx;
-		jit_data->proglen = proglen;
-		jit_data->image = image;
-		jit_data->header = bpf_hdr;
-	}
-
-out:
-	if (bpf_blinded)
-		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
-
-	return fp;
-}
-
-/* Overriding bpf_jit_free() as we don't set images read-only. */
-void bpf_jit_free(struct bpf_prog *fp)
-{
-	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
-	struct bpf_binary_header *bpf_hdr = (void *)addr;
-
-	if (fp->jited)
-		bpf_jit_binary_free(bpf_hdr);
-
-	bpf_prog_unlock_free(fp);
-}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 766f064f00fbf25322016546e2bd0c81fe3466eb..16d4d1b6a1ffb5b6cff94f3df6d380b139829318 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -17,6 +17,7 @@
 #include <asm/firmware.h>
 #include <asm/ptrace.h>
 #include <asm/code-patching.h>
+#include <asm/interrupt.h>
 
 #ifdef CONFIG_PPC64
 #include "internal.h"
@@ -168,7 +169,7 @@ static bool regs_use_siar(struct pt_regs *regs)
 	 * they have not been setup using perf_read_regs() and so regs->result
 	 * is something random.
 	 */
-	return ((TRAP(regs) == 0xf00) && regs->result);
+	return ((TRAP(regs) == INTERRUPT_PERFMON) && regs->result);
 }
 
 /*
@@ -347,7 +348,7 @@ static inline void perf_read_regs(struct pt_regs *regs)
 	 * hypervisor samples as well as samples in the kernel with
 	 * interrupts off hence the userspace check.
 	 */
-	if (TRAP(regs) != 0xf00)
+	if (TRAP(regs) != INTERRUPT_PERFMON)
 		use_siar = 0;
 	else if ((ppmu->flags & PPMU_NO_SIAR))
 		use_siar = 0;
@@ -1963,6 +1964,17 @@ static int power_pmu_event_init(struct perf_event *event)
 		return -ENOENT;
 	}
 
+	/*
+	 * PMU config registers have fields that are reserved, and some
+	 * specific values for bit fields are reserved. For example,
+	 * MMCRA[61:62] is Random Sampling Mode (SM) and a value of 0b11
+	 * for this field is reserved.
+	 * Check for invalid values in attr.config.
+	 */
+	if (ppmu->check_attr_config &&
+	    ppmu->check_attr_config(event))
+		return -EINVAL;
+
 	event->hw.config_base = ev;
 	event->hw.idx = 0;
 
@@ -2206,9 +2218,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 						ppmu->get_mem_data_src)
 			ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
 
-		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
+		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
 						ppmu->get_mem_weight)
-			ppmu->get_mem_weight(&data.weight.full);
+			ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
 
 		if (perf_event_overflow(event, &data, regs))
 			power_pmu_stop(event, 0);
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index e5eb33255066c4bc1e8638fc9597aa3db6845e28..1816f560a4652329490f502b662b0a6b681512bb 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -226,14 +226,14 @@ static struct attribute_group event_long_desc_group = {
 
 static struct kmem_cache *hv_page_cache;
 
-DEFINE_PER_CPU(int, hv_24x7_txn_flags);
-DEFINE_PER_CPU(int, hv_24x7_txn_err);
+static DEFINE_PER_CPU(int, hv_24x7_txn_flags);
+static DEFINE_PER_CPU(int, hv_24x7_txn_err);
 
 struct hv_24x7_hw {
 	struct perf_event *events[255];
 };
 
-DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
+static DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
 
 /*
  * request_buffer and result_buffer are not required to be 4k aligned,
@@ -241,8 +241,8 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
  * the simplest way to ensure that.
  */
 #define H24x7_DATA_BUFFER_SIZE	4096
-DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
-DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
 
 static unsigned int max_num_requests(int interface_version)
 {
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index e4f577da33d8b15b3886571efb62bb53278565f3..f92bf5f6b74f144ab612987779f79cb9b067776c 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -21,7 +21,7 @@ PMU_FORMAT_ATTR(thresh_stop,	"config:32-35");
 PMU_FORMAT_ATTR(thresh_start,	"config:36-39");
 PMU_FORMAT_ATTR(thresh_cmp,	"config:40-49");
 
-struct attribute *isa207_pmu_format_attr[] = {
+static struct attribute *isa207_pmu_format_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_pmcxsel.attr,
 	&format_attr_mark.attr,
@@ -275,17 +275,47 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
 
 	sier = mfspr(SPRN_SIER);
 	val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
-	if (val == 1 || val == 2) {
-		idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
-		sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
+	if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31)))
+		return;
+
+	idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
+	sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
+
+	dsrc->val = isa207_find_source(idx, sub_idx);
+	if (val == 7) {
+		u64 mmcra;
+		u32 op_type;
 
-		dsrc->val = isa207_find_source(idx, sub_idx);
+		/*
+		 * Type 0b111 denotes either a larx or an stcx instruction. Use
+		 * the MMCRA sampling bits [57:59] along with the type value to
+		 * determine the exact instruction type. If the sampling
+		 * criterion is neither a load nor a store, set the type to NA
+		 * by default.
+		 */
+		mmcra = mfspr(SPRN_MMCRA);
+
+		op_type = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & MMCRA_SAMP_ELIG_MASK;
+		switch (op_type) {
+		case 5:
+			dsrc->val |= P(OP, LOAD);
+			break;
+		case 7:
+			dsrc->val |= P(OP, STORE);
+			break;
+		default:
+			dsrc->val |= P(OP, NA);
+			break;
+		}
+	} else {
 		dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE);
 	}
 }
 
-void isa207_get_mem_weight(u64 *weight)
+void isa207_get_mem_weight(u64 *weight, u64 type)
 {
+	union perf_sample_weight *weight_fields;
+	u64 weight_lat;
 	u64 mmcra = mfspr(SPRN_MMCRA);
 	u64 exp = MMCRA_THR_CTR_EXP(mmcra);
 	u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
@@ -295,10 +325,31 @@ void isa207_get_mem_weight(u64 *weight)
 	if (cpu_has_feature(CPU_FTR_ARCH_31))
 		mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
 
-	if (val == 0 || val == 7)
-		*weight = 0;
+	if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31)))
+		weight_lat = 0;
 	else
-		*weight = mantissa << (2 * exp);
+		weight_lat = mantissa << (2 * exp);
+
+	/*
+	 * Use 64 bit weight field (full) if sample type is
+	 * WEIGHT.
+	 *
+	 * if sample type is WEIGHT_STRUCT:
+	 * - store memory latency in the lower 32 bits.
+	 * - For ISA v3.1, use remaining two 16 bit fields of
+	 *   perf_sample_weight to store cycle counter values
+	 *   from sier2.
+	 */
+	weight_fields = (union perf_sample_weight *)weight;
+	if (type & PERF_SAMPLE_WEIGHT)
+		weight_fields->full = weight_lat;
+	else {
+		weight_fields->var1_dw = (u32)weight_lat;
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			weight_fields->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
+			weight_fields->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
+		}
+	}
 }
 
 int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
@@ -447,8 +498,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp,
 	 * EBB events are pinned & exclusive, so this should never actually
 	 * hit, but we leave it as a fallback in case.
 	 */
-	mask  |= CNST_EBB_VAL(ebb);
-	value |= CNST_EBB_MASK;
+	mask  |= CNST_EBB_MASK;
+	value |= CNST_EBB_VAL(ebb);
 
 	*maskp = mask;
 	*valp = value;
@@ -694,3 +745,45 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
 
 	return num_alt;
 }
+
+int isa3XX_check_attr_config(struct perf_event *ev)
+{
+	u64 val, sample_mode;
+	u64 event = ev->attr.config;
+
+	val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+	sample_mode = val & 0x3;
+
+	/*
+	 * MMCRA[61:62] is Random Sampling Mode (SM).
+	 * A value of 0b11 is reserved.
+	 */
+	if (sample_mode == 0x3)
+		return -EINVAL;
+
+	/*
+	 * Check for all reserved values.
+	 * Source: Performance Monitoring Unit User Guide
+	 */
+	switch (val) {
+	case 0x5:
+	case 0x9:
+	case 0xD:
+	case 0x19:
+	case 0x1D:
+	case 0x1A:
+	case 0x1E:
+		return -EINVAL;
+	}
+
+	/*
+	 * MMCRA[48:51]/[52:55] Threshold Start/Stop
+	 * Events Selection.
+	 * 0b11110000/0b00001111 is reserved.
+	 */
+	val = (event >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
+	if (((val & 0xF0) == 0xF0) || ((val & 0xF) == 0xF))
+		return -EINVAL;
+
+	return 0;
+}
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 1af0e8c97ac7bf7b473ed51a3b14e314b00649ff..4a2cbc3dc047b817b2884c40894f766f52d3de63 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -220,6 +220,7 @@
 /* Bits in MMCRA for PowerISA v2.07 */
 #define MMCRA_SAMP_MODE_SHIFT		1
 #define MMCRA_SAMP_ELIG_SHIFT		4
+#define MMCRA_SAMP_ELIG_MASK		7
 #define MMCRA_THR_CTL_SHIFT		8
 #define MMCRA_THR_SEL_SHIFT		16
 #define MMCRA_THR_CMP_SHIFT		32
@@ -265,6 +266,10 @@
 #define ISA207_SIER_DATA_SRC_SHIFT	53
 #define ISA207_SIER_DATA_SRC_MASK	(0x7ull << ISA207_SIER_DATA_SRC_SHIFT)
 
+/* Bits in SIER2/SIER3 for Power10 */
+#define P10_SIER2_FINISH_CYC(sier2)	(((sier2) >> (63 - 37)) & 0x7fful)
+#define P10_SIER2_DISPATCH_CYC(sier2)	(((sier2) >> (63 - 13)) & 0x7fful)
+
 #define P(a, b)				PERF_MEM_S(a, b)
 #define PH(a, b)			(P(LVL, HIT) | P(a, b))
 #define PM(a, b)			(P(LVL, MISS) | P(a, b))
@@ -278,6 +283,8 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
 					const unsigned int ev_alt[][MAX_ALT]);
 void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
 							struct pt_regs *regs);
-void isa207_get_mem_weight(u64 *weight);
+void isa207_get_mem_weight(u64 *weight, u64 type);
+
+int isa3XX_check_attr_config(struct perf_event *ev);
 
 #endif
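
For reference (not part of the patch): a minimal user-space sketch of how the split
weight encoding produced by isa207_get_mem_weight() above could be unpacked by a
consumer of PERF_SAMPLE_WEIGHT_STRUCT samples. The union mirrors the little-endian
layout of the perf_event ABI (e.g. ppc64le builds); the print helper around it is
purely illustrative.

#include <stdint.h>
#include <stdio.h>

/* Mirrors union perf_sample_weight (little-endian layout). */
union sample_weight {
	uint64_t full;
	struct {
		uint32_t var1_dw;	/* memory latency (threshold counter) */
		uint16_t var2_w;	/* P10: finish cycles from SIER2 */
		uint16_t var3_w;	/* P10: dispatch cycles from SIER2 */
	};
};

/* Decode a raw weight value as filled in by isa207_get_mem_weight(). */
static void print_weight(uint64_t raw)
{
	union sample_weight w = { .full = raw };

	printf("latency=%u finish_cyc=%u dispatch_cyc=%u\n",
	       w.var1_dw, w.var2_w, w.var3_w);
}
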
diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
index e45dafe818ed417bf140d58c5debbfb611fa9b22..93be7197d2502a37d4382b27fd46d1a520e63a75 100644
--- a/arch/powerpc/perf/power10-events-list.h
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -75,5 +75,5 @@ EVENT(PM_RUN_INST_CMPL_ALT,			0x00002);
  *     thresh end (TE)
  */
 
-EVENT(MEM_LOADS,				0x34340401e0);
-EVENT(MEM_STORES,				0x343c0401e0);
+EVENT(MEM_LOADS,				0x35340401e0);
+EVENT(MEM_STORES,				0x353c0401e0);
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index a901c1348cad0ddf571a4b2f27357bf6fa99d1c0..f9d64c63bb4a72b19018d0bbdeca8e8a516becd1 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -106,6 +106,18 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 	return num_alt;
 }
 
+static int power10_check_attr_config(struct perf_event *ev)
+{
+	u64 val;
+	u64 event = ev->attr.config;
+
+	val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+	if (val == 0x10 || isa3XX_check_attr_config(ev))
+		return -EINVAL;
+
+	return 0;
+}
+
 GENERIC_EVENT_ATTR(cpu-cycles,			PM_RUN_CYC);
 GENERIC_EVENT_ATTR(instructions,		PM_RUN_INST_CMPL);
 GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
@@ -559,6 +571,7 @@ static struct power_pmu power10_pmu = {
 	.attr_groups		= power10_pmu_attr_groups,
 	.bhrb_nr		= 32,
 	.capabilities           = PERF_PMU_CAP_EXTENDED_REGS,
+	.check_attr_config	= power10_check_attr_config,
 };
 
 int init_power10_pmu(void)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 2a57e93a79dcf703b920f1b556d46d60308507e4..ff3382140d7e65a159f8be6032587d4ba2ef2422 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -151,6 +151,18 @@ static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 	return num_alt;
 }
 
+static int power9_check_attr_config(struct perf_event *ev)
+{
+	u64 val;
+	u64 event = ev->attr.config;
+
+	val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+	if (val == 0xC || isa3XX_check_attr_config(ev))
+		return -EINVAL;
+
+	return 0;
+}
+
 GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
 GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_ICT_NOSLOT_CYC);
 GENERIC_EVENT_ATTR(stalled-cycles-backend,	PM_CMPLU_STALL);
@@ -437,6 +449,7 @@ static struct power_pmu power9_pmu = {
 	.attr_groups		= power9_pmu_attr_groups,
 	.bhrb_nr		= 32,
 	.capabilities           = PERF_PMU_CAP_EXTENDED_REGS,
+	.check_attr_config	= power9_check_attr_config,
 };
 
 int init_power9_pmu(void)
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 7d41e9264510e5a276a422374728bfa655678553..83975ef50975e738309708f61729880a3befad51 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -5,7 +5,7 @@ config PPC_47x
 	select MPIC
 	help
 	  This option enables support for the 47x family of processors and is
-	  not currently compatible with other 44x or 46x varients
+	  not currently compatible with other 44x or 46x variants
 
 config BAMBOO
 	bool "Bamboo"
diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
index 11475c58ea431908bd06ecb4eab4c9d4228cb7f3..afee8b1515a8e69f0d45eddd701e11cc04b008de 100644
--- a/arch/powerpc/platforms/52xx/lite5200_sleep.S
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -181,7 +181,7 @@ sram_code:
   udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */
 	mullw	r12, r12, r11
 	mftb	r13	/* start */
-	addi	r12, r13, r12 /* end */
+	add	r12, r13, r12 /* end */
     1:
 	mftb	r13	/* current */
 	cmp	cr0, r13, r12
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 3ce907523b1e16611470298c39c85bbb9a75f234..e4b05667686eb576a5f5a2117cc038d0ac91edd5 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -101,6 +101,8 @@ config PPC_BOOK3S_64
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select IRQ_WORK
 	select PPC_MM_SLICES
+	select PPC_HAVE_KUEP
+	select PPC_HAVE_KUAP
 
 config PPC_BOOK3E_64
 	bool "Embedded processors"
@@ -306,6 +308,7 @@ config PHYS_64BIT
 config ALTIVEC
 	bool "AltiVec Support"
 	depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 || (PPC_E500MC && PPC64)
+	select PPC_FPU
 	help
 	  This option enables kernel support for the Altivec extensions to the
 	  PowerPC processor. The kernel currently supports saving and restoring
@@ -363,8 +366,6 @@ config PPC_RADIX_MMU
 	bool "Radix MMU Support"
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_GIGANTIC_PAGE
-	select PPC_HAVE_KUEP
-	select PPC_HAVE_KUAP
 	default y
 	help
 	  Enable support for the Power ISA 3.0 Radix style MMU. Currently this
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 2124831cf57c081e7625be24fc25ea9c0e0ba286..fa08699aedeb8b7819603a276e730c56002df932 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -486,7 +486,8 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
 	window->table.it_size = size >> window->table.it_page_shift;
 	window->table.it_ops = &cell_iommu_ops;
 
-	iommu_init_table(&window->table, iommu->nid, 0, 0);
+	if (!iommu_init_table(&window->table, iommu->nid, 0, 0))
+		panic("Failed to initialize iommu table");
 
 	pr_debug("\tioid      %d\n", window->ioid);
 	pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
index abdef9bcf432411e2c12cf90860c98bf21cb0450..fe0d8797a00a3bd40082a5133b0cc82839392ebf 100644
--- a/arch/powerpc/platforms/cell/spu_callbacks.c
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -35,9 +35,9 @@
  */
 
 static void *spu_syscall_table[] = {
+#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
 #define __SYSCALL(nr, entry) [nr] = entry,
 #include <asm/syscall_table_spu.h>
-#undef __SYSCALL
 };
 
 long spu_sys_callback(struct spu_syscall_block *s)
diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
index 8c421dc78b28334bef99bfb13969c6f8dd719096..76e6256cb0a788f4199cf0ea69dd097ec397f67d 100644
--- a/arch/powerpc/platforms/chrp/pci.c
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -131,8 +131,7 @@ static struct pci_ops rtas_pci_ops =
 
 volatile struct Hydra __iomem *Hydra = NULL;
 
-int __init
-hydra_init(void)
+static int __init hydra_init(void)
 {
 	struct device_node *np;
 	struct resource r;
diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
index c1920961f410445a9c3283bde281d1b57e5c4a5a..4c6d703a4284b988345a3f9f0529a9947ad4e1d7 100644
--- a/arch/powerpc/platforms/embedded6xx/Kconfig
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig
@@ -71,11 +71,6 @@ config MPC10X_BRIDGE
 	bool
 	select PPC_INDIRECT_PCI
 
-config MV64X60
-	bool
-	select PPC_INDIRECT_PCI
-	select CHECK_CACHE_COHERENCY
-
 config GAMECUBE_COMMON
 	bool
 
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index a20b9576de225d71cdaf76ad708570261150754a..37875e478b3a7186188faf002caf4321343db989 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -34,7 +34,7 @@ static struct pci_controller *u3_agp, *u3_ht, *u4_pcie;
 
 static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
 {
-	for (; node != 0;node = node->sibling) {
+	for (; node; node = node->sibling) {
 		const int *bus_range;
 		const unsigned int *class_code;
 		int len;
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index b500a6e47e6b1181fee9da228edc3e919f0a0d43..5be7242fbd866cf5c3814b4509509eda68728e1c 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -146,7 +146,9 @@ static void iommu_table_iobmap_setup(void)
 	 */
 	iommu_table_iobmap.it_blocksize = 4;
 	iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops;
-	iommu_init_table(&iommu_table_iobmap, 0, 0, 0);
+	if (!iommu_init_table(&iommu_table_iobmap, 0, 0, 0))
+		panic("Failed to initialize iommu table");
+
 	pr_debug(" <- %s\n", __func__);
 }
 
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 019669eb21d271e51668878246461e582928146c..71c1262589feaff3cc2158359fdd13669dfed9e8 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -46,10 +46,26 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
 	return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
 }
 
+static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct memtrace_entry *ent = filp->private_data;
+
+	if (ent->size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	if (vma->vm_pgoff << PAGE_SHIFT >= ent->size)
+		return -EINVAL;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
+}
+
 static const struct file_operations memtrace_fops = {
 	.llseek = default_llseek,
 	.read	= memtrace_read,
 	.open	= simple_open,
+	.mmap   = memtrace_mmap,
 };
 
 #define FLUSH_CHUNK_SIZE SZ_1G
@@ -187,7 +203,7 @@ static int memtrace_init_debugfs(void)
 		dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
 
 		ent->dir = dir;
-		debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
+		debugfs_create_file_unsafe("trace", 0600, dir, ent, &memtrace_fops);
 		debugfs_create_x64("start", 0400, dir, &ent->start);
 		debugfs_create_x64("size", 0400, dir, &ent->size);
 	}
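
For reference (not part of the patch): a rough user-space sketch of how the new mmap
support on the memtrace "trace" debugfs file might be exercised. The debugfs path and
the mapping length are assumptions; the node directory name and the maximum length
come from the "start"/"size" attributes on a real system.

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical path; adjust to the actual debugfs mount and node name. */
	const char *path = "/sys/kernel/debug/powerpc/memtrace/00000000/trace";
	size_t len = 1UL << 30;	/* must not exceed the node's "size" attribute */
	int fd = open(path, O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Mapped uncached, as set up by memtrace_mmap() in the hunk above. */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	munmap(p, len);
	close(fd);
	return 0;
}
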
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
index 0d9ba70f72517e4f46cd71173299fdce53bdb7fb..5b9736bbc2aa374265a8cfc9a052f79225c9b459 100644
--- a/arch/powerpc/platforms/powernv/opal-core.c
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -71,7 +71,7 @@ static LIST_HEAD(opalcore_list);
 static struct opalcore_config *oc_conf;
 static const struct opal_mpipl_fadump *opalc_metadata;
 static const struct opal_mpipl_fadump *opalc_cpu_metadata;
-struct kobject *mpipl_kobj;
+static struct kobject *mpipl_kobj;
 
 /*
  * Set crashing CPU's signal to SIGUSR1. if the kernel is triggered
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index deddaebf8c14f80f33948487529551ad5e8cf8a9..a191f4c60ce71c72d9d6a3012d3621f42f277510 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -105,7 +105,6 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	size_t addr, size;
 	pgprot_t page_prot;
-	int rc;
 
 	pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n",
 			vma->vm_start, vma->vm_end, vma->vm_pgoff,
@@ -121,10 +120,8 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
 	page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
 					 size, vma->vm_page_prot);
 
-	rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
 				page_prot);
-
-	return rc;
 }
 
 static bool opal_msg_queue_empty(void)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index f0f901683a2fe1056cc67190cccc09c76732640d..66c3c33373346815c23f0f0dac310d42eb7f945e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1762,7 +1762,8 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
 	tbl->it_ops = &pnv_ioda1_iommu_ops;
 	pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
 	pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
-	iommu_init_table(tbl, phb->hose->node, 0, 0);
+	if (!iommu_init_table(tbl, phb->hose->node, 0, 0))
+		panic("Failed to initialize iommu table");
 
 	pe->dma_setup_done = true;
 	return;
@@ -1930,16 +1931,16 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
 		res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift;
 		res_end = min(window_size, SZ_4G) >> tbl->it_page_shift;
 	}
-	iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end);
 
-	rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+	if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end))
+		rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+	else
+		rc = -ENOMEM;
 	if (rc) {
-		pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
-				rc);
+		pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc);
 		iommu_tce_table_put(tbl);
-		return rc;
+		tbl = NULL; /* This clears iommu_table_base below */
 	}
-
 	if (!pnv_iommu_bypass_disabled)
 		pnv_pci_ioda2_set_bypass(pe, true);
 
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index aadf932c4e61bb358a073921415914596100ca7f..a8db3f153063946fa42e035e39d9bef9bb6e80e5 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -157,7 +157,7 @@ static void __init pnv_check_guarded_cores(void)
 	for_each_node_by_type(dn, "cpu") {
 		if (of_property_match_string(dn, "status", "bad") >= 0)
 			bad_count++;
-	};
+	}
 
 	if (bad_count) {
 		printk("  _     _______________\n");
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 233503fcf8f08fb1747633b8f4f41907dab9025b..3ac70790ec7aa54a2f15418b989b3436dbbafe06 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -329,6 +329,20 @@ int dlpar_release_drc(u32 drc_index)
 	return 0;
 }
 
+int dlpar_unisolate_drc(u32 drc_index)
+{
+	int dr_status, rc;
+
+	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+				DR_ENTITY_SENSE, drc_index);
+	if (rc || dr_status != DR_ENTITY_PRESENT)
+		return -1;
+
+	rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+
+	return 0;
+}
+
 int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 {
 	int rc;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 12cbffd3c2e32c129f8558788922f2e13ed0015c..7e970f81d8ff59ab166a6f6708f645c681404c87 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -47,9 +47,6 @@ static void rtas_stop_self(void)
 
 	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
 
-	printk("cpu %u (hwid %u) Ready to die...\n",
-	       smp_processor_id(), hard_smp_processor_id());
-
 	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);
 
 	panic("Alas, I survived.\n");
@@ -271,6 +268,19 @@ static int dlpar_offline_cpu(struct device_node *dn)
 			if (!cpu_online(cpu))
 				break;
 
+			/*
+			 * device_offline() will return -EBUSY (via cpu_down()) if there
+			 * is only one CPU left. Check it here to fail earlier and with a
+			 * more informative error message, while also retaining the
+			 * cpu_add_remove_lock to be sure that no CPUs are being
+			 * online/offlined during this check.
+			 */
+			if (num_online_cpus() == 1) {
+				pr_warn("Unable to remove last online CPU %pOFn\n", dn);
+				rc = -EBUSY;
+				goto out_unlock;
+			}
+
 			cpu_maps_update_done();
 			rc = device_offline(get_cpu_device(cpu));
 			if (rc)
@@ -283,6 +293,7 @@ static int dlpar_offline_cpu(struct device_node *dn)
 				thread);
 		}
 	}
+out_unlock:
 	cpu_maps_update_done();
 
 out:
@@ -802,8 +813,16 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 	case PSERIES_HP_ELOG_ACTION_REMOVE:
 		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
 			rc = dlpar_cpu_remove_by_count(count);
-		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
+		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
 			rc = dlpar_cpu_remove_by_index(drc_index);
+			/*
+			 * Setting the isolation state of an UNISOLATED/CONFIGURED
+			 * device to UNISOLATE is a no-op, but the hypervisor can
+			 * use it as a hint that the CPU removal failed.
+			 */
+			if (rc)
+				dlpar_unisolate_drc(drc_index);
+		}
 		else
 			rc = -EINVAL;
 		break;
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 2c59b4986ea540bd3ebc9e13b09833364b388542..3a50612a78db8be1642fbc14e718d609496549d6 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -26,7 +26,7 @@ struct hcall_stats {
 };
 #define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
 
-DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
+static DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
 /*
  * Routines for displaying the statistics in debugfs
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 9fc5217f0c8e5a1a72cd3c66a5866c7a911590cb..0c55b991f665b56c3473c91dc7924d9256bacb7f 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -638,7 +638,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 
 	iommu_table_setparms(pci->phb, dn, tbl);
 	tbl->it_ops = &iommu_table_pseries_ops;
-	iommu_init_table(tbl, pci->phb->node, 0, 0);
+	if (!iommu_init_table(tbl, pci->phb->node, 0, 0))
+		panic("Failed to initialize iommu table");
 
 	/* Divide the rest (1.75GB) among the children */
 	pci->phb->dma_window_size = 0x80000000ul;
@@ -720,7 +721,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 		iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
 				ppci->table_group, dma_window);
 		tbl->it_ops = &iommu_table_lpar_multi_ops;
-		iommu_init_table(tbl, ppci->phb->node, 0, 0);
+		if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+			panic("Failed to initialize iommu table");
 		iommu_register_group(ppci->table_group,
 				pci_domain_nr(bus), 0);
 		pr_debug("  created table: %p\n", ppci->table_group);
@@ -749,7 +751,9 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
 		tbl = PCI_DN(dn)->table_group->tables[0];
 		iommu_table_setparms(phb, dn, tbl);
 		tbl->it_ops = &iommu_table_pseries_ops;
-		iommu_init_table(tbl, phb->node, 0, 0);
+		if (!iommu_init_table(tbl, phb->node, 0, 0))
+			panic("Failed to initialize iommu table");
+
 		set_iommu_table_base(&dev->dev, tbl);
 		return;
 	}
@@ -1099,6 +1103,33 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
 			 ret);
 }
 
+/* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */
+static int iommu_get_page_shift(u32 query_page_size)
+{
+	/* Supported IO page-sizes according to LoPAR */
+	const int shift[] = {
+		__builtin_ctzll(SZ_4K),   __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M),
+		__builtin_ctzll(SZ_32M),  __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M),
+		__builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G)
+	};
+
+	int i = ARRAY_SIZE(shift) - 1;
+
+	/*
+	 * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field:
+	 * - bit 31 means 4k pages are supported,
+	 * - bit 30 means 64k pages are supported, and so on.
+	 * Larger page sizes map more memory with the same number of TCEs, so start probing from the largest.
+	 */
+	for (; i >= 0 ; i--) {
+		if (query_page_size & (1 << i))
+			return shift[i];
+	}
+
+	/* No valid page size found. */
+	return 0;
+}
+
 /*
  * If the PE supports dynamic dma windows, and there is space for a table
  * that can map all pages in a linear offset, then setup such a table,
@@ -1206,13 +1237,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 			goto out_failed;
 		}
 	}
-	if (query.page_size & 4) {
-		page_shift = 24; /* 16MB */
-	} else if (query.page_size & 2) {
-		page_shift = 16; /* 64kB */
-	} else if (query.page_size & 1) {
-		page_shift = 12; /* 4kB */
-	} else {
+
+	page_shift = iommu_get_page_shift(query.page_size);
+	if (!page_shift) {
 		dev_dbg(&dev->dev, "no supported direct page size in mask %x",
 			  query.page_size);
 		goto out_failed;
@@ -1229,7 +1256,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	if (pmem_present) {
 		if (query.largest_available_block >=
 		    (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
-			len = MAX_PHYSMEM_BITS - page_shift;
+			len = MAX_PHYSMEM_BITS;
 		else
 			dev_info(&dev->dev, "Skipping ibm,pmemory");
 	}
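
For reference (not part of the patch): a self-contained sketch of the "IO Page Sizes"
decoding done by iommu_get_page_shift() above, handy for checking the bit-to-shift
mapping in isolation. The shift values are taken from the table in the hunk; the
stand-alone test harness around them is an assumption.

#include <assert.h>
#include <stdio.h>

/* Mirrors iommu_get_page_shift(): bit 0 = 4K, bit 1 = 64K, bit 2 = 16M, ... */
static int io_page_shift(unsigned int query_page_size)
{
	static const int shift[] = { 12, 16, 24, 25, 26, 27, 28, 34 };
	int i;

	for (i = (int)(sizeof(shift) / sizeof(shift[0])) - 1; i >= 0; i--)
		if (query_page_size & (1u << i))
			return shift[i];
	return 0;	/* no supported page size advertised */
}

int main(void)
{
	assert(io_page_shift(0x01) == 12);	/* 4K only */
	assert(io_page_shift(0x03) == 16);	/* 64K preferred over 4K */
	assert(io_page_shift(0x07) == 24);	/* 16M preferred over 64K/4K */
	assert(io_page_shift(0x00) == 0);	/* nothing supported */
	printf("ok\n");
	return 0;
}
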
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 3805519a6469765adb67786e972bb36740caff26..1f3152ad7213262987c51ec10d9b46cdcbc032c0 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -977,11 +977,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
 	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
 	BUG_ON(slot == -1);
 
-	flags = newpp & 7;
+	flags = newpp & (HPTE_R_PP | HPTE_R_N);
 	if (mmu_has_feature(MMU_FTR_KERNEL_RO))
 		/* Move pp0 into bit 8 (IBM 55) */
 		flags |= (newpp & HPTE_R_PP0) >> 55;
 
+	flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO);
+
 	lpar_rc = plpar_pte_protect(flags, slot, 0);
 
 	BUG_ON(lpar_rc != H_SUCCESS);
@@ -1630,7 +1632,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
 		}
 		msleep(delay);
 		rc = plpar_resize_hpt_prepare(0, shift);
-	};
+	}
 
 	switch (rc) {
 	case H_SUCCESS:
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index e278390ab28d14c53071ffc99c447c07fb487dbc..f71eac74ea92abba6ae3f6a1cf160f865e961c48 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -537,6 +537,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 	parse_em_data(m);
 	maxmem_data(m);
 
+	seq_printf(m, "security_flavor=%u\n", pseries_security_flavor);
+
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 835163f54244ab005041989f275ccb54774da000..ef26fe40efb03cfafdb86c1a20fc3f194e208489 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -93,6 +93,7 @@ struct papr_scm_priv {
 	uint64_t block_size;
 	int metadata_size;
 	bool is_volatile;
+	bool hcall_flush_required;
 
 	uint64_t bound_addr;
 
@@ -117,6 +118,38 @@ struct papr_scm_priv {
 	size_t stat_buffer_len;
 };
 
+static int papr_scm_pmem_flush(struct nd_region *nd_region,
+			       struct bio *bio __maybe_unused)
+{
+	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
+	unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0;
+	long rc;
+
+	dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);
+
+	do {
+		rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
+		token = ret_buf[0];
+
+		/* Check if we are stalled for some time */
+		if (H_IS_LONG_BUSY(rc)) {
+			msleep(get_longbusy_msecs(rc));
+			rc = H_BUSY;
+		} else if (rc == H_BUSY) {
+			cond_resched();
+		}
+	} while (rc == H_BUSY);
+
+	if (rc) {
+		dev_err(&p->pdev->dev, "flush error: %ld", rc);
+		rc = -EIO;
+	} else {
+		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
+	}
+
+	return rc;
+}
+
 static LIST_HEAD(papr_nd_regions);
 static DEFINE_MUTEX(papr_ndr_lock);
 
@@ -914,6 +947,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	dimm_flags = 0;
 	set_bit(NDD_LABELING, &dimm_flags);
 
+	/*
+	 * Check if the nvdimm is unarmed. No locking needed as we are still
+	 * initializing. Ignore any error encountered.
+	 */
+	__drc_pmem_query_health(p);
+
+	if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK)
+		set_bit(NDD_UNARMED, &dimm_flags);
+
 	p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups,
 				  dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
 	if (!p->nvdimm) {
@@ -943,6 +985,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	ndr_desc.num_mappings = 1;
 	ndr_desc.nd_set = &p->nd_set;
 
+	if (p->hcall_flush_required) {
+		set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+		ndr_desc.flush = papr_scm_pmem_flush;
+	}
+
 	if (p->is_volatile)
 		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
 	else {
@@ -1088,6 +1135,7 @@ static int papr_scm_probe(struct platform_device *pdev)
 	p->block_size = block_size;
 	p->blocks = blocks;
 	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
+	p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
 
 	/* We just need to ensure that set cookies are unique across */
 	uuid_parse(uuid_str, (uuid_t *) uuid);
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index f9ae17e8a0f468bb1bf1c0f66143b552d243d5e6..a8f9140a24fa3c5ad14fcee2bda20b52494c67f0 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -50,6 +50,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic);
 int remove_phb_dynamic(struct pci_controller *phb)
 {
 	struct pci_bus *b = phb->bus;
+	struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge);
 	struct resource *res;
 	int rc, i;
 
@@ -76,7 +77,8 @@ int remove_phb_dynamic(struct pci_controller *phb)
 	/* Remove the PCI bus and unregister the bridge device from sysfs */
 	phb->bus = NULL;
 	pci_remove_bus(b);
-	device_unregister(b->bridge);
+	host_bridge->bus = NULL;
+	device_unregister(&host_bridge->dev);
 
 	/* Now release the IO resource */
 	if (res->flags & IORESOURCE_IO)
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index e1dc5d3254df9cccf298d117e30d134353d95b4a..439ac72c247088d19a66a81a96f749f2af8b985f 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -139,7 +139,7 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
 	return rc;
 }
 
-const struct of_device_id drc_pmem_match[] = {
+static const struct of_device_id drc_pmem_match[] = {
 	{ .type = "ibm,persistent-memory", },
 	{}
 };
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 4fe48c04c6c20abea5879f51f873bca7f35aa509..1f051a786fb317ca265ef6fcabc1b87f884ba240 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -43,9 +43,6 @@ extern void pSeries_final_fixup(void);
 /* Poweron flag used for enabling auto ups restart */
 extern unsigned long rtas_poweron_auto;
 
-/* Provided by HVC VIO */
-extern void hvc_vio_init_early(void);
-
 /* Dynamic logical Partitioning/Mobility */
 extern void dlpar_free_cc_nodes(struct device_node *);
 extern void dlpar_free_cc_property(struct property *);
@@ -55,6 +52,7 @@ extern int dlpar_attach_node(struct device_node *, struct device_node *);
 extern int dlpar_detach_node(struct device_node *);
 extern int dlpar_acquire_drc(u32 drc_index);
 extern int dlpar_release_drc(u32 drc_index);
+extern int dlpar_unisolate_drc(u32 drc_index);
 
 void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
 int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
@@ -111,6 +109,7 @@ static inline unsigned long cmo_get_page_size(void)
 
 int dlpar_workqueue_init(void);
 
+extern u32 pseries_security_flavor;
 void pseries_setup_security_mitigations(void);
 void pseries_lpar_read_hblkrm_characteristics(void);
 
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index f8b390a9d9fb3169c587b3a625a00ac474f55ac1..9d4ef65da7f395f6aab3e7a9e544f27c2117f0e5 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -699,7 +699,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
 		mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
 		break;
 	case MC_ERROR_TYPE_I_CACHE:
-		mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
+		mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
 		break;
 	case MC_ERROR_TYPE_UNKNOWN:
 	default:
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
index 81343908ed3346825daad16e40eedebad50fd935..f8f73b47b10794f1304b4935067501f9d0828a62 100644
--- a/arch/powerpc/platforms/pseries/rtas-fadump.c
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -247,7 +247,7 @@ static inline int rtas_fadump_gpr_index(u64 id)
 	return i;
 }
 
-void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
 {
 	int i;
 
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 46e1540abc229720a2fe4c5e219249aeb48dc96d..754e493b7c05bbbe4d35df7f5f247aa0ed85375c 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -71,6 +71,7 @@
 #include <asm/swiotlb.h>
 #include <asm/svm.h>
 #include <asm/dtl.h>
+#include <asm/hvconsole.h>
 
 #include "pseries.h"
 #include "../../../../drivers/pci/pci.h"
@@ -85,6 +86,7 @@ EXPORT_SYMBOL(CMO_PageSize);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 int ibm_nmi_interlock_token;
+u32 pseries_security_flavor;
 
 static void pSeries_show_cpuinfo(struct seq_file *m)
 {
@@ -534,9 +536,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
 	/*
 	 * The features below are enabled by default, so we instead look to see
 	 * if firmware has *disabled* them, and clear them if so.
+	 * H_CPU_BEHAV_FAVOUR_SECURITY_H can only be set if
+	 * H_CPU_BEHAV_FAVOUR_SECURITY is.
 	 */
 	if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
 		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+	else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H)
+		pseries_security_flavor = 1;
+	else
+		pseries_security_flavor = 2;
 
 	if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
 		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 9cb4fc839fd5d89b3ec9426cfce7fe82742757d6..e00f3725ec960624d6591d54b695ebde63449df4 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -22,6 +22,7 @@
 #include <linux/mm.h>
 #include <linux/dma-map-ops.h>
 #include <linux/kobject.h>
+#include <linux/kexec.h>
 
 #include <asm/iommu.h>
 #include <asm/dma.h>
@@ -1278,6 +1279,20 @@ static int vio_bus_remove(struct device *dev)
 	return 0;
 }
 
+static void vio_bus_shutdown(struct device *dev)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct vio_driver *viodrv;
+
+	if (dev->driver) {
+		viodrv = to_vio_driver(dev->driver);
+		if (viodrv->shutdown)
+			viodrv->shutdown(viodev);
+		else if (kexec_in_progress)
+			vio_bus_remove(dev);
+	}
+}
+
 /**
  * vio_register_driver: - Register a new vio driver
  * @viodrv:	The vio_driver structure to be registered.
@@ -1285,6 +1300,10 @@ static int vio_bus_remove(struct device *dev)
 int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
 			  const char *mod_name)
 {
+	// vio_bus_type is only initialised for pseries
+	if (!machine_is(pseries))
+		return -ENODEV;
+
 	pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
 
 	/* fill in 'struct driver' fields */
@@ -1613,6 +1632,7 @@ struct bus_type vio_bus_type = {
 	.match = vio_bus_match,
 	.probe = vio_bus_probe,
 	.remove = vio_bus_remove,
+	.shutdown = vio_bus_shutdown,
 };
 
 /**
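
For reference (not part of the patch): a minimal sketch of a vio driver supplying the
shutdown hook that vio_bus_shutdown() above will invoke. The .shutdown member of
struct vio_driver is assumed to be introduced elsewhere in this series, and the driver
name is a placeholder.

/* Hypothetical driver; assumes kernel context with <asm/vio.h>. */
static void foo_vio_shutdown(struct vio_dev *vdev)
{
	/* Quiesce the device so a kexec'd kernel finds it in a clean state. */
	dev_dbg(&vdev->dev, "shutting down\n");
}

static struct vio_driver foo_vio_driver = {
	.name		= "foo-vio",		/* placeholder */
	.shutdown	= foo_vio_shutdown,
	/* .id_table, .probe and .remove omitted for brevity */
};
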
diff --git a/arch/powerpc/purgatory/trampoline_64.S b/arch/powerpc/purgatory/trampoline_64.S
index d956b8a35fd13721d3f5bd964fb578b5ae5c7e3b..b35837c138526c5a388589778af91dc142027815 100644
--- a/arch/powerpc/purgatory/trampoline_64.S
+++ b/arch/powerpc/purgatory/trampoline_64.S
@@ -12,7 +12,6 @@
 #include <asm/asm-compat.h>
 #include <asm/crashdump-ppc64.h>
 
-	.machine ppc64
 	.balign 256
 	.globl purgatory_start
 purgatory_start:
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 6b4a34b36d9879eb8973f45a12c230d0c0b30e4b..1d33b7a5ea83264c34a9465b35e2f6f37f60c7cb 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -344,7 +344,8 @@ static void iommu_table_dart_setup(void)
 	iommu_table_dart.it_index = 0;
 	iommu_table_dart.it_blocksize = 1;
 	iommu_table_dart.it_ops = &iommu_dart_ops;
-	iommu_init_table(&iommu_table_dart, -1, 0, 0);
+	if (!iommu_init_table(&iommu_table_dart, -1, 0, 0))
+		panic("Failed to initialize iommu table");
 
 	/* Reserve the last page of the DART to avoid possible prefetch
 	 * past the DART mapped area
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 040b9d01c07984575e26a7e9625ca6ae3195b0b1..69af73765783b061a5ba2263773b5d95f4c103bc 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -455,7 +455,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
 	}
 }
 
-static void __init setup_pci_cmd(struct pci_controller *hose)
+static void setup_pci_cmd(struct pci_controller *hose)
 {
 	u16 cmd;
 	int cap_x;
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 595310e056f4dec4d00e10bafd6f1f639e6ec9a3..50469700dec6a6462e1e283b1473824781430e37 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -63,8 +63,19 @@ static const struct xive_ops *xive_ops;
 static struct irq_domain *xive_irq_domain;
 
 #ifdef CONFIG_SMP
-/* The IPIs all use the same logical irq number */
-static u32 xive_ipi_irq;
+/* The IPIs use the same logical irq number when on the same chip */
+static struct xive_ipi_desc {
+	unsigned int irq;
+	char name[16];
+} *xive_ipis;
+
+/*
+ * Use early_cpu_to_node() for hot-plugged CPUs
+ */
+static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu)
+{
+	return xive_ipis[early_cpu_to_node(cpu)].irq;
+}
 #endif
 
 /* Xive state for each CPU */
@@ -253,17 +264,20 @@ notrace void xmon_xive_do_dump(int cpu)
 	xmon_printf("\n");
 }
 
+static struct irq_data *xive_get_irq_data(u32 hw_irq)
+{
+	unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq);
+
+	return irq ? irq_get_irq_data(irq) : NULL;
+}
+
 int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
 {
-	struct irq_chip *chip = irq_data_get_irq_chip(d);
 	int rc;
 	u32 target;
 	u8 prio;
 	u32 lirq;
 
-	if (!is_xive_irq(chip))
-		return -EINVAL;
-
 	rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
 	if (rc) {
 		xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
@@ -273,6 +287,9 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
 	xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
 		    hw_irq, target, prio, lirq);
 
+	if (!d)
+		d = xive_get_irq_data(hw_irq);
+
 	if (d) {
 		struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 		u64 val = xive_esb_read(xd, XIVE_ESB_GET);
@@ -289,6 +306,20 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
 	return 0;
 }
 
+void xmon_xive_get_irq_all(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_data *d = irq_desc_get_irq_data(desc);
+		unsigned int hwirq = (unsigned int)irqd_to_hwirq(d);
+
+		if (d->domain == xive_irq_domain)
+			xmon_xive_get_irq_config(hwirq, d);
+	}
+}
+
 #endif /* CONFIG_XMON */
 
 static unsigned int xive_get_irq(void)
@@ -1067,28 +1098,94 @@ static struct irq_chip xive_ipi_chip = {
 	.irq_unmask = xive_ipi_do_nothing,
 };
 
-static void __init xive_request_ipi(void)
+/*
+ * IPIs are marked per-cpu. We use separate HW interrupts under the
+ * hood but associated with the same "linux" interrupt
+ */
+struct xive_ipi_alloc_info {
+	irq_hw_number_t hwirq;
+};
+
+static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				     unsigned int nr_irqs, void *arg)
 {
-	unsigned int virq;
+	struct xive_ipi_alloc_info *info = arg;
+	int i;
 
-	/*
-	 * Initialization failed, move on, we might manage to
-	 * reach the point where we display our errors before
-	 * the system falls appart
-	 */
-	if (!xive_irq_domain)
-		return;
+	for (i = 0; i < nr_irqs; i++) {
+		irq_domain_set_info(domain, virq + i, info->hwirq + i, &xive_ipi_chip,
+				    domain->host_data, handle_percpu_irq,
+				    NULL, NULL);
+	}
+	return 0;
+}
 
-	/* Initialize it */
-	virq = irq_create_mapping(xive_irq_domain, XIVE_IPI_HW_IRQ);
-	xive_ipi_irq = virq;
+static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
+	.alloc  = xive_ipi_irq_domain_alloc,
+};
 
-	WARN_ON(request_irq(virq, xive_muxed_ipi_action,
-			    IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
+static int __init xive_request_ipi(void)
+{
+	struct fwnode_handle *fwnode;
+	struct irq_domain *ipi_domain;
+	unsigned int node;
+	int ret = -ENOMEM;
+
+	fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI");
+	if (!fwnode)
+		goto out;
+
+	ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids,
+					      &xive_ipi_irq_domain_ops, NULL);
+	if (!ipi_domain)
+		goto out_free_fwnode;
+
+	xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL);
+	if (!xive_ipis)
+		goto out_free_domain;
+
+	for_each_node(node) {
+		struct xive_ipi_desc *xid = &xive_ipis[node];
+		struct xive_ipi_alloc_info info = { node };
+
+		/* Skip nodes without CPUs */
+		if (cpumask_empty(cpumask_of_node(node)))
+			continue;
+
+		/*
+		 * Map one IPI interrupt per node for all cpus of that node.
+		 * Since the HW interrupt number doesn't have any meaning,
+		 * simply use the node number.
+		 */
+		xid->irq = irq_domain_alloc_irqs(ipi_domain, 1, node, &info);
+		if (xid->irq < 0) {
+			ret = xid->irq;
+			goto out_free_xive_ipis;
+		}
+
+		snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
+
+		ret = request_irq(xid->irq, xive_muxed_ipi_action,
+				  IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL);
+
+		WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
+	}
+
+	return ret;
+
+out_free_xive_ipis:
+	kfree(xive_ipis);
+out_free_domain:
+	irq_domain_remove(ipi_domain);
+out_free_fwnode:
+	irq_domain_free_fwnode(fwnode);
+out:
+	return ret;
 }
 
 static int xive_setup_cpu_ipi(unsigned int cpu)
 {
+	unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
 	struct xive_cpu *xc;
 	int rc;
 
@@ -1131,6 +1228,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
 
 static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
 {
+	unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
+
 	/* Disable the IPI and free the IRQ data */
 
 	/* Already cleaned up ? */
@@ -1178,19 +1277,6 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
 	 */
 	irq_clear_status_flags(virq, IRQ_LEVEL);
 
-#ifdef CONFIG_SMP
-	/* IPIs are special and come up with HW number 0 */
-	if (hw == XIVE_IPI_HW_IRQ) {
-		/*
-		 * IPIs are marked per-cpu. We use separate HW interrupts under
-		 * the hood but associated with the same "linux" interrupt
-		 */
-		irq_set_chip_and_handler(virq, &xive_ipi_chip,
-					 handle_percpu_irq);
-		return 0;
-	}
-#endif
-
 	rc = xive_irq_alloc_data(virq, hw);
 	if (rc)
 		return rc;
@@ -1202,15 +1288,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
 
 static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
 {
-	struct irq_data *data = irq_get_irq_data(virq);
-	unsigned int hw_irq;
-
-	/* XXX Assign BAD number */
-	if (!data)
-		return;
-	hw_irq = (unsigned int)irqd_to_hwirq(data);
-	if (hw_irq != XIVE_IPI_HW_IRQ)
-		xive_irq_free_data(virq);
+	xive_irq_free_data(virq);
 }
 
 static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
@@ -1335,17 +1413,14 @@ static int xive_prepare_cpu(unsigned int cpu)
 
 	xc = per_cpu(xive_cpu, cpu);
 	if (!xc) {
-		struct device_node *np;
-
 		xc = kzalloc_node(sizeof(struct xive_cpu),
 				  GFP_KERNEL, cpu_to_node(cpu));
 		if (!xc)
 			return -ENOMEM;
-		np = of_get_cpu_node(cpu, NULL);
-		if (np)
-			xc->chip_id = of_get_ibm_chip_id(np);
-		of_node_put(np);
 		xc->hw_ipi = XIVE_BAD_IRQ;
+		xc->chip_id = XIVE_INVALID_CHIP_ID;
+		if (xive_ops->prepare_cpu)
+			xive_ops->prepare_cpu(cpu, xc);
 
 		per_cpu(xive_cpu, cpu) = xc;
 	}
@@ -1408,13 +1483,12 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
 		struct irq_desc *desc = irq_to_desc(irq);
 		struct irq_data *d = irq_desc_get_irq_data(desc);
 		struct xive_irq_data *xd;
-		unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 
 		/*
 		 * Ignore anything that isn't a XIVE irq and ignore
 		 * IPIs, so can just be dropped.
 		 */
-		if (d->domain != xive_irq_domain || hw_irq == XIVE_IPI_HW_IRQ)
+		if (d->domain != xive_irq_domain)
 			continue;
 
 		/*
@@ -1592,16 +1666,15 @@ static void xive_debug_show_cpu(struct seq_file *m, int cpu)
 	seq_puts(m, "\n");
 }
 
-static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d)
+static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
 {
-	struct irq_chip *chip = irq_data_get_irq_chip(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 	int rc;
 	u32 target;
 	u8 prio;
 	u32 lirq;
-
-	if (!is_xive_irq(chip))
-		return;
+	struct xive_irq_data *xd;
+	u64 val;
 
 	rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
 	if (rc) {
@@ -1612,17 +1685,14 @@ static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data
 	seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
 		   hw_irq, target, prio, lirq);
 
-	if (d) {
-		struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
-		u64 val = xive_esb_read(xd, XIVE_ESB_GET);
-
-		seq_printf(m, "flags=%c%c%c PQ=%c%c",
-			   xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
-			   xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
-			   xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
-			   val & XIVE_ESB_VAL_P ? 'P' : '-',
-			   val & XIVE_ESB_VAL_Q ? 'Q' : '-');
-	}
+	xd = irq_data_get_irq_handler_data(d);
+	val = xive_esb_read(xd, XIVE_ESB_GET);
+	seq_printf(m, "flags=%c%c%c PQ=%c%c",
+		   xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
+		   xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
+		   xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
+		   val & XIVE_ESB_VAL_P ? 'P' : '-',
+		   val & XIVE_ESB_VAL_Q ? 'Q' : '-');
 	seq_puts(m, "\n");
 }
 
@@ -1640,16 +1710,9 @@ static int xive_core_debug_show(struct seq_file *m, void *private)
 
 	for_each_irq_desc(i, desc) {
 		struct irq_data *d = irq_desc_get_irq_data(desc);
-		unsigned int hw_irq;
-
-		if (!d)
-			continue;
-
-		hw_irq = (unsigned int)irqd_to_hwirq(d);
 
-		/* IPIs are special (HW number 0) */
-		if (hw_irq != XIVE_IPI_HW_IRQ)
-			xive_debug_show_irq(m, hw_irq, d);
+		if (d->domain == xive_irq_domain)
+			xive_debug_show_irq(m, d);
 	}
 	return 0;
 }
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 05a800a3104edda4591f3e49309122f4ece4737f..57e3f15404354601d44f8cb7228b06df8c41a396 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -380,6 +380,11 @@ static void xive_native_update_pending(struct xive_cpu *xc)
 	}
 }
 
+static void xive_native_prepare_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	xc->chip_id = cpu_to_chip_id(cpu);
+}
+
 static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
 {
 	s64 rc;
@@ -462,6 +467,7 @@ static const struct xive_ops xive_native_ops = {
 	.match			= xive_native_match,
 	.shutdown		= xive_native_shutdown,
 	.update_pending		= xive_native_update_pending,
+	.prepare_cpu		= xive_native_prepare_cpu,
 	.setup_cpu		= xive_native_setup_cpu,
 	.teardown_cpu		= xive_native_teardown_cpu,
 	.sync_source		= xive_native_sync_source,
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index 01ccc0786ada6cdd7551c4c514ffa4f3e93b2db6..f143b6f111ac0a2b0efcbf61a757d8cfe4aa3f94 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -549,7 +549,7 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
 static bool xive_spapr_match(struct device_node *node)
 {
 	/* Ignore cascaded controllers for the moment */
-	return 1;
+	return true;
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
index 9cf57c722faa3d1a0e1d3c29b1d203ea946ed504..504e7edce35812ac61d1004582b270384ebdbf42 100644
--- a/arch/powerpc/sysdev/xive/xive-internal.h
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -5,8 +5,6 @@
 #ifndef __XIVE_INTERNAL_H
 #define __XIVE_INTERNAL_H
 
-#define XIVE_IPI_HW_IRQ		0 /* interrupt source # for IPIs */
-
 /*
  * A "disabled" interrupt should never fire, to catch problems
  * we set its logical number to this
@@ -46,6 +44,7 @@ struct xive_ops {
 				  u32 *sw_irq);
 	int	(*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
 	void	(*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
+	void	(*prepare_cpu)(unsigned int cpu, struct xive_cpu *xc);
 	void	(*setup_cpu)(unsigned int cpu, struct xive_cpu *xc);
 	void	(*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc);
 	bool	(*match)(struct device_node *np);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index bf7d69625a2e50f90327554a88526a1eed9ef560..c8173e92f19d7b3ca8739e25f14a5252387d6a7d 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -54,6 +54,7 @@
 #include <asm/code-patching.h>
 #include <asm/sections.h>
 #include <asm/inst.h>
+#include <asm/interrupt.h>
 
 #ifdef CONFIG_PPC64
 #include <asm/hvcall.h>
@@ -605,7 +606,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 			 * debugger break (IPI). This is similar to
 			 * crash_kexec_secondary().
 			 */
-			if (TRAP(regs) != 0x100 || !wait_for_other_cpus(ncpus))
+			if (TRAP(regs) != INTERRUPT_SYSTEM_RESET || !wait_for_other_cpus(ncpus))
 				smp_send_debugger_break();
 
 			wait_for_other_cpus(ncpus);
@@ -615,7 +616,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 
 		if (!locked_down) {
 			/* for breakpoint or single step, print curr insn */
-			if (bp || TRAP(regs) == 0xd00)
+			if (bp || TRAP(regs) == INTERRUPT_TRACE)
 				ppc_inst_dump(regs->nip, 1, 0);
 			printf("enter ? for help\n");
 		}
@@ -684,7 +685,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		disable_surveillance();
 		if (!locked_down) {
 			/* for breakpoint or single step, print current insn */
-			if (bp || TRAP(regs) == 0xd00)
+			if (bp || TRAP(regs) == INTERRUPT_TRACE)
 				ppc_inst_dump(regs->nip, 1, 0);
 			printf("enter ? for help\n");
 		}
@@ -1769,9 +1770,12 @@ static void excprint(struct pt_regs *fp)
 	printf("    sp: %lx\n", fp->gpr[1]);
 	printf("   msr: %lx\n", fp->msr);
 
-	if (trap == 0x300 || trap == 0x380 || trap == 0x600 || trap == 0x200) {
+	if (trap == INTERRUPT_DATA_STORAGE ||
+	    trap == INTERRUPT_DATA_SEGMENT ||
+	    trap == INTERRUPT_ALIGNMENT ||
+	    trap == INTERRUPT_MACHINE_CHECK) {
 		printf("   dar: %lx\n", fp->dar);
-		if (trap != 0x380)
+		if (trap != INTERRUPT_DATA_SEGMENT)
 			printf(" dsisr: %lx\n", fp->dsisr);
 	}
 
@@ -1785,7 +1789,7 @@ static void excprint(struct pt_regs *fp)
 		       current->pid, current->comm);
 	}
 
-	if (trap == 0x700)
+	if (trap == INTERRUPT_PROGRAM)
 		print_bug_trap(fp);
 
 	printf(linux_banner);
@@ -1815,25 +1819,16 @@ static void prregs(struct pt_regs *fp)
 	}
 
 #ifdef CONFIG_PPC64
-	if (FULL_REGS(fp)) {
-		for (n = 0; n < 16; ++n)
-			printf("R%.2d = "REG"   R%.2d = "REG"\n",
-			       n, fp->gpr[n], n+16, fp->gpr[n+16]);
-	} else {
-		for (n = 0; n < 7; ++n)
-			printf("R%.2d = "REG"   R%.2d = "REG"\n",
-			       n, fp->gpr[n], n+7, fp->gpr[n+7]);
-	}
+#define R_PER_LINE 2
 #else
+#define R_PER_LINE 4
+#endif
+
 	for (n = 0; n < 32; ++n) {
-		printf("R%.2d = %.8lx%s", n, fp->gpr[n],
-		       (n & 3) == 3? "\n": "   ");
-		if (n == 12 && !FULL_REGS(fp)) {
-			printf("\n");
-			break;
-		}
+		printf("R%.2d = "REG"%s", n, fp->gpr[n],
+			(n % R_PER_LINE) == R_PER_LINE - 1 ? "\n" : "   ");
 	}
-#endif
+
 	printf("pc  = ");
 	xmon_print_symbol(fp->nip, " ", "\n");
 	if (!trap_is_syscall(fp) && cpu_has_feature(CPU_FTR_CFAR)) {
@@ -1846,7 +1841,9 @@ static void prregs(struct pt_regs *fp)
 	printf("ctr = "REG"   xer = "REG"   trap = %4lx\n",
 	       fp->ctr, fp->xer, fp->trap);
 	trap = TRAP(fp);
-	if (trap == 0x300 || trap == 0x380 || trap == 0x600)
+	if (trap == INTERRUPT_DATA_STORAGE ||
+	    trap == INTERRUPT_DATA_SEGMENT ||
+	    trap == INTERRUPT_ALIGNMENT)
 		printf("dar = "REG"   dsisr = %.8lx\n", fp->dar, fp->dsisr);
 }
 
@@ -2727,30 +2724,6 @@ static void dump_all_xives(void)
 		dump_one_xive(cpu);
 }
 
-static void dump_one_xive_irq(u32 num, struct irq_data *d)
-{
-	xmon_xive_get_irq_config(num, d);
-}
-
-static void dump_all_xive_irq(void)
-{
-	unsigned int i;
-	struct irq_desc *desc;
-
-	for_each_irq_desc(i, desc) {
-		struct irq_data *d = irq_desc_get_irq_data(desc);
-		unsigned int hwirq;
-
-		if (!d)
-			continue;
-
-		hwirq = (unsigned int)irqd_to_hwirq(d);
-		/* IPIs are special (HW number 0) */
-		if (hwirq)
-			dump_one_xive_irq(hwirq, d);
-	}
-}
-
 static void dump_xives(void)
 {
 	unsigned long num;
@@ -2767,9 +2740,9 @@ static void dump_xives(void)
 		return;
 	} else if (c == 'i') {
 		if (scanhex(&num))
-			dump_one_xive_irq(num, NULL);
+			xmon_xive_get_irq_config(num, NULL);
 		else
-			dump_all_xive_irq();
+			xmon_xive_get_irq_all();
 		return;
 	}
 
@@ -2980,7 +2953,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr,
 		if (!ppc_inst_prefixed(inst))
 			dump_func(ppc_inst_val(inst), adr);
 		else
-			dump_func(ppc_inst_as_u64(inst), adr);
+			dump_func(ppc_inst_as_ulong(inst), adr);
 		printf("\n");
 	}
 	return adr - first_adr;
@@ -4212,8 +4185,7 @@ static void dump_spu_fields(struct spu *spu)
 	DUMP_FIELD(spu, "0x%p", pdata);
 }
 
-int
-spu_inst_dump(unsigned long adr, long count, int praddr)
+static int spu_inst_dump(unsigned long adr, long count, int praddr)
 {
 	return generic_inst_dump(adr, count, praddr, print_insn_spu);
 }
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
index ed89ef742530696133ade4f9ee39fa7966e08179..383c53c3dddd5580959af4c9bd8292f97ba7cbd3 100644
--- a/arch/s390/include/asm/vdso/gettimeofday.h
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -68,7 +68,8 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
 }
 
 #ifdef CONFIG_TIME_NS
-static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
 	return _timens_data;
 }
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index df01d7349d79918d6014df3118f465b18d3cdf9a..1936f21ed8cdaa62e48fa5ec80289b766163bc6f 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -58,7 +58,8 @@ extern struct ms_hyperv_tsc_page hvclock_page
 #endif
 
 #ifdef CONFIG_TIME_NS
-static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
 	return __timens_vdso_data;
 }
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 3eec59f1fed355f80c30ad5e8832ec7db3990c11..543a05ed0c8239b9df25c2b74f6f62d26f76b2fa 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -776,7 +776,7 @@ config I2C_MT7621
 
 config I2C_MV64XXX
 	tristate "Marvell mv64xxx I2C Controller"
-	depends on MV64X60 || PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST
+	depends on PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST
 	help
 	  If you say yes to this option, support will be included for the
 	  built-in I2C interface on the Marvell 64xxx line of host bridges.
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 73e6ae88fafd4eaf1145c56e843f7dc327d755a1..4bdd4c45e7a72484ab5f4ce4213729754c77f57c 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -180,14 +180,13 @@ static struct proc_dir_entry *proc_pmu_options;
 static int option_server_mode;
 
 int pmu_battery_count;
-int pmu_cur_battery;
+static int pmu_cur_battery;
 unsigned int pmu_power_flags = PMU_PWR_AC_PRESENT;
 struct pmu_battery_info pmu_batteries[PMU_MAX_BATTERIES];
 static int query_batt_timer = BATTERY_POLLING_COUNT;
 static struct adb_request batt_req;
 static struct proc_dir_entry *proc_pmu_batt[PMU_MAX_BATTERIES];
 
-int __fake_sleep;
 int asleep;
 
 #ifdef CONFIG_ADB
@@ -1833,6 +1832,7 @@ pmu_present(void)
  */
  
 static u32 save_via[8];
+static int __fake_sleep;
 
 static void
 save_via_state(void)
diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c
index 77612303841e4ce2ebb24a7580e1d1db0875a21a..07f91ec1f960e4fc7d217147934bf4b9792771fa 100644
--- a/drivers/macintosh/windfarm_core.c
+++ b/drivers/macintosh/windfarm_core.c
@@ -56,7 +56,7 @@ static BLOCKING_NOTIFIER_HEAD(wf_client_list);
 static int wf_client_count;
 static unsigned int wf_overtemp;
 static unsigned int wf_overtemp_counter;
-struct task_struct *wf_thread;
+static struct task_struct *wf_thread;
 
 static struct platform_device wf_platform_device = {
 	.name	= "windfarm",
diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c
index ab467b9c31be7ef1bc90aa6cf835881408f7af9d..ba1ec6fc11d21138d0434b35e0b74227278ebf43 100644
--- a/drivers/macintosh/windfarm_pm121.c
+++ b/drivers/macintosh/windfarm_pm121.c
@@ -433,7 +433,7 @@ struct pm121_sys_state {
 	struct wf_pid_state	pid;
 };
 
-struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {};
+static struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {};
 
 /*
  * ****** CPU Fans Control Loop ******
diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c
index 79cb1ad09bfdefb65678e361911c3a66b1cadee1..75966052819a0f236f702c908b64d3c61e81aa46 100644
--- a/drivers/macintosh/windfarm_smu_controls.c
+++ b/drivers/macintosh/windfarm_smu_controls.c
@@ -94,7 +94,7 @@ static int smu_set_fan(int pwm, u8 id, u16 value)
 		return rc;
 	wait_for_completion(&comp);
 
-	/* Handle fallback (see coment above) */
+	/* Handle fallback (see comment above) */
 	if (cmd.status != 0 && smu_supports_new_fans_ops) {
 		printk(KERN_WARNING "windfarm: SMU failed new fan command "
 		       "falling back to old method\n");
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f0d2dd35d408bd2406cb4080d9671535ec1b2299..acac0b571df121ecf3704e84adc87627439d81cd 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -467,6 +467,34 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
 		unsafe_put_user(__s->sig[0], &__c->sig[0], label);	\
 	}								\
 } while (0)
+
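+/*
+ * Counterpart of unsafe_put_compat_sigset(): rebuild each native 64-bit
+ * sigset word from two 32-bit compat words, falling through from the highest
+ * word allowed by _NSIG_WORDS down to sig[0]. Like the other unsafe_*()
+ * helpers, this must be used inside a user_access_begin() section.
+ */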
+#define unsafe_get_compat_sigset(set, compat, label) do {		\
+	const compat_sigset_t __user *__c = compat;			\
+	compat_sigset_word hi, lo;					\
+	sigset_t *__s = set;						\
+									\
+	switch (_NSIG_WORDS) {						\
+	case 4:								\
+		unsafe_get_user(lo, &__c->sig[7], label);		\
+		unsafe_get_user(hi, &__c->sig[6], label);		\
+		__s->sig[3] = hi | (((long)lo) << 32);			\
+		fallthrough;						\
+	case 3:								\
+		unsafe_get_user(lo, &__c->sig[5], label);		\
+		unsafe_get_user(hi, &__c->sig[4], label);		\
+		__s->sig[2] = hi | (((long)lo) << 32);			\
+		fallthrough;						\
+	case 2:								\
+		unsafe_get_user(lo, &__c->sig[3], label);		\
+		unsafe_get_user(hi, &__c->sig[2], label);		\
+		__s->sig[1] = hi | (((long)lo) << 32);			\
+		fallthrough;						\
+	case 1:								\
+		unsafe_get_user(lo, &__c->sig[1], label);		\
+		unsafe_get_user(hi, &__c->sig[0], label);		\
+		__s->sig[0] = hi | (((long)lo) << 32);			\
+	}								\
+} while (0)
 #else
 #define unsafe_put_compat_sigset(compat, set, label) do {		\
 	compat_sigset_t __user *__c = compat;				\
@@ -474,6 +502,13 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
 									\
 	unsafe_copy_to_user(__c, __s, sizeof(*__c), label);		\
 } while (0)
+
+#define unsafe_get_compat_sigset(set, compat, label) do {		\
+	const compat_sigset_t __user *__c = compat;			\
+	sigset_t *__s = set;						\
+									\
+	unsafe_copy_from_user(__s, __c, sizeof(*__c), label);		\
+} while (0)
 #endif
 
 extern int compat_ptrace_request(struct task_struct *child,
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index c7c6e8b8344d49871fd65524a30b5e1e1d02cbf6..c05e903cef02a3daa471c83a3593c34a168cd02e 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -397,6 +397,7 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count);
 #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e)
 #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e)
 #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e)
+#define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e)
 static inline unsigned long user_access_save(void) { return 0UL; }
 static inline void user_access_restore(unsigned long flags) { }
 #endif
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 2919f16981404199f91335fed9f31c37385cc5c6..ce2f6955200321400daf284ac691dad589477074 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -46,16 +46,18 @@ static inline bool vdso_cycles_ok(u64 cycles)
 #endif
 
 #ifdef CONFIG_TIME_NS
-static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
-			  struct __kernel_timespec *ts)
+static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+					  struct __kernel_timespec *ts)
 {
-	const struct vdso_data *vd = __arch_get_timens_vdso_data();
+	const struct vdso_data *vd;
 	const struct timens_offset *offs = &vdns->offset[clk];
 	const struct vdso_timestamp *vdso_ts;
 	u64 cycles, last, ns;
 	u32 seq;
 	s64 sec;
 
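+	/*
+	 * vdns points at one entry of the vdso_data array; step back to the
+	 * array base so the arch helper can derive the matching timens data
+	 * page from it.
+	 */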
+	vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? CS_RAW : CS_HRES_COARSE);
+	vd = __arch_get_timens_vdso_data(vd);
 	if (clk != CLOCK_MONOTONIC_RAW)
 		vd = &vd[CS_HRES_COARSE];
 	else
@@ -92,13 +94,14 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
 	return 0;
 }
 #else
-static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
 {
 	return NULL;
 }
 
-static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
-			  struct __kernel_timespec *ts)
+static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+					  struct __kernel_timespec *ts)
 {
 	return -EINVAL;
 }
@@ -159,10 +162,10 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
 }
 
 #ifdef CONFIG_TIME_NS
-static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
-			    struct __kernel_timespec *ts)
+static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
+					    struct __kernel_timespec *ts)
 {
-	const struct vdso_data *vd = __arch_get_timens_vdso_data();
+	const struct vdso_data *vd = __arch_get_timens_vdso_data(vdns);
 	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
 	const struct timens_offset *offs = &vdns->offset[clk];
 	u64 nsec;
@@ -188,8 +191,8 @@ static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
 	return 0;
 }
 #else
-static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
-			    struct __kernel_timespec *ts)
+static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
+					    struct __kernel_timespec *ts)
 {
 	return -1;
 }
@@ -310,7 +313,7 @@ __cvdso_gettimeofday_data(const struct vdso_data *vd,
 	if (unlikely(tz != NULL)) {
 		if (IS_ENABLED(CONFIG_TIME_NS) &&
 		    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
-			vd = __arch_get_timens_vdso_data();
+			vd = __arch_get_timens_vdso_data(vd);
 
 		tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
 		tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
@@ -333,7 +336,7 @@ __cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time)
 
 	if (IS_ENABLED(CONFIG_TIME_NS) &&
 	    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
-		vd = __arch_get_timens_vdso_data();
+		vd = __arch_get_timens_vdso_data(vd);
 
 	t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
 
@@ -363,7 +366,7 @@ int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock,
 
 	if (IS_ENABLED(CONFIG_TIME_NS) &&
 	    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
-		vd = __arch_get_timens_vdso_data();
+		vd = __arch_get_timens_vdso_data(vd);
 
 	/*
 	 * Convert the clockid to a bitmask and use it to check which
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index c25cf7cd45e9fdb8ec328c5e5a8dff16bc993e05..33ee34fc0828a179b3fbffa4b24e049f33e4490b 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -10,16 +10,7 @@
  *
  * We create two sets of source and destination buffers, one in regular memory,
  * the other cache-inhibited (by default we use /dev/fb0 for this, but an
- * alterative path for cache-inhibited memory may be provided).
- *
- * One way to get cache-inhibited memory is to use the "mem" kernel parameter
- * to limit the kernel to less memory than actually exists.  Addresses above
- * the limit may still be accessed but will be treated as cache-inhibited. For
- * example, if there is actually 4GB of memory and the parameter "mem=3GB" is
- * used, memory from address 0xC0000000 onwards is treated as cache-inhibited.
- * To access this region /dev/mem is used. The kernel should be configured
- * without CONFIG_STRICT_DEVMEM. In this case use:
- *         ./alignment_handler /dev/mem 0xc0000000
+ * alternative path for cache-inhibited memory may be provided, e.g. memtrace).
  *
  * We initialise the source buffers, then use whichever set of load/store
  * instructions is under test to copy bytes from the source buffers to the
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index defe488d6bf111b5469e7c9059e0e92c91603360..40253abc62089b23b55ca35487a82347212b99b7 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -5,6 +5,7 @@ noarg:
 TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
 		  large_vm_fork_separation bad_accesses pkey_exec_prot \
 		  pkey_siginfo stack_expansion_signal stack_expansion_ldst
+TEST_PROGS := stress_code_patching.sh
 
 TEST_GEN_PROGS_EXTENDED := tlbie_test
 TEST_GEN_FILES := tempfile
diff --git a/tools/testing/selftests/powerpc/mm/stress_code_patching.sh b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh
new file mode 100755
index 0000000000000000000000000000000000000000..e454509659f6b4be5be0351354dfc7e4dc65006b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+TIMEOUT=30
+
+DEBUGFS_DIR=`cat /proc/mounts | grep debugfs | awk '{print $2}'`
+if [ ! -e "$DEBUGFS_DIR" ]
+then
+	echo "debugfs not found, skipping" 1>&2
+	exit 4
+fi
+
+if [ ! -e "$DEBUGFS_DIR/tracing/current_tracer" ]
+then
+	echo "Tracing files not found, skipping" 1>&2
+	exit 4
+fi
+
+
+echo "Testing for spurious faults when mapping kernel memory..."
+
+if grep -q "FUNCTION TRACING IS CORRUPTED" "$DEBUGFS_DIR/tracing/trace"
+then
+	echo "FAILED: Ftrace already dead. Probably due to a spurious fault" 1>&2
+	exit 1
+fi
+
+dmesg -C
+START_TIME=`date +%s`
+END_TIME=`expr $START_TIME + $TIMEOUT`
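+# Toggling the function tracer re-patches every traced kernel function, so a
+# spurious fault during code patching shows up as an "ftrace bug" in dmesg.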
+while [ `date +%s` -lt $END_TIME ]
+do
+	echo function > $DEBUGFS_DIR/tracing/current_tracer
+	echo nop > $DEBUGFS_DIR/tracing/current_tracer
+	if dmesg | grep -q 'ftrace bug'
+	then
+		break
+	fi
+done
+
+echo nop > $DEBUGFS_DIR/tracing/current_tracer
+if dmesg | grep -q 'ftrace bug'
+then
+	echo "FAILED: Mapping kernel memory causes spurious faults" 1>&2
+	exit 1
+else
+	echo "OK: Mapping kernel memory does not cause spurious faults"
+	exit 0
+fi
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
index 02dffb65de48b6001176385fd3a4577de1a69ee5..b099753b50e4af4cf6fea2aa2ef4c915013eb7d3 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -324,7 +324,7 @@ int compress_file(int argc, char **argv, void *handle)
 				fprintf(stderr, "error: cannot progress; ");
 				fprintf(stderr, "too many faults\n");
 				exit(-1);
-			};
+			}
 		}
 
 		fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
index 0e96150b7c7e92e6af048a49cc68baa33d1a5534..eb75e5360e312f631ee265a003e54e6df71e7a6a 100644
--- a/tools/testing/selftests/powerpc/ptrace/.gitignore
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -14,3 +14,4 @@ perf-hwbreak
 core-pkey
 ptrace-pkey
 ptrace-syscall
+ptrace-perf-hwbreak
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 8d3f006c98cc39d608f6d7a35acecd3e6a16b971..a500639da97ab4ecf7c3a503385f64fea378284a 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -2,7 +2,7 @@
 TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
               ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
               ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
-              perf-hwbreak ptrace-syscall
+              perf-hwbreak ptrace-syscall ptrace-perf-hwbreak
 
 top_srcdir = ../../../../..
 include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
index c1f324afdbf373f2c1ff0e6c8e0a57243d25bafd..ecde2c199f3b185cf918452ca6f7e0bb66cf347d 100644
--- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -21,8 +21,13 @@
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <signal.h>
 #include <string.h>
 #include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/ptrace.h>
+#include <sys/sysinfo.h>
+#include <asm/ptrace.h>
 #include <elf.h>
 #include <pthread.h>
 #include <sys/syscall.h>
@@ -30,32 +35,130 @@
 #include <linux/hw_breakpoint.h>
 #include "utils.h"
 
+#ifndef PPC_DEBUG_FEATURE_DATA_BP_ARCH_31
+#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31	0x20
+#endif
+
 #define MAX_LOOPS 10000
 
 #define DAWR_LENGTH_MAX ((0x3f + 1) * 8)
 
-static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
-				      int cpu, int group_fd,
-				      unsigned long flags)
+int nprocs;
+
+static volatile int a = 10;
+static volatile int b = 10;
+static volatile char c[512 + 8] __attribute__((aligned(512)));
+
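+/*
+ * Fill a PERF_TYPE_BREAKPOINT attribute. Kernel, hypervisor and guest events
+ * are always excluded; user space is excluded only when exclude_user is set.
+ */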
+static void perf_event_attr_set(struct perf_event_attr *attr,
+				__u32 type, __u64 addr, __u64 len,
+				bool exclude_user)
 {
-	attr->size = sizeof(*attr);
-	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+	memset(attr, 0, sizeof(struct perf_event_attr));
+	attr->type           = PERF_TYPE_BREAKPOINT;
+	attr->size           = sizeof(struct perf_event_attr);
+	attr->bp_type        = type;
+	attr->bp_addr        = addr;
+	attr->bp_len         = len;
+	attr->exclude_kernel = 1;
+	attr->exclude_hv     = 1;
+	attr->exclude_guest  = 1;
+	attr->exclude_user   = exclude_user;
+	attr->disabled       = 1;
 }
 
-static inline bool breakpoint_test(int len)
+static int
+perf_process_event_open_exclude_user(__u32 type, __u64 addr, __u64 len, bool exclude_user)
+{
+	struct perf_event_attr attr;
+
+	perf_event_attr_set(&attr, type, addr, len, exclude_user);
+	return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+}
+
+static int perf_process_event_open(__u32 type, __u64 addr, __u64 len)
+{
+	struct perf_event_attr attr;
+
+	perf_event_attr_set(&attr, type, addr, len, 0);
+	return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+}
+
+static int perf_cpu_event_open(long cpu, __u32 type, __u64 addr, __u64 len)
 {
 	struct perf_event_attr attr;
+
+	perf_event_attr_set(&attr, type, addr, len, 0);
+	return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+}
+
+static void close_fds(int *fd, int n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		close(fd[i]);
+}
+
+static unsigned long read_fds(int *fd, int n)
+{
+	int i;
+	unsigned long c = 0;
+	unsigned long count = 0;
+	size_t res;
+
+	for (i = 0; i < n; i++) {
+		res = read(fd[i], &c, sizeof(c));
+		assert(res == sizeof(unsigned long long));
+		count += c;
+	}
+	return count;
+}
+
+static void reset_fds(int *fd, int n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		ioctl(fd[i], PERF_EVENT_IOC_RESET);
+}
+
+static void enable_fds(int *fd, int n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		ioctl(fd[i], PERF_EVENT_IOC_ENABLE);
+}
+
+static void disable_fds(int *fd, int n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		ioctl(fd[i], PERF_EVENT_IOC_DISABLE);
+}
+
+static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len)
+{
+	int i = 0;
+
+	/* Assume online processors are 0 to nprocs - 1 for simplicity */
+	for (i = 0; i < nprocs; i++) {
+		fd[i] = perf_cpu_event_open(i, type, addr, len);
+		if (fd[i] < 0) {
+			close_fds(fd, i);
+			return fd[i];
+		}
+	}
+	return 0;
+}
+
+static inline bool breakpoint_test(int len)
+{
 	int fd;
 
-	/* setup counters */
-	memset(&attr, 0, sizeof(attr));
-	attr.disabled = 1;
-	attr.type = PERF_TYPE_BREAKPOINT;
-	attr.bp_type = HW_BREAKPOINT_R;
 	/* bp_addr can point anywhere but needs to be aligned */
-	attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800;
-	attr.bp_len = len;
-	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	fd = perf_process_event_open(HW_BREAKPOINT_R, (__u64)(&fd) & 0xfffffffffffff800, len);
 	if (fd < 0)
 		return false;
 	close(fd);
@@ -75,7 +178,6 @@ static inline bool dawr_supported(void)
 static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
 {
 	int i,j;
-	struct perf_event_attr attr;
 	size_t res;
 	unsigned long long breaks, needed;
 	int readint;
@@ -85,6 +187,7 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
 	int break_fd;
 	int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */
 	volatile int *k;
+	__u64 len;
 
 	/* align to 0x400 boundary as required by DAWR */
 	readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) &
@@ -94,19 +197,11 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
 	if (arraytest)
 		ptr = &readintalign[0];
 
-	/* setup counters */
-	memset(&attr, 0, sizeof(attr));
-	attr.disabled = 1;
-	attr.type = PERF_TYPE_BREAKPOINT;
-	attr.bp_type = readwriteflag;
-	attr.bp_addr = (__u64)ptr;
-	attr.bp_len = sizeof(int);
-	if (arraytest)
-		attr.bp_len = DAWR_LENGTH_MAX;
-	attr.exclude_user = exclude_user;
-	break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	len = arraytest ? DAWR_LENGTH_MAX : sizeof(int);
+	break_fd = perf_process_event_open_exclude_user(readwriteflag, (__u64)ptr,
+							len, exclude_user);
 	if (break_fd < 0) {
-		perror("sys_perf_event_open");
+		perror("perf_process_event_open_exclude_user");
 		exit(1);
 	}
 
@@ -153,7 +248,6 @@ static int runtest_dar_outside(void)
 	void *target;
 	volatile __u16 temp16;
 	volatile __u64 temp64;
-	struct perf_event_attr attr;
 	int break_fd;
 	unsigned long long breaks;
 	int fail = 0;
@@ -165,21 +259,11 @@ static int runtest_dar_outside(void)
 		exit(EXIT_FAILURE);
 	}
 
-	/* setup counters */
-	memset(&attr, 0, sizeof(attr));
-	attr.disabled = 1;
-	attr.type = PERF_TYPE_BREAKPOINT;
-	attr.exclude_kernel = 1;
-	attr.exclude_hv = 1;
-	attr.exclude_guest = 1;
-	attr.bp_type = HW_BREAKPOINT_RW;
 	/* watch middle half of target array */
-	attr.bp_addr = (__u64)(target + 2);
-	attr.bp_len = 4;
-	break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	break_fd = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)(target + 2), 4);
 	if (break_fd < 0) {
 		free(target);
-		perror("sys_perf_event_open");
+		perror("perf_process_event_open");
 		exit(EXIT_FAILURE);
 	}
 
@@ -263,11 +347,467 @@ static int runtest_dar_outside(void)
 	return fail;
 }
 
+static void multi_dawr_workload(void)
+{
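+	/*
+	 * Each access below is a load followed by a store, so an RW
+	 * breakpoint on the location counts 2 hits per run while an R-only
+	 * or W-only breakpoint counts 1.
+	 */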
+	a += 10;
+	b += 10;
+	c[512 + 1] += 'a';
+}
+
+static int test_process_multi_diff_addr(void)
+{
+	unsigned long long breaks1 = 0, breaks2 = 0;
+	int fd1, fd2;
+	char *desc = "Process specific, Two events, diff addr";
+	size_t res;
+
+	fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+	if (fd1 < 0) {
+		perror("perf_process_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b));
+	if (fd2 < 0) {
+		close(fd1);
+		perror("perf_process_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	ioctl(fd1, PERF_EVENT_IOC_RESET);
+	ioctl(fd2, PERF_EVENT_IOC_RESET);
+	ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+	ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+	multi_dawr_workload();
+	ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+	ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+	res = read(fd1, &breaks1, sizeof(breaks1));
+	assert(res == sizeof(unsigned long long));
+	res = read(fd2, &breaks2, sizeof(breaks2));
+	assert(res == sizeof(unsigned long long));
+
+	close(fd1);
+	close(fd2);
+
+	if (breaks1 != 2 || breaks2 != 2) {
+		printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+		return 1;
+	}
+
+	printf("TESTED: %s\n", desc);
+	return 0;
+}
+
+static int test_process_multi_same_addr(void)
+{
+	unsigned long long breaks1 = 0, breaks2 = 0;
+	int fd1, fd2;
+	char *desc = "Process specific, Two events, same addr";
+	size_t res;
+
+	fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+	if (fd1 < 0) {
+		perror("perf_process_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+	if (fd2 < 0) {
+		close(fd1);
+		perror("perf_process_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	ioctl(fd1, PERF_EVENT_IOC_RESET);
+	ioctl(fd2, PERF_EVENT_IOC_RESET);
+	ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+	ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+	multi_dawr_workload();
+	ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+	ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+	res = read(fd1, &breaks1, sizeof(breaks1));
+	assert(res == sizeof(unsigned long long));
+	res = read(fd2, &breaks2, sizeof(breaks2));
+	assert(res == sizeof(unsigned long long));
+
+	close(fd1);
+	close(fd2);
+
+	if (breaks1 != 2 || breaks2 != 2) {
+		printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+		return 1;
+	}
+
+	printf("TESTED: %s\n", desc);
+	return 0;
+}
+
+static int test_process_multi_diff_addr_ro_wo(void)
+{
+	unsigned long long breaks1 = 0, breaks2 = 0;
+	int fd1, fd2;
+	char *desc = "Process specific, Two events, diff addr, one is RO, other is WO";
+	size_t res;
+
+	fd1 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+	if (fd1 < 0) {
+		perror("perf_process_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	fd2 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b));
+	if (fd2 < 0) {
+		close(fd1);
+		perror("perf_process_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	ioctl(fd1, PERF_EVENT_IOC_RESET);
+	ioctl(fd2, PERF_EVENT_IOC_RESET);
+	ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+	ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+	multi_dawr_workload();
+	ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+	ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+	res = read(fd1, &breaks1, sizeof(breaks1));
+	assert(res == sizeof(unsigned long long));
+	res = read(fd2, &breaks2, sizeof(breaks2));
+	assert(res == sizeof(unsigned long long));
+
+	close(fd1);
+	close(fd2);
+
+	if (breaks1 != 1 || breaks2 != 1) {
+		printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+		return 1;
+	}
+
+	printf("TESTED: %s\n", desc);
+	return 0;
+}
+
+static int test_process_multi_same_addr_ro_wo(void)
+{
+	unsigned long long breaks1 = 0, breaks2 = 0;
+	int fd1, fd2;
+	char *desc = "Process specific, Two events, same addr, one is RO, other is WO";
+	size_t res;
+
+	fd1 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a));
+	if (fd1 < 0) {
+		perror("perf_process_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	fd2 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+	if (fd2 < 0) {
+		close(fd1);
+		perror("perf_process_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	ioctl(fd1, PERF_EVENT_IOC_RESET);
+	ioctl(fd2, PERF_EVENT_IOC_RESET);
+	ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+	ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+	multi_dawr_workload();
+	ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+	ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+	res = read(fd1, &breaks1, sizeof(breaks1));
+	assert(res == sizeof(unsigned long long));
+	res = read(fd2, &breaks2, sizeof(breaks2));
+	assert(res == sizeof(unsigned long long));
+
+	close(fd1);
+	close(fd2);
+
+	if (breaks1 != 1 || breaks2 != 1) {
+		printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+		return 1;
+	}
+
+	printf("TESTED: %s\n", desc);
+	return 0;
+}
+
+static int test_syswide_multi_diff_addr(void)
+{
+	unsigned long long breaks1 = 0, breaks2 = 0;
+	int *fd1 = malloc(nprocs * sizeof(int));
+	int *fd2 = malloc(nprocs * sizeof(int));
+	char *desc = "Systemwide, Two events, diff addr";
+	int ret;
+
+	ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+	if (ret) {
+		perror("perf_systemwide_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b));
+	if (ret) {
+		close_fds(fd1, nprocs);
+		perror("perf_systemwide_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	reset_fds(fd1, nprocs);
+	reset_fds(fd2, nprocs);
+	enable_fds(fd1, nprocs);
+	enable_fds(fd2, nprocs);
+	multi_dawr_workload();
+	disable_fds(fd1, nprocs);
+	disable_fds(fd2, nprocs);
+
+	breaks1 = read_fds(fd1, nprocs);
+	breaks2 = read_fds(fd2, nprocs);
+
+	close_fds(fd1, nprocs);
+	close_fds(fd2, nprocs);
+
+	free(fd1);
+	free(fd2);
+
+	if (breaks1 != 2 || breaks2 != 2) {
+		printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+		return 1;
+	}
+
+	printf("TESTED: %s\n", desc);
+	return 0;
+}
+
+static int test_syswide_multi_same_addr(void)
+{
+	unsigned long long breaks1 = 0, breaks2 = 0;
+	int *fd1 = malloc(nprocs * sizeof(int));
+	int *fd2 = malloc(nprocs * sizeof(int));
+	char *desc = "Systemwide, Two events, same addr";
+	int ret;
+
+	ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+	if (ret) {
+		perror("perf_systemwide_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+	if (ret) {
+		close_fds(fd1, nprocs);
+		perror("perf_systemwide_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	reset_fds(fd1, nprocs);
+	reset_fds(fd2, nprocs);
+	enable_fds(fd1, nprocs);
+	enable_fds(fd2, nprocs);
+	multi_dawr_workload();
+	disable_fds(fd1, nprocs);
+	disable_fds(fd2, nprocs);
+
+	breaks1 = read_fds(fd1, nprocs);
+	breaks2 = read_fds(fd2, nprocs);
+
+	close_fds(fd1, nprocs);
+	close_fds(fd2, nprocs);
+
+	free(fd1);
+	free(fd2);
+
+	if (breaks1 != 2 || breaks2 != 2) {
+		printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+		return 1;
+	}
+
+	printf("TESTED: %s\n", desc);
+	return 0;
+}
+
+static int test_syswide_multi_diff_addr_ro_wo(void)
+{
+	unsigned long long breaks1 = 0, breaks2 = 0;
+	int *fd1 = malloc(nprocs * sizeof(int));
+	int *fd2 = malloc(nprocs * sizeof(int));
+	char *desc = "Systemwide, Two events, diff addr, one is RO, other is WO";
+	int ret;
+
+	ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+	if (ret) {
+		perror("perf_systemwide_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b));
+	if (ret) {
+		close_fds(fd1, nprocs);
+		perror("perf_systemwide_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	reset_fds(fd1, nprocs);
+	reset_fds(fd2, nprocs);
+	enable_fds(fd1, nprocs);
+	enable_fds(fd2, nprocs);
+	multi_dawr_workload();
+	disable_fds(fd1, nprocs);
+	disable_fds(fd2, nprocs);
+
+	breaks1 = read_fds(fd1, nprocs);
+	breaks2 = read_fds(fd2, nprocs);
+
+	close_fds(fd1, nprocs);
+	close_fds(fd2, nprocs);
+
+	free(fd1);
+	free(fd2);
+
+	if (breaks1 != 1 || breaks2 != 1) {
+		printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+		return 1;
+	}
+
+	printf("TESTED: %s\n", desc);
+	return 0;
+}
+
+static int test_syswide_multi_same_addr_ro_wo(void)
+{
+	unsigned long long breaks1 = 0, breaks2 = 0;
+	int *fd1 = malloc(nprocs * sizeof(int));
+	int *fd2 = malloc(nprocs * sizeof(int));
+	char *desc = "Systemwide, Two events, same addr, one is RO, other is WO";
+	int ret;
+
+	ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+	if (ret) {
+		perror("perf_systemwide_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a));
+	if (ret) {
+		close_fds(fd1, nprocs);
+		perror("perf_systemwide_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	reset_fds(fd1, nprocs);
+	reset_fds(fd2, nprocs);
+	enable_fds(fd1, nprocs);
+	enable_fds(fd2, nprocs);
+	multi_dawr_workload();
+	disable_fds(fd1, nprocs);
+	disable_fds(fd2, nprocs);
+
+	breaks1 = read_fds(fd1, nprocs);
+	breaks2 = read_fds(fd2, nprocs);
+
+	close_fds(fd1, nprocs);
+	close_fds(fd2, nprocs);
+
+	free(fd1);
+	free(fd2);
+
+	if (breaks1 != 1 || breaks2 != 1) {
+		printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+		return 1;
+	}
+
+	printf("TESTED: %s\n", desc);
+	return 0;
+}
+
+static int runtest_multi_dawr(void)
+{
+	int ret = 0;
+
+	ret |= test_process_multi_diff_addr();
+	ret |= test_process_multi_same_addr();
+	ret |= test_process_multi_diff_addr_ro_wo();
+	ret |= test_process_multi_same_addr_ro_wo();
+	ret |= test_syswide_multi_diff_addr();
+	ret |= test_syswide_multi_same_addr();
+	ret |= test_syswide_multi_diff_addr_ro_wo();
+	ret |= test_syswide_multi_same_addr_ro_wo();
+
+	return ret;
+}
+
+static int runtest_unaligned_512bytes(void)
+{
+	unsigned long long breaks = 0;
+	int fd;
+	char *desc = "Process specific, 512 bytes, unaligned";
+	__u64 addr = (__u64)&c + 8;
+	size_t res;
+
+	fd = perf_process_event_open(HW_BREAKPOINT_RW, addr, 512);
+	if (fd < 0) {
+		perror("perf_process_event_open");
+		exit(EXIT_FAILURE);
+	}
+
+	ioctl(fd, PERF_EVENT_IOC_RESET);
+	ioctl(fd, PERF_EVENT_IOC_ENABLE);
+	multi_dawr_workload();
+	ioctl(fd, PERF_EVENT_IOC_DISABLE);
+
+	res = read(fd, &breaks, sizeof(breaks));
+	assert(res == sizeof(unsigned long long));
+
+	close(fd);
+
+	if (breaks != 2) {
+		printf("FAILED: %s: %lld != 2\n", desc, breaks);
+		return 1;
+	}
+
+	printf("TESTED: %s\n", desc);
+	return 0;
+}
+
+/* There is no perf API to find the number of available watchpoints. Use ptrace. */
+static int get_nr_wps(bool *arch_31)
+{
+	struct ppc_debug_info dbginfo;
+	int child_pid;
+
+	child_pid = fork();
+	if (!child_pid) {
+		int ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+		if (ret) {
+			perror("PTRACE_TRACEME failed");
+			exit(EXIT_FAILURE);
+		}
+		kill(getpid(), SIGUSR1);
+
+		sleep(1);
+		exit(EXIT_SUCCESS);
+	}
+
+	wait(NULL);
+	if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo)) {
+		perror("Can't get breakpoint info");
+		exit(EXIT_FAILURE);
+	}
+
+	*arch_31 = !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_ARCH_31);
+	return dbginfo.num_data_bps;
+}
+
 static int runtest(void)
 {
 	int rwflag;
 	int exclude_user;
 	int ret;
+	bool dawr = dawr_supported();
+	bool arch_31 = false;
+	int nr_wps = get_nr_wps(&arch_31);
 
 	/*
 	 * perf defines rwflag as two bits read and write and at least
@@ -280,7 +820,7 @@ static int runtest(void)
 				return ret;
 
 			/* if we have the dawr, we can do an array test */
-			if (!dawr_supported())
+			if (!dawr)
 				continue;
 			ret = runtestsingle(rwflag, exclude_user, 1);
 			if (ret)
@@ -289,6 +829,19 @@ static int runtest(void)
 	}
 
 	ret = runtest_dar_outside();
+	if (ret)
+		return ret;
+
+	if (dawr && nr_wps > 1) {
+		nprocs = get_nprocs();
+		ret = runtest_multi_dawr();
+		if (ret)
+			return ret;
+	}
+
+	if (dawr && arch_31)
+		ret = runtest_unaligned_512bytes();
+
 	return ret;
 }
 
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index 2e0d86e0687e15dda924a5d5f5931b4860838b90..a0635a3819aa421a4e11898e54e0625a7e799d11 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -194,6 +194,18 @@ static void test_workload(void)
 		big_var[rand() % DAWR_MAX_LEN] = 'a';
 	else
 		cvar = big_var[rand() % DAWR_MAX_LEN];
+
+	/* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */
+	gstruct.a[rand() % A_LEN] = 'a';
+
+	/* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */
+	cvar = gstruct.b[rand() % B_LEN];
+
+	/* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */
+	gstruct.a[rand() % A_LEN] = 'a';
+
+	/* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */
+	cvar = gstruct.a[rand() % A_LEN];
 }
 
 static void check_success(pid_t child_pid, const char *name, const char *type,
@@ -417,6 +429,69 @@ static void test_sethwdebug_range_aligned(pid_t child_pid)
 	ptrace_delhwdebug(child_pid, wh);
 }
 
+static void test_multi_sethwdebug_range(pid_t child_pid)
+{
+	struct ppc_hw_breakpoint info1, info2;
+	unsigned long wp_addr1, wp_addr2;
+	char *name1 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED";
+	char *name2 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED";
+	int len1, len2;
+	int wh1, wh2;
+
+	wp_addr1 = (unsigned long)&gstruct.a;
+	wp_addr2 = (unsigned long)&gstruct.b;
+	len1 = A_LEN;
+	len2 = B_LEN;
+	get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1);
+	get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2);
+
+	/* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */
+	wh1 = ptrace_sethwdebug(child_pid, &info1);
+
+	/* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */
+	wh2 = ptrace_sethwdebug(child_pid, &info2);
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name1, "WO", wp_addr1, len1);
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name2, "RO", wp_addr2, len2);
+
+	ptrace_delhwdebug(child_pid, wh1);
+	ptrace_delhwdebug(child_pid, wh2);
+}
+
+static void test_multi_sethwdebug_range_dawr_overlap(pid_t child_pid)
+{
+	struct ppc_hw_breakpoint info1, info2;
+	unsigned long wp_addr1, wp_addr2;
+	char *name = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap";
+	int len1, len2;
+	int wh1, wh2;
+
+	wp_addr1 = (unsigned long)&gstruct.a;
+	wp_addr2 = (unsigned long)&gstruct.a;
+	len1 = A_LEN;
+	len2 = A_LEN;
+	get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1);
+	get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2);
+
+	/* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */
+	wh1 = ptrace_sethwdebug(child_pid, &info1);
+
+	/* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */
+	wh2 = ptrace_sethwdebug(child_pid, &info2);
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "WO", wp_addr1, len1);
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "RO", wp_addr2, len2);
+
+	ptrace_delhwdebug(child_pid, wh1);
+	ptrace_delhwdebug(child_pid, wh2);
+}
+
 static void test_sethwdebug_range_unaligned(pid_t child_pid)
 {
 	struct ppc_hw_breakpoint info;
@@ -504,6 +579,10 @@ run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
 			test_sethwdebug_range_unaligned(child_pid);
 			test_sethwdebug_range_unaligned_dar(child_pid);
 			test_sethwdebug_dawr_max_range(child_pid);
+			if (dbginfo->num_data_bps > 1) {
+				test_multi_sethwdebug_range(child_pid);
+				test_multi_sethwdebug_range_dawr_overlap(child_pid);
+			}
 		}
 	}
 }
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
new file mode 100644
index 0000000000000000000000000000000000000000..3344e74a97b4f8fec2426f84371822b00f931f72
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
@@ -0,0 +1,659 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include "ptrace.h"
+
+char data[16];
+
+/* Overlapping address range */
+volatile __u64 *ptrace_data1 = (__u64 *)&data[0];
+volatile __u64 *perf_data1 = (__u64 *)&data[4];
+
+/* Non-overlapping address range */
+volatile __u64 *ptrace_data2 = (__u64 *)&data[0];
+volatile __u64 *perf_data2 = (__u64 *)&data[8];
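+/*
+ * Each pointer above covers 8 bytes of data[]: ptrace_data1 (bytes 0-7) and
+ * perf_data1 (bytes 4-11) overlap, while ptrace_data2 (bytes 0-7) and
+ * perf_data2 (bytes 8-15) do not.
+ */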
+
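+/*
+ * Look up the address of the kernel's pid_max symbol in /proc/kallsyms; it is
+ * only used as a convenient kernel address for the kernel-side breakpoint
+ * events below.
+ */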
+static unsigned long pid_max_addr(void)
+{
+	FILE *fp;
+	char *line, *c;
+	char addr[100];
+	size_t len = 0;
+
+	fp = fopen("/proc/kallsyms", "r");
+	if (!fp) {
+		printf("Failed to read /proc/kallsyms. Exiting..\n");
+		exit(EXIT_FAILURE);
+	}
+
+	while (getline(&line, &len, fp) != -1) {
+		if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") ||
+		    strstr(line, "pid_max_min"))
+			continue;
+
+		strncpy(addr, line, len < 100 ? len : 100);
+		c = strchr(addr, ' ');
+		*c = '\0';
+		return strtoul(addr, &c, 16);
+	}
+	fclose(fp);
+	printf("Could not find pid_max. Exiting..\n");
+	exit(EXIT_FAILURE);
+	return -1;
+}
+
+static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len)
+{
+	memset(attr, 0, sizeof(struct perf_event_attr));
+	attr->type           = PERF_TYPE_BREAKPOINT;
+	attr->size           = sizeof(struct perf_event_attr);
+	attr->bp_type        = HW_BREAKPOINT_R;
+	attr->bp_addr        = addr;
+	attr->bp_len         = len;
+	attr->exclude_kernel = 1;
+	attr->exclude_hv     = 1;
+}
+
+static void perf_kernel_event_attr_set(struct perf_event_attr *attr)
+{
+	memset(attr, 0, sizeof(struct perf_event_attr));
+	attr->type           = PERF_TYPE_BREAKPOINT;
+	attr->size           = sizeof(struct perf_event_attr);
+	attr->bp_type        = HW_BREAKPOINT_R;
+	attr->bp_addr        = pid_max_addr();
+	attr->bp_len         = sizeof(unsigned long);
+	attr->exclude_user   = 1;
+	attr->exclude_hv     = 1;
+}
+
+static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len)
+{
+	struct perf_event_attr attr;
+
+	perf_user_event_attr_set(&attr, addr, len);
+	return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+}
+
+static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len)
+{
+	struct perf_event_attr attr;
+
+	perf_user_event_attr_set(&attr, addr, len);
+	return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0);
+}
+
+static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len)
+{
+	struct perf_event_attr attr;
+
+	perf_user_event_attr_set(&attr, addr, len);
+	return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0);
+}
+
+static int perf_thread_kernel_event_open(pid_t child_pid)
+{
+	struct perf_event_attr attr;
+
+	perf_kernel_event_attr_set(&attr);
+	return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0);
+}
+
+static int perf_cpu_kernel_event_open(int cpu)
+{
+	struct perf_event_attr attr;
+
+	perf_kernel_event_attr_set(&attr);
+	return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+}
+
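+/*
+ * The child marks itself traced and stops with SIGUSR1 so the parent can
+ * install ptrace watchpoints on it while the tests run.
+ */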
+static int child(void)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+	if (ret) {
+		printf("Error: PTRACE_TRACEME failed\n");
+		return 0;
+	}
+	kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */
+
+	return 0;
+}
+
+static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
+				     __u64 addr, int len)
+{
+	info->version = 1;
+	info->trigger_type = type;
+	info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+	info->addr = addr;
+	info->addr2 = addr + len;
+	info->condition_value = 0;
+	if (!len)
+		info->addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+	else
+		info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+}
+
+static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len)
+{
+	struct ppc_hw_breakpoint info;
+
+	ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+	return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
+}
+
+static int test1(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread event by ptrace)
+	 *	if (existing cpu event by perf)
+	 *		if (addr range overlaps)
+	 *			fail;
+	 */
+
+	perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
+	if (perf_fd < 0)
+		return -1;
+
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+	if (ptrace_fd > 0 || errno != ENOSPC)
+		ret = -1;
+
+	close(perf_fd);
+	return ret;
+}
+
+static int test2(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread event by ptrace)
+	 *	if (existing cpu event by perf)
+	 *		if (addr range does not overlap)
+	 *			allow;
+	 */
+
+	perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2));
+	if (perf_fd < 0)
+		return -1;
+
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
+	if (ptrace_fd < 0) {
+		ret = -1;
+		goto perf_close;
+	}
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+
+perf_close:
+	close(perf_fd);
+	return ret;
+}
+
+static int test3(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread event by ptrace)
+	 *	if (existing thread event by perf on the same thread)
+	 *		if (addr range overlaps)
+	 *			fail;
+	 */
+	perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1,
+					 sizeof(*perf_data1));
+	if (perf_fd < 0)
+		return -1;
+
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+	if (ptrace_fd > 0 || errno != ENOSPC)
+		ret = -1;
+
+	close(perf_fd);
+	return ret;
+}
+
+static int test4(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread event by ptrace)
+	 *	if (existing thread event by perf on the same thread)
+	 *		if (addr range does not overlap)
+	 *			allow;
+	 */
+	perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2,
+					 sizeof(*perf_data2));
+	if (perf_fd < 0)
+		return -1;
+
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
+	if (ptrace_fd < 0) {
+		ret = -1;
+		goto perf_close;
+	}
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+
+perf_close:
+	close(perf_fd);
+	return ret;
+}
+
+static int test5(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int cpid;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread event by ptrace)
+	 *	if (existing thread event by perf on the different thread)
+	 *		allow;
+	 */
+	cpid = fork();
+	if (!cpid) {
+		/* Temporary Child */
+		pause();
+		exit(EXIT_SUCCESS);
+	}
+
+	perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1));
+	if (perf_fd < 0) {
+		ret = -1;
+		goto kill_child;
+	}
+
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+	if (ptrace_fd < 0) {
+		ret = -1;
+		goto perf_close;
+	}
+
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+perf_close:
+	close(perf_fd);
+kill_child:
+	kill(cpid, SIGINT);
+	return ret;
+}
+
+static int test6(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread kernel event by perf)
+	 *	if (existing thread event by ptrace on the same thread)
+	 *		allow;
+	 * -- OR --
+	 * if (new per cpu kernel event by perf)
+	 *	if (existing thread event by ptrace)
+	 *		allow;
+	 */
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+	if (ptrace_fd < 0)
+		return -1;
+
+	perf_fd = perf_thread_kernel_event_open(child_pid);
+	if (perf_fd < 0) {
+		ret = -1;
+		goto ptrace_close;
+	}
+	close(perf_fd);
+
+	perf_fd = perf_cpu_kernel_event_open(0);
+	if (perf_fd < 0) {
+		ret = -1;
+		goto ptrace_close;
+	}
+	close(perf_fd);
+
+ptrace_close:
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int test7(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread event by perf)
+	 *	if (existing thread event by ptrace on the same thread)
+	 *		if (addr range overlaps)
+	 *			fail;
+	 */
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+	if (ptrace_fd < 0)
+		return -1;
+
+	perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1,
+					 sizeof(*perf_data1));
+	if (perf_fd > 0 || errno != ENOSPC)
+		ret = -1;
+
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int test8(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread event by perf)
+	 *	if (existing thread event by ptrace on the same thread)
+	 *		if (addr range does not overlap)
+	 *			allow;
+	 */
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
+	if (ptrace_fd < 0)
+		return -1;
+
+	perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2,
+					 sizeof(*perf_data2));
+	if (perf_fd < 0) {
+		ret = -1;
+		goto ptrace_close;
+	}
+	close(perf_fd);
+
+ptrace_close:
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int test9(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int cpid;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread event by perf)
+	 *	if (existing thread event by ptrace on the other thread)
+	 *		allow;
+	 */
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+	if (ptrace_fd < 0)
+		return -1;
+
+	cpid = fork();
+	if (!cpid) {
+		/* Temporary Child */
+		pause();
+		exit(EXIT_SUCCESS);
+	}
+
+	perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1));
+	if (perf_fd < 0) {
+		ret = -1;
+		goto kill_child;
+	}
+	close(perf_fd);
+
+kill_child:
+	kill(cpid, SIGINT);
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int test10(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per cpu event by perf)
+	 *	if (existing thread event by ptrace on the same thread)
+	 *		if (addr range overlaps)
+	 *			fail;
+	 */
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+	if (ptrace_fd < 0)
+		return -1;
+
+	perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
+	if (perf_fd > 0 || errno != ENOSPC)
+		ret = -1;
+
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int test11(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per cpu event by perf)
+	 *	if (existing thread event by ptrace on the same thread)
+	 *		if (addr range does not overlap)
+	 *			allow;
+	 */
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
+	if (ptrace_fd < 0)
+		return -1;
+
+	perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2));
+	if (perf_fd < 0) {
+		ret = -1;
+		goto ptrace_close;
+	}
+	close(perf_fd);
+
+ptrace_close:
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int test12(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread and per cpu event by perf)
+	 *	if (existing thread event by ptrace on the same thread)
+	 *		if (addr range overlaps)
+	 *			fail;
+	 */
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+	if (ptrace_fd < 0)
+		return -1;
+
+	perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1));
+	if (perf_fd > 0 || errno != ENOSPC)
+		ret = -1;
+
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int test13(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread and per cpu event by perf)
+	 *	if (existing thread event by ptrace on the same thread)
+	 *		if (addr range does not overlap)
+	 *			allow;
+	 */
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2));
+	if (ptrace_fd < 0)
+		return -1;
+
+	perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2));
+	if (perf_fd < 0) {
+		ret = -1;
+		goto ptrace_close;
+	}
+	close(perf_fd);
+
+ptrace_close:
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int test14(pid_t child_pid)
+{
+	int perf_fd;
+	int ptrace_fd;
+	int cpid;
+	int ret = 0;
+
+	/* Test:
+	 * if (new per thread and per cpu event by perf)
+	 *	if (existing thread event by ptrace on the other thread)
+	 *		allow;
+	 */
+	ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1));
+	if (ptrace_fd < 0)
+		return -1;
+
+	cpid = fork();
+	if (!cpid) {
+		/* Temporary Child */
+		pause();
+		exit(EXIT_SUCCESS);
+	}
+
+	perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1,
+					     sizeof(*perf_data1));
+	if (perf_fd < 0) {
+		ret = -1;
+		goto kill_child;
+	}
+	close(perf_fd);
+
+kill_child:
+	kill(cpid, SIGINT);
+	ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd);
+	return ret;
+}
+
+static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg)
+{
+	int ret;
+
+	ret = fun(arg);
+	if (ret)
+		printf("%s: Error\n", msg);
+	else
+		printf("%s: Ok\n", msg);
+	return ret;
+}
+
+char *desc[14] = {
+	"perf cpu event -> ptrace thread event (Overlapping)",
+	"perf cpu event -> ptrace thread event (Non-overlapping)",
+	"perf thread event -> ptrace same thread event (Overlapping)",
+	"perf thread event -> ptrace same thread event (Non-overlapping)",
+	"perf thread event -> ptrace other thread event",
+	"ptrace thread event -> perf kernel event",
+	"ptrace thread event -> perf same thread event (Overlapping)",
+	"ptrace thread event -> perf same thread event (Non-overlapping)",
+	"ptrace thread event -> perf other thread event",
+	"ptrace thread event -> perf cpu event (Overlapping)",
+	"ptrace thread event -> perf cpu event (Non-overlapping)",
+	"ptrace thread event -> perf same thread & cpu event (Overlapping)",
+	"ptrace thread event -> perf same thread & cpu event (Non-overlapping)",
+	"ptrace thread event -> perf other thread & cpu event",
+};
+
+static int test(pid_t child_pid)
+{
+	int ret = TEST_PASS;
+
+	ret |= do_test(desc[0], test1, child_pid);
+	ret |= do_test(desc[1], test2, child_pid);
+	ret |= do_test(desc[2], test3, child_pid);
+	ret |= do_test(desc[3], test4, child_pid);
+	ret |= do_test(desc[4], test5, child_pid);
+	ret |= do_test(desc[5], test6, child_pid);
+	ret |= do_test(desc[6], test7, child_pid);
+	ret |= do_test(desc[7], test8, child_pid);
+	ret |= do_test(desc[8], test9, child_pid);
+	ret |= do_test(desc[9], test10, child_pid);
+	ret |= do_test(desc[10], test11, child_pid);
+	ret |= do_test(desc[11], test12, child_pid);
+	ret |= do_test(desc[12], test13, child_pid);
+	ret |= do_test(desc[13], test14, child_pid);
+
+	return ret;
+}
+
+static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
+{
+	if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) {
+		perror("Can't get breakpoint info");
+		exit(-1);
+	}
+}
+
+static int ptrace_perf_hwbreak(void)
+{
+	int ret;
+	pid_t child_pid;
+	struct ppc_debug_info dbginfo;
+
+	child_pid = fork();
+	if (!child_pid)
+		return child();
+
+	/* parent */
+	wait(NULL); /* <-- child (SIGUSR1) */
+
+	get_dbginfo(child_pid, &dbginfo);
+	SKIP_IF(dbginfo.num_data_bps <= 1);
+
+	ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1));
+	SKIP_IF(ret < 0);
+	close(ret);
+
+	ret = test(child_pid);
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak");
+}
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index f25e854fe3709a7faf1638d1b7fbacaa00e18307..844d18cd5f930360cf193a30e773773f67a7d94d 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0+
 
-TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2
+TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2
 top_srcdir = ../../../../..
 
 CFLAGS += -I../../../../../usr/include
@@ -13,3 +13,4 @@ $(OUTPUT)/spectre_v2: CFLAGS += -m64
 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
 $(OUTPUT)/rfi_flush: flush_utils.c
 $(OUTPUT)/entry_flush: flush_utils.c
+$(OUTPUT)/uaccess_flush: flush_utils.c
diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c
index 78cf914fa3217395a10582394d754409d7dec663..68ce377b205e91ab04ab34fb49cc67151199f545 100644
--- a/tools/testing/selftests/powerpc/security/entry_flush.c
+++ b/tools/testing/selftests/powerpc/security/entry_flush.c
@@ -53,7 +53,7 @@ int entry_flush_test(void)
 
 	entry_flush = entry_flush_orig;
 
-	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+	fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
 	FAIL_IF(fd < 0);
 
 	p = (char *)memalign(zero_size, CACHELINE_SIZE);
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c
index 0c3c4c40c7fbbc1baabc7506f5145f17b1f9cf20..4d95965cb751f360fecedc51d8c737949948b49b 100644
--- a/tools/testing/selftests/powerpc/security/flush_utils.c
+++ b/tools/testing/selftests/powerpc/security/flush_utils.c
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
+#include <sys/utsname.h>
 #include "utils.h"
 #include "flush_utils.h"
 
@@ -35,6 +36,18 @@ void syscall_loop(char *p, unsigned long iterations,
 	}
 }
 
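+/*
+ * Like syscall_loop(), but each iteration calls uname() so the kernel copies
+ * the utsname struct back to user space, exercising the uaccess flush path.
+ */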
+void syscall_loop_uaccess(char *p, unsigned long iterations,
+			  unsigned long zero_size)
+{
+	struct utsname utsname;
+
+	for (unsigned long i = 0; i < iterations; i++) {
+		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+			load(p + j);
+		uname(&utsname);
+	}
+}
+
 static void sigill_handler(int signr, siginfo_t *info, void *unused)
 {
 	static int warned;
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h
index 07a5eb301466900645f759e6e9397bbbd08a8f4c..e1e68281f7ac2e91030b4a4358b3dafbe4d89a07 100644
--- a/tools/testing/selftests/powerpc/security/flush_utils.h
+++ b/tools/testing/selftests/powerpc/security/flush_utils.h
@@ -9,9 +9,16 @@
 
 #define CACHELINE_SIZE 128
 
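+/*
+ * PERF_TYPE_HW_CACHE config encoding per perf_event_open(2):
+ * cache id | (op << 8) | (result << 16).
+ */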
+#define PERF_L1D_READ_MISS_CONFIG	((PERF_COUNT_HW_CACHE_L1D) | 		\
+					(PERF_COUNT_HW_CACHE_OP_READ << 8) |	\
+					(PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
+
 void syscall_loop(char *p, unsigned long iterations,
 		  unsigned long zero_size);
 
+void syscall_loop_uaccess(char *p, unsigned long iterations,
+			  unsigned long zero_size);
+
 void set_dscr(unsigned long val);
 
 #endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 7565fd786640fc38eb280bbd9022903815caea59..f73484a6470faebb8c9ec0b9c039199bdc7902f7 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -54,7 +54,7 @@ int rfi_flush_test(void)
 
 	rfi_flush = rfi_flush_orig;
 
-	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+	fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
 	FAIL_IF(fd < 0);
 
 	p = (char *)memalign(zero_size, CACHELINE_SIZE);
diff --git a/tools/testing/selftests/powerpc/security/uaccess_flush.c b/tools/testing/selftests/powerpc/security/uaccess_flush.c
new file mode 100644
index 0000000000000000000000000000000000000000..cf80f960e38a45b8cd712161c4f04721d2e17185
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/uaccess_flush.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ * Copyright 2020 Canonical Ltd.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+int uaccess_flush_test(void)
+{
+	char *p;
+	int repetitions = 10;
+	int fd, passes = 0, iter, rc = 0;
+	struct perf_event_read v;
+	__u64 l1d_misses_total = 0;
+	unsigned long iterations = 100000, zero_size = 24 * 1024;
+	unsigned long l1d_misses_expected;
+	int rfi_flush_orig;
+	int entry_flush_orig;
+	int uaccess_flush, uaccess_flush_orig;
+
+	SKIP_IF(geteuid() != 0);
+
+	// The PMU event we use only works on Power7 or later
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+	if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
+		perror("Unable to read powerpc/rfi_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
+		perror("Unable to read powerpc/entry_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (read_debugfs_file("powerpc/uaccess_flush", &uaccess_flush_orig) < 0) {
+		perror("Unable to read powerpc/uaccess_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (rfi_flush_orig != 0) {
+		if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) {
+			perror("error writing to powerpc/rfi_flush debugfs file");
+			FAIL_IF(1);
+		}
+	}
+
+	if (entry_flush_orig != 0) {
+		if (write_debugfs_file("powerpc/entry_flush", 0) < 0) {
+			perror("error writing to powerpc/entry_flush debugfs file");
+			FAIL_IF(1);
+		}
+	}
+
+	uaccess_flush = uaccess_flush_orig;
+
+	fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
+	FAIL_IF(fd < 0);
+
+	p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+	FAIL_IF(perf_event_enable(fd));
+
+	// disable L1 prefetching
+	set_dscr(1);
+
+	iter = repetitions;
+
+	/*
+	 * We expect to see an L1D miss for each cacheline access when
+	 * uaccess_flush is set. Allow a small variation on this.
+	 */
+	l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+	FAIL_IF(perf_event_reset(fd));
+
+	syscall_loop_uaccess(p, iterations, zero_size);
+
+	FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+	if (uaccess_flush && v.l1d_misses >= l1d_misses_expected)
+		passes++;
+	else if (!uaccess_flush && v.l1d_misses < (l1d_misses_expected / 2))
+		passes++;
+
+	l1d_misses_total += v.l1d_misses;
+
+	while (--iter)
+		goto again;
+
+	if (passes < repetitions) {
+		printf("FAIL (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+		       uaccess_flush, l1d_misses_total, uaccess_flush ? '<' : '>',
+		       uaccess_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       repetitions - passes, repetitions);
+		rc = 1;
+	} else {
+		printf("PASS (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+		       uaccess_flush, l1d_misses_total, uaccess_flush ? '>' : '<',
+		       uaccess_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       passes, repetitions);
+	}
+
+	if (uaccess_flush == uaccess_flush_orig) {
+		uaccess_flush = !uaccess_flush_orig;
+		if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush) < 0) {
+			perror("error writing to powerpc/uaccess_flush debugfs file");
+			return 1;
+		}
+		iter = repetitions;
+		l1d_misses_total = 0;
+		passes = 0;
+		goto again;
+	}
+
+	perf_event_disable(fd);
+	close(fd);
+
+	set_dscr(0);
+
+	if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+		return 1;
+	}
+
+	if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/entry_flush debugfs file");
+		return 1;
+	}
+
+	if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/uaccess_flush debugfs file");
+		return 1;
+	}
+
+	return rc;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(uaccess_flush_test, "uaccess_flush_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index c75960af8018fb3cebb8ba4a7008e7f4bc70f00f..11521077f91519b4fd0975a8fafd54c18f9140fa 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -66,7 +66,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc)
 	/* Get thread endianness: extract bit LE from MSR */
 	thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
 
-	/***
+	/*
 	 * Little-Endian Machine
 	 */
 
@@ -126,7 +126,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc)
 		}
 	}
 
-	/***
+	/*
 	 * Big-Endian Machine
 	 */
 
diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c
index 7bf841a3967baf74c4e6ec2095baa7519d753839..6b13dc277724126a9d2d5e17c805217760df72ad 100644
--- a/tools/testing/selftests/timens/gettime_perf.c
+++ b/tools/testing/selftests/timens/gettime_perf.c
@@ -25,12 +25,20 @@ static void fill_function_pointers(void)
 	if (!vdso)
 		vdso = dlopen("linux-gate.so.1",
 			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
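+	/* powerpc names its vDSO linux-vdso32.so.1 / linux-vdso64.so.1 */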
+	if (!vdso)
+		vdso = dlopen("linux-vdso32.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-vdso64.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
 	if (!vdso) {
 		pr_err("[WARN]\tfailed to find vDSO\n");
 		return;
 	}
 
 	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	if (!vdso_clock_gettime)
+		vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__kernel_clock_gettime");
 	if (!vdso_clock_gettime)
 		pr_err("Warning: failed to find clock_gettime in vDSO\n");