Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux into next
Freescale updates from Scott:
"Highlights include 8xx hugepage support, qbman fixes/cleanup, device
tree updates, and some misc cleanup."
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b49062b..48001e7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -163,6 +163,7 @@
select HAVE_VIRT_CPU_ACCOUNTING
select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_KERNEL_GZIP
+ select HAVE_CC_STACKPROTECTOR
config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
@@ -396,6 +397,14 @@
depends on PPC64 && CPU_LITTLE_ENDIAN
def_bool !DISABLE_MPROFILE_KERNEL
+config USE_THIN_ARCHIVES
+ bool "Build the kernel using thin archives"
+ default n
+ select THIN_ARCHIVES
+ help
+ Build the kernel using thin archives.
+ If you're unsure say N.
+
config IOMMU_HELPER
def_bool PPC64
@@ -456,6 +465,19 @@
interface is strongly in flux, so no good recommendation can be
made.
+config KEXEC_FILE
+ bool "kexec file based system call"
+ select KEXEC_CORE
+ select BUILD_BIN2C
+ depends on PPC64
+ depends on CRYPTO=y
+ depends on CRYPTO_SHA256=y
+ help
+ This is a new version of the kexec system call. This call is
+ file based and takes in file descriptors as system call arguments
+ for kernel and initramfs as opposed to a list of segments as is the
+ case for the older kexec call.
+
config RELOCATABLE
bool "Build a relocatable kernel"
depends on (PPC64 && !COMPILE_TEST) || (FLATMEM && (44x || FSL_BOOKE))
@@ -499,7 +521,7 @@
config FA_DUMP
bool "Firmware-assisted dump"
- depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC
+ depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC_CORE
help
A robust mechanism to get reliable kernel crash dump with
assistance from firmware. This approach does not use kexec,
@@ -558,6 +580,13 @@
config SYS_SUPPORTS_HUGETLBFS
bool
+config ILLEGAL_POINTER_VALUE
+ hex
+ # This is roughly half way between the top of user space and the bottom
+ # of kernel space, which seems about as good as we can get.
+ default 0x5deadbeef0000000 if PPC64
+ default 0
+
source "mm/Kconfig"
config ARCH_MEMORY_PROBE
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 20cf770..949258d 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -366,4 +366,8 @@
If you are unsure, say N.
+config PPC_HTDUMP
+ def_bool y
+ depends on PPC_PTDUMP && PPC_BOOK3S
+
endmenu
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 041fda1..31286fa 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -23,7 +23,7 @@
ifeq ($(HAS_BIARCH),y)
ifeq ($(CROSS32_COMPILE),)
CROSS32CC := $(CC) -m32
-CROSS32AR := GNUTARGET=elf32-powerpc $(AR)
+KBUILD_ARFLAGS += --target=elf32-powerpc
endif
endif
@@ -85,7 +85,7 @@
override AS += -a$(BITS)
override LD += -m elf$(BITS)$(LDEMULATION)
override CC += -m$(BITS)
-override AR := GNUTARGET=elf$(BITS)-$(GNUTARGET) $(AR)
+KBUILD_ARFLAGS += --target=elf$(BITS)-$(GNUTARGET)
endif
LDFLAGS_vmlinux-y := -Bstatic
@@ -250,6 +250,7 @@
core-$(CONFIG_XMON) += arch/powerpc/xmon/
core-$(CONFIG_KVM) += arch/powerpc/kvm/
core-$(CONFIG_PERF_EVENTS) += arch/powerpc/perf/
+core-$(CONFIG_KEXEC_FILE) += arch/powerpc/purgatory/
drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
@@ -276,16 +277,16 @@
endif
$(BOOT_TARGETS1): vmlinux
- $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+ $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
$(BOOT_TARGETS2): vmlinux
- $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+ $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
bootwrapper_install:
- $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+ $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
%.dtb: scripts
- $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+ $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
# Used to create 'merged defconfigs'
# To use it $(call) it with the first argument as the base defconfig
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index eae2dc8..1d41d5a 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -171,10 +171,6 @@
$(obj)/empty.c:
$(Q)touch $@
-$(obj)/zImage.lds: $(obj)/%: $(srctree)/$(src)/%.S
- $(CROSS32CC) $(cpp_flags) -E -Wp,-MD,$(depfile) -P -Upowerpc \
- -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
-
$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
$(Q)cp $< $@
@@ -356,17 +352,17 @@
# Don't put the ramdisk on the pattern rule; when its missing make will try
# the pattern rule with less dependencies that also matches (even with the
# hard dependency listed).
-$(obj)/zImage.initrd.%: vmlinux $(wrapperbits)
+$(obj)/zImage.initrd.%: vmlinux $(wrapperbits) FORCE
$(call if_changed,wrap,$*,,,$(obj)/ramdisk.image.gz)
-$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits)
+$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits) FORCE
$(call if_changed,wrap,$(subst $(obj)/zImage.,,$@))
# dtbImage% - a dtbImage is a zImage with an embedded device tree blob
-$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb
+$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
$(call if_changed,wrap,$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb
+$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
$(call if_changed,wrap,$*,,$(obj)/$*.dtb)
# This cannot be in the root of $(src) as the zImage rule always adds a $(obj)
@@ -374,31 +370,31 @@
$(obj)/vmlinux.strip: vmlinux
$(STRIP) -s -R .comment $< -o $@
-$(obj)/uImage: vmlinux $(wrapperbits)
+$(obj)/uImage: vmlinux $(wrapperbits) FORCE
$(call if_changed,wrap,uboot)
-$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
+$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
$(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
+$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
$(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb)
-$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
+$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
$(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
+$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
$(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb)
-$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
+$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
$(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
+$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
$(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb)
-$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
+$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
$(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
+$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
$(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb)
# Rule to build device tree blobs
diff --git a/arch/powerpc/boot/ps3-head.S b/arch/powerpc/boot/ps3-head.S
index b6fcbaf..3dc44b0 100644
--- a/arch/powerpc/boot/ps3-head.S
+++ b/arch/powerpc/boot/ps3-head.S
@@ -57,11 +57,6 @@
bctr
1:
- /* Save the value at addr zero for a null pointer write check later. */
-
- li r4, 0
- lwz r3, 0(r4)
-
/* Primary delays then goes to _zimage_start in wrapper. */
or 31, 31, 31 /* db16cyc */
diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c
index 4ec2d86..a05558a 100644
--- a/arch/powerpc/boot/ps3.c
+++ b/arch/powerpc/boot/ps3.c
@@ -119,13 +119,12 @@ void ps3_copy_vectors(void)
flush_cache((void *)0x100, 512);
}
-void platform_init(unsigned long null_check)
+void platform_init(void)
{
const u32 heapsize = 0x1000000 - (u32)_end; /* 16MiB */
void *chosen;
unsigned long ft_addr;
u64 rm_size;
- unsigned long val;
console_ops.write = ps3_console_write;
platform_ops.exit = ps3_exit;
@@ -153,11 +152,6 @@ void platform_init(unsigned long null_check)
printf(" flat tree at 0x%lx\n\r", ft_addr);
- val = *(unsigned long *)0;
-
- if (val != null_check)
- printf("null check failed: %lx != %lx\n\r", val, null_check);
-
((kernel_entry_t)0)(ft_addr, 0, NULL);
ps3_exit();
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 404b3aa..76fe3cc 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -181,6 +181,28 @@
elf32-powerpc) format=elf32ppc ;;
esac
+ld_version()
+{
+ # Poached from scripts/ld-version.sh, but we don't want to call that because
+ # this script (wrapper) is distributed separately from the kernel source.
+ # Extract linker version number from stdin and turn into single number.
+ awk '{
+ gsub(".*\\)", "");
+ gsub(".*version ", "");
+ gsub("-.*", "");
+ split($1,a, ".");
+ print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
+ exit
+ }'
+}
+
+# Do not include PT_INTERP segment when linking pie. Non-pie linking
+# just ignores this option.
+LD_VERSION=$(${CROSS}ld --version | ld_version)
+LD_NO_DL_MIN_VERSION=$(echo 2.26 | ld_version)
+if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then
+ nodl="--no-dynamic-linker"
+fi
platformo=$object/"$platform".o
lds=$object/zImage.lds
@@ -446,7 +468,7 @@
text_start="-Ttext $link_address"
fi
#link everything
- ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \
+ ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" \
$platformo $tmp $object/wrapper.a
rm $tmp
fi
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index d77af0e..e4d53fe 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -49,6 +49,7 @@
CONFIG_PPC_TRANSACTIONAL_MEM=y
CONFIG_HOTPLUG_CPU=y
CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
CONFIG_IRQ_ALL_CPUS=y
CONFIG_NUMA=y
CONFIG_MEMORY_HOTPLUG=y
@@ -296,7 +297,10 @@
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPT_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
@@ -304,6 +308,7 @@
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index df5f161..0396126 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -46,6 +46,7 @@
CONFIG_BINFMT_MISC=m
CONFIG_PPC_TRANSACTIONAL_MEM=y
CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
CONFIG_CRASH_DUMP=y
CONFIG_IRQ_ALL_CPUS=y
CONFIG_MEMORY_HOTREMOVE=y
@@ -332,7 +333,10 @@
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPT_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
@@ -340,6 +344,7 @@
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 536e811..5a06bdd 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -52,6 +52,7 @@
CONFIG_BINFMT_MISC=m
CONFIG_PPC_TRANSACTIONAL_MEM=y
CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
CONFIG_IRQ_ALL_CPUS=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
@@ -295,7 +296,10 @@
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPT_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
@@ -303,6 +307,7 @@
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index d149273..dfef117 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -13,8 +13,6 @@
*/
#include <linux/threads.h>
-#include <linux/kprobes.h>
-
#include <uapi/asm/ucontext.h>
/* SMP */
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 388b052..0122236 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -233,7 +233,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
static inline void __ptep_set_access_flags(struct mm_struct *mm,
- pte_t *ptep, pte_t entry)
+ pte_t *ptep, pte_t entry,
+ unsigned long address)
{
unsigned long set = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 1af837c..1c64bc6 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -16,9 +16,6 @@
#define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE)
#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE)
-/* With 4k base page size, hugepage PTEs go at the PMD level */
-#define MIN_HUGEPTE_SHIFT PMD_SHIFT
-
/* PTE flags to conserve for HPTE identification */
#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \
H_PAGE_F_SECOND | H_PAGE_F_GIX)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 5aae4f5..f3dd21e 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -6,9 +6,6 @@
#define H_PUD_INDEX_SIZE 5
#define H_PGD_INDEX_SIZE 12
-/* With 4k base page size, hugepage PTEs go at the PMD level */
-#define MIN_HUGEPTE_SHIFT PAGE_SHIFT
-
#define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */
#define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */
/*
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h b/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h
deleted file mode 100644
index c45189a..0000000
--- a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H
-#define _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H
-/*
- * For radix we want generic code to handle hugetlb. But then if we want
- * both hash and radix to be enabled together we need to workaround the
- * limitations.
- */
-void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern unsigned long
-radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags);
-
-static inline int hstate_get_psize(struct hstate *hstate)
-{
- unsigned long shift;
-
- shift = huge_page_shift(hstate);
- if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
- return MMU_PAGE_2M;
- else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
- return MMU_PAGE_1G;
- else {
- WARN(1, "Wrong huge page shift\n");
- return mmu_virtual_psize;
- }
-}
-#endif
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
new file mode 100644
index 0000000..c62f14d
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_H
+#define _ASM_POWERPC_BOOK3S_64_HUGETLB_H
+/*
+ * For radix we want generic code to handle hugetlb. But then if we want
+ * both hash and radix to be enabled together we need to workaround the
+ * limitations.
+ */
+void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern unsigned long
+radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
+
+static inline int hstate_get_psize(struct hstate *hstate)
+{
+ unsigned long shift;
+
+ shift = huge_page_shift(hstate);
+ if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
+ return MMU_PAGE_2M;
+ else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
+ return MMU_PAGE_1G;
+ else if (shift == mmu_psize_defs[MMU_PAGE_16M].shift)
+ return MMU_PAGE_16M;
+ else if (shift == mmu_psize_defs[MMU_PAGE_16G].shift)
+ return MMU_PAGE_16G;
+ else {
+ WARN(1, "Wrong huge page shift\n");
+ return mmu_virtual_psize;
+ }
+}
+
+#define arch_make_huge_pte arch_make_huge_pte
+static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+ struct page *page, int writable)
+{
+ unsigned long page_shift;
+
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+ return entry;
+
+ page_shift = huge_page_shift(hstate_vma(vma));
+ /*
+ * We don't support 1G hugetlb pages yet.
+ */
+ VM_WARN_ON(page_shift == mmu_psize_defs[MMU_PAGE_1G].shift);
+ if (page_shift == mmu_psize_defs[MMU_PAGE_2M].shift)
+ return __pte(pte_val(entry) | _PAGE_LARGE);
+ else
+ return entry;
+}
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e407af2..2e6a823 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -70,7 +70,9 @@
#define HPTE_V_SSIZE_SHIFT 62
#define HPTE_V_AVPN_SHIFT 7
+#define HPTE_V_COMMON_BITS ASM_CONST(0x000fffffffffffff)
#define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80)
+#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fffffffffff80)
#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL))
#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
@@ -80,14 +82,16 @@
#define HPTE_V_VALID ASM_CONST(0x0000000000000001)
/*
- * ISA 3.0 have a different HPTE format.
+ * ISA 3.0 has a different HPTE format.
*/
#define HPTE_R_3_0_SSIZE_SHIFT 58
+#define HPTE_R_3_0_SSIZE_MASK (3ull << HPTE_R_3_0_SSIZE_SHIFT)
#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
#define HPTE_R_TS ASM_CONST(0x4000000000000000)
#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
#define HPTE_R_RPN_SHIFT 12
#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
+#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000)
#define HPTE_R_PP ASM_CONST(0x0000000000000003)
#define HPTE_R_PPP ASM_CONST(0x8000000000000003)
#define HPTE_R_N ASM_CONST(0x0000000000000004)
@@ -316,12 +320,43 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
*/
v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
v <<= HPTE_V_AVPN_SHIFT;
- if (!cpu_has_feature(CPU_FTR_ARCH_300))
- v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+ v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
return v;
}
/*
+ * ISA v3.0 defines a new HPTE format, which differs from the old
+ * format in having smaller AVPN and ARPN fields, and the B field
+ * in the second dword instead of the first.
+ */
+static inline unsigned long hpte_old_to_new_v(unsigned long v)
+{
+ /* trim AVPN, drop B */
+ return v & HPTE_V_COMMON_BITS;
+}
+
+static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r)
+{
+ /* move B field from 1st to 2nd dword, trim ARPN */
+ return (r & ~HPTE_R_3_0_SSIZE_MASK) |
+ (((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT);
+}
+
+static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r)
+{
+ /* insert B field */
+ return (v & HPTE_V_COMMON_BITS) |
+ ((r & HPTE_R_3_0_SSIZE_MASK) <<
+ (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));
+}
+
+static inline unsigned long hpte_new_to_old_r(unsigned long r)
+{
+ /* clear out B field */
+ return r & ~HPTE_R_3_0_SSIZE_MASK;
+}
+
+/*
* This function sets the AVPN and L fields of the HPTE appropriately
* using the base page size and actual page size.
*/
@@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
* aligned for the requested page size
*/
static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
- int actual_psize, int ssize)
+ int actual_psize)
{
-
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
-
/* A 4K page needs no special encoding */
if (actual_psize == MMU_PAGE_4K)
return pa & HPTE_R_RPN;
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 0a46a5f..6cfc5db 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -26,6 +26,11 @@
#define _RPAGE_SW1 0x00800
#define _RPAGE_SW2 0x00400
#define _RPAGE_SW3 0x00200
+#define _RPAGE_RSV1 0x1000000000000000UL
+#define _RPAGE_RSV2 0x0800000000000000UL
+#define _RPAGE_RSV3 0x0400000000000000UL
+#define _RPAGE_RSV4 0x0200000000000000UL
+
#ifdef CONFIG_MEM_SOFT_DIRTY
#define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */
#else
@@ -33,6 +38,11 @@
#endif
#define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */
+/*
+ * For P9 DD1 only, we need to track whether the pte's huge.
+ */
+#define _PAGE_LARGE _RPAGE_RSV1
+
#define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */
#define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */
@@ -568,10 +578,11 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev)
*/
static inline void __ptep_set_access_flags(struct mm_struct *mm,
- pte_t *ptep, pte_t entry)
+ pte_t *ptep, pte_t entry,
+ unsigned long address)
{
if (radix_enabled())
- return radix__ptep_set_access_flags(mm, ptep, entry);
+ return radix__ptep_set_access_flags(mm, ptep, entry, address);
return hash__ptep_set_access_flags(ptep, entry);
}
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 2a46dea..b4d1302 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -140,19 +140,20 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
unsigned long new_pte;
old_pte = __radix_pte_update(ptep, ~0, 0);
- asm volatile("ptesync" : : : "memory");
/*
* new value of pte
*/
new_pte = (old_pte | set) & ~clr;
-
/*
- * For now let's do heavy pid flush
- * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
+ * If we are trying to clear the pte, we can skip
+ * the below sequence and batch the tlb flush. The
+ * tlb flush batching is done by mmu gather code
*/
- radix__flush_tlb_mm(mm);
-
- __radix_pte_update(ptep, 0, new_pte);
+ if (new_pte) {
+ asm volatile("ptesync" : : : "memory");
+ radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr);
+ __radix_pte_update(ptep, 0, new_pte);
+ }
} else
old_pte = __radix_pte_update(ptep, clr, set);
asm volatile("ptesync" : : : "memory");
@@ -167,7 +168,8 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
* function doesn't need to invalidate tlb.
*/
static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
- pte_t *ptep, pte_t entry)
+ pte_t *ptep, pte_t entry,
+ unsigned long address)
{
unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
@@ -183,13 +185,7 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
* new value of pte
*/
new_pte = old_pte | set;
-
- /*
- * For now let's do heavy pid flush
- * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
- */
- radix__flush_tlb_mm(mm);
-
+ radix__flush_tlb_pte_p9_dd1(old_pte, mm, address);
__radix_pte_update(ptep, 0, new_pte);
} else
__radix_pte_update(ptep, 0, set);
@@ -243,6 +239,8 @@ static inline int radix__pmd_trans_huge(pmd_t pmd)
static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
{
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ return __pmd(pmd_val(pmd) | _PAGE_PTE | _PAGE_LARGE);
return __pmd(pmd_val(pmd) | _PAGE_PTE);
}
static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma,
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index a9e19cb..cc7fbde 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -42,4 +42,6 @@ extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
unsigned long page_size);
extern void radix__flush_tlb_lpid(unsigned long lpid);
extern void radix__flush_tlb_all(void);
+extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
+ unsigned long address);
#endif
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index 44efe73..fc46b66 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -7,6 +7,71 @@
#include <asm/asm-compat.h>
#include <linux/bug.h>
+#ifdef __BIG_ENDIAN
+#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE)
+#else
+#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE)
+#endif
+
+#define XCHG_GEN(type, sfx, cl) \
+static inline u32 __xchg_##type##sfx(volatile void *p, u32 val) \
+{ \
+ unsigned int prev, prev_mask, tmp, bitoff, off; \
+ \
+ off = (unsigned long)p % sizeof(u32); \
+ bitoff = BITOFF_CAL(sizeof(type), off); \
+ p -= off; \
+ val <<= bitoff; \
+ prev_mask = (u32)(type)-1 << bitoff; \
+ \
+ __asm__ __volatile__( \
+"1: lwarx %0,0,%3\n" \
+" andc %1,%0,%5\n" \
+" or %1,%1,%4\n" \
+ PPC405_ERR77(0,%3) \
+" stwcx. %1,0,%3\n" \
+" bne- 1b\n" \
+ : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \
+ : "r" (p), "r" (val), "r" (prev_mask) \
+ : "cc", cl); \
+ \
+ return prev >> bitoff; \
+}
+
+#define CMPXCHG_GEN(type, sfx, br, br2, cl) \
+static inline \
+u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \
+{ \
+ unsigned int prev, prev_mask, tmp, bitoff, off; \
+ \
+ off = (unsigned long)p % sizeof(u32); \
+ bitoff = BITOFF_CAL(sizeof(type), off); \
+ p -= off; \
+ old <<= bitoff; \
+ new <<= bitoff; \
+ prev_mask = (u32)(type)-1 << bitoff; \
+ \
+ __asm__ __volatile__( \
+ br \
+"1: lwarx %0,0,%3\n" \
+" and %1,%0,%6\n" \
+" cmpw 0,%1,%4\n" \
+" bne- 2f\n" \
+" andc %1,%0,%6\n" \
+" or %1,%1,%5\n" \
+ PPC405_ERR77(0,%3) \
+" stwcx. %1,0,%3\n" \
+" bne- 1b\n" \
+ br2 \
+ "\n" \
+"2:" \
+ : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \
+ : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \
+ : "cc", cl); \
+ \
+ return prev >> bitoff; \
+}
+
/*
* Atomic exchange
*
@@ -14,6 +79,11 @@
* the previous value stored there.
*/
+XCHG_GEN(u8, _local, "memory");
+XCHG_GEN(u8, _relaxed, "cc");
+XCHG_GEN(u16, _local, "memory");
+XCHG_GEN(u16, _relaxed, "cc");
+
static __always_inline unsigned long
__xchg_u32_local(volatile void *p, unsigned long val)
{
@@ -85,9 +155,13 @@ __xchg_u64_relaxed(u64 *p, unsigned long val)
#endif
static __always_inline unsigned long
-__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
+__xchg_local(void *ptr, unsigned long x, unsigned int size)
{
switch (size) {
+ case 1:
+ return __xchg_u8_local(ptr, x);
+ case 2:
+ return __xchg_u16_local(ptr, x);
case 4:
return __xchg_u32_local(ptr, x);
#ifdef CONFIG_PPC64
@@ -103,6 +177,10 @@ static __always_inline unsigned long
__xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
{
switch (size) {
+ case 1:
+ return __xchg_u8_relaxed(ptr, x);
+ case 2:
+ return __xchg_u16_relaxed(ptr, x);
case 4:
return __xchg_u32_relaxed(ptr, x);
#ifdef CONFIG_PPC64
@@ -131,6 +209,15 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
* and return the old value of *p.
*/
+CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
+CMPXCHG_GEN(u8, _local, , , "memory");
+CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
+CMPXCHG_GEN(u8, _relaxed, , , "cc");
+CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
+CMPXCHG_GEN(u16, _local, , , "memory");
+CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
+CMPXCHG_GEN(u16, _relaxed, , , "cc");
+
static __always_inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
{
@@ -316,6 +403,10 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
unsigned int size)
{
switch (size) {
+ case 1:
+ return __cmpxchg_u8(ptr, old, new);
+ case 2:
+ return __cmpxchg_u16(ptr, old, new);
case 4:
return __cmpxchg_u32(ptr, old, new);
#ifdef CONFIG_PPC64
@@ -328,10 +419,14 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
}
static __always_inline unsigned long
-__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
+__cmpxchg_local(void *ptr, unsigned long old, unsigned long new,
unsigned int size)
{
switch (size) {
+ case 1:
+ return __cmpxchg_u8_local(ptr, old, new);
+ case 2:
+ return __cmpxchg_u16_local(ptr, old, new);
case 4:
return __cmpxchg_u32_local(ptr, old, new);
#ifdef CONFIG_PPC64
@@ -348,6 +443,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
unsigned int size)
{
switch (size) {
+ case 1:
+ return __cmpxchg_u8_relaxed(ptr, old, new);
+ case 2:
+ return __cmpxchg_u16_relaxed(ptr, old, new);
case 4:
return __cmpxchg_u32_relaxed(ptr, old, new);
#ifdef CONFIG_PPC64
@@ -364,6 +463,10 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
unsigned int size)
{
switch (size) {
+ case 1:
+ return __cmpxchg_u8_acquire(ptr, old, new);
+ case 2:
+ return __cmpxchg_u16_acquire(ptr, old, new);
case 4:
return __cmpxchg_u32_acquire(ptr, old, new);
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index a954e49..86308f1 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -10,7 +10,7 @@ struct pt_regs;
extern struct dentry *powerpc_debugfs_root;
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
extern int (*__debugger)(struct pt_regs *regs);
extern int (*__debugger_ipi)(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 3facdd4..ede2151 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -9,7 +9,7 @@ extern struct kmem_cache *hugepte_cache;
#ifdef CONFIG_PPC_BOOK3S_64
-#include <asm/book3s/64/hugetlb-radix.h>
+#include <asm/book3s/64/hugetlb.h>
/*
* This should work for other subarchs too. But right now we use the
* new format only for 64bit book3s
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 8d5f8352..77ff1ba 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -276,7 +276,8 @@
#define H_GET_MPP_X 0x314
#define H_SET_MODE 0x31C
#define H_CLEAR_HPT 0x358
-#define MAX_HCALL_OPCODE H_CLEAR_HPT
+#define H_SIGNAL_SYS_RESET 0x380
+#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET
/* H_VIOCTL functions */
#define H_GET_VIOA_DUMP_SIZE 0x01
@@ -307,6 +308,11 @@
#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3
#define H_SET_MODE_RESOURCE_LE 4
+/* Values for argument to H_SIGNAL_SYS_RESET */
+#define H_SIGNAL_SYS_RESET_ALL -1
+#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
+/* >= 0 values are CPU number */
+
#ifndef __ASSEMBLY__
/**
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index a46f5f4..6c3b715 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -53,7 +53,7 @@
typedef void (*crash_shutdown_t)(void);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* This function is responsible for capturing register states if coming
@@ -91,7 +91,17 @@ static inline bool kdump_in_progress(void)
return crashing_cpu >= 0;
}
-#else /* !CONFIG_KEXEC */
+#ifdef CONFIG_KEXEC_FILE
+extern struct kexec_file_ops kexec_elf64_ops;
+
+int setup_purgatory(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr);
+int setup_new_fdt(void *fdt, unsigned long initrd_load_addr,
+ unsigned long initrd_len, const char *cmdline);
+#endif /* CONFIG_KEXEC_FILE */
+
+#else /* !CONFIG_KEXEC_CORE */
static inline void crash_kexec_secondary(struct pt_regs *regs) { }
static inline int overlaps_crashkernel(unsigned long start, unsigned long size)
@@ -116,7 +126,7 @@ static inline bool kdump_in_progress(void)
return false;
}
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
#endif /* ! __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_KEXEC_H */
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 2c9759bd..97b8c1f 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -32,6 +32,7 @@
#include <asm/probes.h>
#include <asm/code-patching.h>
+#ifdef CONFIG_KPROBES
#define __ARCH_WANT_KPROBES_INSN_SLOT
struct pt_regs;
@@ -127,5 +128,11 @@ struct kprobe_ctlblk {
extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+extern int kprobe_handler(struct pt_regs *regs);
+extern int kprobe_post_handler(struct pt_regs *regs);
+#else
+static inline int kprobe_handler(struct pt_regs *regs) { return 0; }
+static inline int kprobe_post_handler(struct pt_regs *regs) { return 0; }
+#endif /* CONFIG_KPROBES */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_KPROBES_H */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index e02cbc6..5011b69 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -183,7 +183,7 @@ struct machdep_calls {
*/
void (*machine_shutdown)(void);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
void (*kexec_cpu_down)(int crash_shutdown, int secondary);
/* Called to do what every setup is needed on image and the
@@ -198,7 +198,7 @@ struct machdep_calls {
* no return.
*/
void (*machine_kexec)(struct kimage *image);
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
#ifdef CONFIG_SUSPEND
/* These are called to disable and enable, respectively, IRQs when
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index b119bdd..09304d2 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -208,6 +208,11 @@ extern u64 ppc64_rma_size;
/* Cleanup function used by kexec */
extern void mmu_cleanup_all(void);
extern void radix__mmu_cleanup_all(void);
+
+/* Functions for creating and updating partition table on POWER9 */
+extern void mmu_partition_table_init(void);
+extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1);
#endif /* CONFIG_PPC64 */
struct mm_struct;
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 5c45114..b9e3f0a 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -19,16 +19,18 @@ extern void destroy_context(struct mm_struct *mm);
struct mm_iommu_table_group_mem_t;
extern int isolate_lru_page(struct page *page); /* from internal.h */
-extern bool mm_iommu_preregistered(void);
-extern long mm_iommu_get(unsigned long ua, unsigned long entries,
+extern bool mm_iommu_preregistered(struct mm_struct *mm);
+extern long mm_iommu_get(struct mm_struct *mm,
+ unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem);
-extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem);
-extern void mm_iommu_init(mm_context_t *ctx);
-extern void mm_iommu_cleanup(mm_context_t *ctx);
-extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
- unsigned long size);
-extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua,
- unsigned long entries);
+extern long mm_iommu_put(struct mm_struct *mm,
+ struct mm_iommu_table_group_mem_t *mem);
+extern void mm_iommu_init(struct mm_struct *mm);
+extern void mm_iommu_cleanup(struct mm_struct *mm);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
+ unsigned long ua, unsigned long size);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+ unsigned long ua, unsigned long entries);
extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned long *hpa);
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 7bd916e..ba9921b 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -275,7 +275,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
static inline void __ptep_set_access_flags(struct mm_struct *mm,
- pte_t *ptep, pte_t entry)
+ pte_t *ptep, pte_t entry,
+ unsigned long address)
{
unsigned long set = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
index fc7d517..d0db987 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
@@ -27,9 +27,6 @@
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-/* With 4k base page size, hugepage PTEs go at the PMD level */
-#define MIN_HUGEPTE_SHIFT PMD_SHIFT
-
/* PUD_SHIFT determines what a third-level page table entry can map */
#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
#define PUD_SIZE (1UL << PUD_SHIFT)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
index 9083245..55b28ef 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
@@ -31,9 +31,6 @@
#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
-/* With 4k base page size, hugepage PTEs go at the PMD level */
-#define MIN_HUGEPTE_SHIFT PAGE_SHIFT
-
/* PMD_SHIFT determines what a second-level page table entry can map */
#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
#define PMD_SIZE (1UL << PMD_SHIFT)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 6c4a142..c7f927e 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -289,7 +289,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
* function doesn't need to flush the hash entry
*/
static inline void __ptep_set_access_flags(struct mm_struct *mm,
- pte_t *ptep, pte_t entry)
+ pte_t *ptep, pte_t entry,
+ unsigned long address)
{
unsigned long bits = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index e958b70..5c7db0f 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -220,9 +220,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id,
int64_t opal_pci_poll2(uint64_t id, uint64_t data);
int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll);
+int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll);
int64_t opal_int_set_cppr(uint8_t cppr);
int64_t opal_int_eoi(uint32_t xirr);
+int64_t opal_rm_int_eoi(uint32_t xirr);
int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
+int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
uint32_t pe_num, uint32_t tce_size,
uint64_t dma_addr, uint32_t npages);
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 034a588..0bcc75e 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -308,4 +308,9 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr
return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
}
+static inline long plapr_signal_sys_reset(long cpu)
+{
+ return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
+}
+
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 0f73de0..7262880 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -53,7 +53,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev);
struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr);
void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
int eeh_pci_enable(struct eeh_pe *pe, int function);
-int eeh_reset_pe(struct eeh_pe *);
+int eeh_pe_reset_full(struct eeh_pe *pe);
void eeh_save_bars(struct eeh_dev *edev);
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 7f436ba..5e57705 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -159,11 +159,5 @@ struct of_drconf_cell {
/* Option Vector 6: IBM PAPR hints */
#define OV6_LINUX 0x02 /* Linux is our OS */
-/*
- * The architecture vector has an array of PVR mask/value pairs,
- * followed by # option vectors - 1, followed by the option vectors.
- */
-extern unsigned char ibm_architecture_vec[];
-
#endif /* __KERNEL__ */
#endif /* _POWERPC_PROM_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c491cfe..0d4531a 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -153,6 +153,8 @@
#define PSSCR_EC 0x00100000 /* Exit Criterion */
#define PSSCR_ESL 0x00200000 /* Enable State Loss */
#define PSSCR_SD 0x00400000 /* Status Disable */
+#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
+#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
/* Floating Point Status and Control Register (FPSCR) Fields */
#define FPSCR_FX 0x80000000 /* FPU exception summary */
@@ -236,6 +238,7 @@
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
+#define SPRN_TIDR 144 /* Thread ID register */
#define SPRN_CTRLF 0x088
#define SPRN_CTRLT 0x098
#define CTRL_CT 0xc0000000 /* current thread */
@@ -292,6 +295,7 @@
#define SPRN_HRMOR 0x139 /* Real mode offset register */
#define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */
#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
+#define SPRN_ASDR 0x330 /* Access segment descriptor register */
#define SPRN_IC 0x350 /* Virtual Instruction Count */
#define SPRN_VTB 0x351 /* Virtual Time Base */
#define SPRN_LDBAR 0x352 /* LD Base Address Register */
@@ -302,6 +306,7 @@
#define SPRN_PMCR 0x374 /* Power Management Control Register */
/* HFSCR and FSCR bit numbers are the same */
+#define FSCR_MSGP_LG 10 /* Enable MSGP */
#define FSCR_TAR_LG 8 /* Enable Target Address Register */
#define FSCR_EBB_LG 7 /* Enable Event Based Branching */
#define FSCR_TM_LG 5 /* Enable Transactional Memory */
@@ -315,6 +320,7 @@
#define FSCR_EBB __MASK(FSCR_EBB_LG)
#define FSCR_DSCR __MASK(FSCR_DSCR_LG)
#define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */
+#define HFSCR_MSGP __MASK(FSCR_MSGP_LG)
#define HFSCR_TAR __MASK(FSCR_TAR_LG)
#define HFSCR_EBB __MASK(FSCR_EBB_LG)
#define HFSCR_TM __MASK(FSCR_TM_LG)
@@ -350,8 +356,10 @@
#define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */
#define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */
#define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */
+#define LPCR_PECE_HVEE ASM_CONST(0x0000400000000000) /* P9 Wakeup on HV interrupts */
#define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */
#define LPCR_MER_SH 11
+#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */
#define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */
#define LPCR_LPES 0x0000000c
#define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */
@@ -372,6 +380,12 @@
#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */
+/*
+ * These bits are used in the function kvmppc_set_arch_compat() to specify and
+ * determine both the compatibility level which we want to emulate and the
+ * compatibility level which the host is capable of emulating.
+ */
+#define PCR_ARCH_207 0x8 /* Architecture 2.07 */
#define PCR_ARCH_206 0x4 /* Architecture 2.06 */
#define PCR_ARCH_205 0x2 /* Architecture 2.05 */
#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
@@ -1213,6 +1227,7 @@
#define PVR_ARCH_206 0x0f000003
#define PVR_ARCH_206p 0x0f100003
#define PVR_ARCH_207 0x0f000004
+#define PVR_ARCH_300 0x0f000005
/* Macros for setting and retrieving special purpose registers */
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 0d02c11..32db16d 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -176,7 +176,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
#endif /* !CONFIG_SMP */
#endif /* !CONFIG_PPC64 */
-#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC))
+#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE))
extern void smp_release_cpus(void);
#else
static inline void smp_release_cpus(void) { };
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
new file mode 100644
index 0000000..6720190
--- /dev/null
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -0,0 +1,40 @@
+/*
+ * GCC stack protector support.
+ *
+ * Stack protector works by putting predefined pattern at the start of
+ * the stack frame and verifying that it hasn't been overwritten when
+ * returning from the function. The pattern is called stack canary
+ * and gcc expects it to be defined by a global variable called
+ * "__stack_chk_guard" on PPC. This unfortunately means that on SMP
+ * we cannot have a different canary value per task.
+ */
+
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H
+
+#include <linux/random.h>
+#include <linux/version.h>
+#include <asm/reg.h>
+
+extern unsigned long __stack_chk_guard;
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+ unsigned long canary;
+
+ /* Try to get a semi random initial value. */
+ get_random_bytes(&canary, sizeof(canary));
+ canary ^= mftb();
+ canary ^= LINUX_VERSION_CODE;
+
+ current->stack_canary = canary;
+ __stack_chk_guard = current->stack_canary;
+}
+
+#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 2fc5d4d..4b369d8 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -386,3 +386,4 @@ SYSCALL(mlock2)
SYSCALL(copy_file_range)
COMPAT_SYS_SPU(preadv2)
COMPAT_SYS_SPU(pwritev2)
+SYSCALL(kexec_file_load)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index e8cdfec..eb1acee 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
#include <uapi/asm/unistd.h>
-#define NR_syscalls 382
+#define NR_syscalls 383
#define __NR__exit __NR_exit
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index e9f5f41..2f26335 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -392,5 +392,6 @@
#define __NR_copy_file_range 379
#define __NR_preadv2 380
#define __NR_pwritev2 381
+#define __NR_kexec_file_load 382
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1925341..a3a6047 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -19,6 +19,10 @@
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+# -fstack-protector triggers protection checks in this code,
+# but it is being used too early to link to meaningful stack_chk logic.
+CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
@@ -58,8 +62,6 @@
obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
-obj-$(CONFIG_IBMVIO) += vio.o
-obj-$(CONFIG_IBMEBUS) += ibmebus.o
obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
@@ -107,8 +109,9 @@
obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
pci-common.o pci_of_scan.o
obj-$(CONFIG_PCI_MSI) += msi.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \
machine_kexec_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
@@ -128,7 +131,7 @@
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
-ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
+ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE),)
obj-y += ppc_save_regs.o
endif
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index caec7bf..5c86030 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -91,6 +91,9 @@ int main(void)
DEFINE(TI_livepatch_sp, offsetof(struct thread_info, livepatch_sp));
#endif
+#ifdef CONFIG_CC_STACKPROTECTOR
+ DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary));
+#endif
DEFINE(KSP, offsetof(struct thread_struct, ksp));
DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
#ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 52ff3f0..fe35ef2 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -96,6 +96,7 @@
mtlr r11
beqlr
li r0,0
+ mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -116,6 +117,7 @@
mtlr r11
beqlr
li r0,0
+ mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -174,7 +176,7 @@
__init_HFSCR:
mfspr r3,SPRN_HFSCR
ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\
- HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB
+ HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP
mtspr SPRN_HFSCR,r3
blr
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f257316..8180bfd 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -372,7 +372,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
/* Find the PHB PE */
phb_pe = eeh_phb_pe_get(pe->phb);
if (!phb_pe) {
- pr_warn("%s Can't find PE for PHB#%d\n",
+ pr_warn("%s Can't find PE for PHB#%x\n",
__func__, pe->phb->global_number);
return -EEXIST;
}
@@ -664,7 +664,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
rc = eeh_ops->set_option(pe, function);
if (rc)
pr_warn("%s: Unexpected state change %d on "
- "PHB#%d-PE#%x, err=%d\n",
+ "PHB#%x-PE#%x, err=%d\n",
__func__, function, pe->phb->global_number,
pe->addr, rc);
@@ -808,76 +808,67 @@ static void *eeh_set_dev_freset(void *data, void *flag)
}
/**
- * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
+ * eeh_pe_reset_full - Complete a full reset process on the indicated PE
* @pe: EEH PE
*
- * Assert the PCI #RST line for 1/4 second.
+ * This function executes a full reset procedure on a PE, including setting
+ * the appropriate flags, performing a fundamental or hot reset, and then
+ * deactivating the reset status. It is designed to be used within the EEH
+ * subsystem, as opposed to eeh_pe_reset which is exported to drivers and
+ * only performs a single operation at a time.
+ *
+ * This function will attempt to reset a PE three times before failing.
*/
-static void eeh_reset_pe_once(struct eeh_pe *pe)
+int eeh_pe_reset_full(struct eeh_pe *pe)
{
+ int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+ int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
+ int type = EEH_RESET_HOT;
unsigned int freset = 0;
+ int i, state, ret;
- /* Determine type of EEH reset required for
- * Partitionable Endpoint, a hot-reset (1)
- * or a fundamental reset (3).
- * A fundamental reset required by any device under
- * Partitionable Endpoint trumps hot-reset.
+ /*
+ * Determine the type of reset to perform - hot or fundamental.
+ * Hot reset is the default operation, unless any device under the
+ * PE requires a fundamental reset.
*/
eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
if (freset)
- eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
- else
- eeh_ops->reset(pe, EEH_RESET_HOT);
+ type = EEH_RESET_FUNDAMENTAL;
- eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
-}
+ /* Mark the PE as in reset state and block config space accesses */
+ eeh_pe_state_mark(pe, reset_state);
-/**
- * eeh_reset_pe - Reset the indicated PE
- * @pe: EEH PE
- *
- * This routine should be called to reset indicated device, including
- * PE. A PE might include multiple PCI devices and sometimes PCI bridges
- * might be involved as well.
- */
-int eeh_reset_pe(struct eeh_pe *pe)
-{
- int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
- int i, state, ret;
-
- /* Mark as reset and block config space */
- eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
-
- /* Take three shots at resetting the bus */
+ /* Make three attempts at resetting the bus */
for (i = 0; i < 3; i++) {
- eeh_reset_pe_once(pe);
+ ret = eeh_pe_reset(pe, type);
+ if (ret)
+ break;
- /*
- * EEH_PE_ISOLATED is expected to be removed after
- * BAR restore.
- */
+ ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+ if (ret)
+ break;
+
+ /* Wait until the PE is in a functioning state */
state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if ((state & flags) == flags) {
- ret = 0;
- goto out;
- }
+ if ((state & active_flags) == active_flags)
+ break;
if (state < 0) {
- pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
+ pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
__func__, pe->phb->global_number, pe->addr);
ret = -ENOTRECOVERABLE;
- goto out;
+ break;
}
- /* We might run out of credits */
+ /* Set error in case this is our last attempt */
ret = -EIO;
pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n",
__func__, state, pe->phb->global_number, pe->addr, (i + 1));
}
-out:
- eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, reset_state);
return ret;
}
@@ -1601,6 +1592,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
return eeh_unfreeze_pe(pe, true);
}
+
/**
* eeh_pe_reset - Issue PE reset according to specified type
* @pe: EEH PE
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index a62be72..555a47b 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -588,7 +588,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
/* Issue reset */
- ret = eeh_reset_pe(pe);
+ ret = eeh_pe_reset_full(pe);
if (ret) {
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
return ret;
@@ -659,7 +659,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* config accesses. So we prefer to block them. However, controlled
* PCI config accesses initiated from EEH itself are allowed.
*/
- rc = eeh_reset_pe(pe);
+ rc = eeh_pe_reset_full(pe);
if (rc)
return rc;
@@ -732,7 +732,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
frozen_bus = eeh_pe_bus_get(pe);
if (!frozen_bus) {
- pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
+ pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
return;
}
@@ -876,7 +876,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
* are due to poorly seated PCI cards. Only 10% or so are
* due to actual, failed cards.
*/
- pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
+ pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
"last hour and has been permanently disabled.\n"
"Please try reseating or replacing it.\n",
pe->phb->global_number, pe->addr,
@@ -884,7 +884,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
goto perm_error;
hard_fail:
- pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
+ pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
"Please try reseating or replacing it\n",
pe->phb->global_number, pe->addr);
@@ -998,7 +998,7 @@ static void eeh_handle_special_event(void)
bus = eeh_pe_bus_get(phb_pe);
if (!bus) {
pr_err("%s: Cannot find PCI bus for "
- "PHB#%d-PE#%x\n",
+ "PHB#%x-PE#%x\n",
__func__,
pe->phb->global_number,
pe->addr);
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 82e7327..accbf8b 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -75,11 +75,11 @@ static int eeh_event_handler(void * dummy)
if (pe) {
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
if (pe->type & EEH_PE_PHB)
- pr_info("EEH: Detected error on PHB#%d\n",
+ pr_info("EEH: Detected error on PHB#%x\n",
pe->phb->global_number);
else
pr_info("EEH: Detected PCI bus error on "
- "PHB#%d-PE#%x\n",
+ "PHB#%x-PE#%x\n",
pe->phb->global_number, pe->addr);
eeh_handle_event(pe);
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index de7d091..cc4b206 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -104,7 +104,7 @@ int eeh_phb_pe_create(struct pci_controller *phb)
/* Put it into the list */
list_add_tail(&pe->child, &eeh_phb_pe);
- pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
+ pr_debug("EEH: Add PE for PHB#%x\n", phb->global_number);
return 0;
}
@@ -333,7 +333,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%d\n",
+ pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
__func__, edev->config_addr, edev->phb->global_number);
return -EINVAL;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 3841d74..5742dbd 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -674,7 +674,11 @@
mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */
END_FTR_SECTION_IFSET(CPU_FTR_SPE)
#endif /* CONFIG_SPE */
-
+#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+ lwz r0,TSK_STACK_CANARY(r2)
+ lis r4,__stack_chk_guard@ha
+ stw r0,__stack_chk_guard@l(r4)
+#endif
lwz r0,_CCR(r1)
mtcrf 0xFF,r0
/* r3-r12 are destroyed -- Cort */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 38a1f96..45b453e 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -923,10 +923,10 @@
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x340)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .save_nvgprs
+ bl save_nvgprs
INTS_RESTORE_HARD
- bl .unknown_exception
- b .ret_from_except
+ bl unknown_exception
+ b ret_from_except
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index a95639b..5c9f50c 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -47,13 +47,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
unsigned int replaced;
/*
- * Note: Due to modules and __init, code can
- * disappear and change, we need to protect against faulting
- * as well as code changing. We do this by using the
- * probe_kernel_* functions.
- *
- * No real locking needed, this code is run through
- * kstop_machine, or before SMP starts.
+ * Note:
+ * We are paranoid about modifying text, as if a bug was to happen, it
+ * could cause us to read or write to someplace that could cause harm.
+ * Carefully read and modify the code with probe_kernel_*(), and make
+ * sure what we read is what we expected it to be before modifying it.
*/
/* read the text we want to modify */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 451a8e1..1dc5eae 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -160,7 +160,7 @@
cmpdi 0,r12,0
beq 100b
-#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
#ifdef CONFIG_PPC_BOOK3E
tovirt(r12,r12)
#endif
@@ -221,9 +221,9 @@
*/
_GLOBAL(book3e_start_thread)
LOAD_REG_IMMEDIATE(r5, MSR_KERNEL)
- cmpi 0, r3, 0
+ cmpwi r3, 0
beq 10f
- cmpi 0, r3, 1
+ cmpwi r3, 1
beq 11f
/* If the thread id is invalid, just exit. */
b 13f
@@ -248,9 +248,9 @@
* r3 = the thread physical id
*/
_GLOBAL(book3e_stop_thread)
- cmpi 0, r3, 0
+ cmpwi r3, 0
beq 10f
- cmpi 0, r3, 1
+ cmpwi r3, 1
beq 10f
/* If the thread id is invalid, just exit. */
b 13f
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
new file mode 100644
index 0000000..6acffd3
--- /dev/null
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -0,0 +1,663 @@
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "kexec_elf: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/libfdt.h>
+#include <linux/module.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define PURGATORY_STACK_SIZE (16 * 1024)
+
+#define elf_addr_to_cpu elf64_to_cpu
+
+#ifndef Elf_Rel
+#define Elf_Rel Elf64_Rel
+#endif /* Elf_Rel */
+
+struct elf_info {
+ /*
+ * Where the ELF binary contents are kept.
+ * Memory managed by the user of the struct.
+ */
+ const char *buffer;
+
+ const struct elfhdr *ehdr;
+ const struct elf_phdr *proghdrs;
+ struct elf_shdr *sechdrs;
+};
+
+static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
+{
+ return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
+}
+
+static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
+{
+ if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+ value = le64_to_cpu(value);
+ else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+ value = be64_to_cpu(value);
+
+ return value;
+}
+
+static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
+{
+ if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+ value = le16_to_cpu(value);
+ else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+ value = be16_to_cpu(value);
+
+ return value;
+}
+
+static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
+{
+ if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+ value = le32_to_cpu(value);
+ else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+ value = be32_to_cpu(value);
+
+ return value;
+}
+
+/**
+ * elf_is_ehdr_sane - check that it is safe to use the ELF header
+ * @buf_len: size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
+{
+ if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
+ pr_debug("Bad program header size.\n");
+ return false;
+ } else if (ehdr->e_shnum > 0 &&
+ ehdr->e_shentsize != sizeof(struct elf_shdr)) {
+ pr_debug("Bad section header size.\n");
+ return false;
+ } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
+ ehdr->e_version != EV_CURRENT) {
+ pr_debug("Unknown ELF version.\n");
+ return false;
+ }
+
+ if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
+ size_t phdr_size;
+
+ /*
+ * e_phnum is at most 65535 so calculating the size of the
+ * program header cannot overflow.
+ */
+ phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
+
+ /* Sanity check the program header table location. */
+ if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
+ pr_debug("Program headers at invalid location.\n");
+ return false;
+ } else if (ehdr->e_phoff + phdr_size > buf_len) {
+ pr_debug("Program headers truncated.\n");
+ return false;
+ }
+ }
+
+ if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
+ size_t shdr_size;
+
+ /*
+ * e_shnum is at most 65536 so calculating
+ * the size of the section header cannot overflow.
+ */
+ shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
+
+ /* Sanity check the section header table location. */
+ if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) {
+ pr_debug("Section headers at invalid location.\n");
+ return false;
+ } else if (ehdr->e_shoff + shdr_size > buf_len) {
+ pr_debug("Section headers truncated.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr)
+{
+ struct elfhdr *buf_ehdr;
+
+ if (len < sizeof(*buf_ehdr)) {
+ pr_debug("Buffer is too small to hold ELF header.\n");
+ return -ENOEXEC;
+ }
+
+ memset(ehdr, 0, sizeof(*ehdr));
+ memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident));
+ if (!elf_is_elf_file(ehdr)) {
+ pr_debug("No ELF header magic.\n");
+ return -ENOEXEC;
+ }
+
+ if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) {
+ pr_debug("Not a supported ELF class.\n");
+ return -ENOEXEC;
+ } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
+ ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
+ pr_debug("Not a supported ELF data format.\n");
+ return -ENOEXEC;
+ }
+
+ buf_ehdr = (struct elfhdr *) buf;
+ if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) {
+ pr_debug("Bad ELF header size.\n");
+ return -ENOEXEC;
+ }
+
+ ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type);
+ ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
+ ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version);
+ ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
+ ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
+ ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
+ ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
+ ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
+ ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
+ ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize);
+ ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
+ ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
+
+ return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_is_phdr_sane - check that it is safe to use the program header
+ * @buf_len: size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len)
+{
+
+ if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) {
+ pr_debug("ELF segment location wraps around.\n");
+ return false;
+ } else if (phdr->p_offset + phdr->p_filesz > buf_len) {
+ pr_debug("ELF segment not in file.\n");
+ return false;
+ } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) {
+ pr_debug("ELF segment address wraps around.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info,
+ int idx)
+{
+ /* Override the const in proghdrs, we are the ones doing the loading. */
+ struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
+ const char *pbuf;
+ struct elf_phdr *buf_phdr;
+
+ pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr));
+ buf_phdr = (struct elf_phdr *) pbuf;
+
+ phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
+ phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
+ phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
+ phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
+ phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
+
+ /*
+ * The following fields have a type equivalent to Elf_Addr
+ * both in 32 bit and 64 bit ELF.
+ */
+ phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
+ phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
+ phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
+
+ return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_read_phdrs - read the program headers from the buffer
+ *
+ * This function assumes that the program header table was checked for sanity.
+ * Use elf_is_ehdr_sane() if it wasn't.
+ */
+static int elf_read_phdrs(const char *buf, size_t len,
+ struct elf_info *elf_info)
+{
+ size_t phdr_size, i;
+ const struct elfhdr *ehdr = elf_info->ehdr;
+
+ /*
+ * e_phnum is at most 65535 so calculating the size of the
+ * program header cannot overflow.
+ */
+ phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
+
+ elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL);
+ if (!elf_info->proghdrs)
+ return -ENOMEM;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ int ret;
+
+ ret = elf_read_phdr(buf, len, elf_info, i);
+ if (ret) {
+ kfree(elf_info->proghdrs);
+ elf_info->proghdrs = NULL;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * elf_is_shdr_sane - check that it is safe to use the section header
+ * @buf_len: size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
+{
+ bool size_ok;
+
+ /* SHT_NULL headers have undefined values, so we can't check them. */
+ if (shdr->sh_type == SHT_NULL)
+ return true;
+
+ /* Now verify sh_entsize */
+ switch (shdr->sh_type) {
+ case SHT_SYMTAB:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
+ break;
+ case SHT_RELA:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
+ break;
+ case SHT_DYNAMIC:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
+ break;
+ case SHT_REL:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
+ break;
+ case SHT_NOTE:
+ case SHT_PROGBITS:
+ case SHT_HASH:
+ case SHT_NOBITS:
+ default:
+ /*
+ * This is a section whose entsize requirements
+ * I don't care about. If I don't know about
+ * the section I can't care about it's entsize
+ * requirements.
+ */
+ size_ok = true;
+ break;
+ }
+
+ if (!size_ok) {
+ pr_debug("ELF section with wrong entry size.\n");
+ return false;
+ } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
+ pr_debug("ELF section address wraps around.\n");
+ return false;
+ }
+
+ if (shdr->sh_type != SHT_NOBITS) {
+ if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
+ pr_debug("ELF section location wraps around.\n");
+ return false;
+ } else if (shdr->sh_offset + shdr->sh_size > buf_len) {
+ pr_debug("ELF section not in file.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info,
+ int idx)
+{
+ struct elf_shdr *shdr = &elf_info->sechdrs[idx];
+ const struct elfhdr *ehdr = elf_info->ehdr;
+ const char *sbuf;
+ struct elf_shdr *buf_shdr;
+
+ sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
+ buf_shdr = (struct elf_shdr *) sbuf;
+
+ shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name);
+ shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type);
+ shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
+ shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
+ shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link);
+ shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info);
+
+ /*
+ * The following fields have a type equivalent to Elf_Addr
+ * both in 32 bit and 64 bit ELF.
+ */
+ shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
+ shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
+ shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
+ shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
+
+ return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_read_shdrs - read the section headers from the buffer
+ *
+ * This function assumes that the section header table was checked for sanity.
+ * Use elf_is_ehdr_sane() if it wasn't.
+ */
+static int elf_read_shdrs(const char *buf, size_t len,
+ struct elf_info *elf_info)
+{
+ size_t shdr_size, i;
+
+ /*
+ * e_shnum is at most 65536 so calculating
+ * the size of the section header cannot overflow.
+ */
+ shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
+
+ elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
+ if (!elf_info->sechdrs)
+ return -ENOMEM;
+
+ for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
+ int ret;
+
+ ret = elf_read_shdr(buf, len, elf_info, i);
+ if (ret) {
+ kfree(elf_info->sechdrs);
+ elf_info->sechdrs = NULL;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info
+ * @buf: Buffer to read ELF file from.
+ * @len: Size of @buf.
+ * @ehdr: Pointer to existing struct which will be populated.
+ * @elf_info: Pointer to existing struct which will be populated.
+ *
+ * This function allows reading ELF files with different byte order than
+ * the kernel, byte-swapping the fields as needed.
+ *
+ * Return:
+ * On success returns 0, and the caller should call elf_free_info(elf_info) to
+ * free the memory allocated for the section and program headers.
+ */
+int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr,
+ struct elf_info *elf_info)
+{
+ int ret;
+
+ ret = elf_read_ehdr(buf, len, ehdr);
+ if (ret)
+ return ret;
+
+ elf_info->buffer = buf;
+ elf_info->ehdr = ehdr;
+ if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
+ ret = elf_read_phdrs(buf, len, elf_info);
+ if (ret)
+ return ret;
+ }
+ if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
+ ret = elf_read_shdrs(buf, len, elf_info);
+ if (ret) {
+ kfree(elf_info->proghdrs);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * elf_free_info - free memory allocated by elf_read_from_buffer
+ */
+void elf_free_info(struct elf_info *elf_info)
+{
+ kfree(elf_info->proghdrs);
+ kfree(elf_info->sechdrs);
+ memset(elf_info, 0, sizeof(*elf_info));
+}
+/**
+ * build_elf_exec_info - read ELF executable and check that we can use it
+ */
+static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr,
+ struct elf_info *elf_info)
+{
+ int i;
+ int ret;
+
+ ret = elf_read_from_buffer(buf, len, ehdr, elf_info);
+ if (ret)
+ return ret;
+
+ /* Big endian vmlinux has type ET_DYN. */
+ if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
+ pr_err("Not an ELF executable.\n");
+ goto error;
+ } else if (!elf_info->proghdrs) {
+ pr_err("No ELF program header.\n");
+ goto error;
+ }
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ /*
+ * Kexec does not support loading interpreters.
+ * In addition this check keeps us from attempting
+ * to kexec ordinay executables.
+ */
+ if (elf_info->proghdrs[i].p_type == PT_INTERP) {
+ pr_err("Requires an ELF interpreter.\n");
+ goto error;
+ }
+ }
+
+ return 0;
+error:
+ elf_free_info(elf_info);
+ return -ENOEXEC;
+}
+
+static int elf64_probe(const char *buf, unsigned long len)
+{
+ struct elfhdr ehdr;
+ struct elf_info elf_info;
+ int ret;
+
+ ret = build_elf_exec_info(buf, len, &ehdr, &elf_info);
+ if (ret)
+ return ret;
+
+ elf_free_info(&elf_info);
+
+ return elf_check_arch(&ehdr) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_exec_load - load ELF executable image
+ * @lowest_load_addr: On return, will be the address where the first PT_LOAD
+ * section will be loaded in memory.
+ *
+ * Return:
+ * 0 on success, negative value on failure.
+ */
+static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr,
+ struct elf_info *elf_info,
+ unsigned long *lowest_load_addr)
+{
+ unsigned long base = 0, lowest_addr = UINT_MAX;
+ int ret;
+ size_t i;
+ struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size,
+ .top_down = false };
+
+ /* Read in the PT_LOAD segments. */
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ unsigned long load_addr;
+ size_t size;
+ const struct elf_phdr *phdr;
+
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ size = phdr->p_filesz;
+ if (size > phdr->p_memsz)
+ size = phdr->p_memsz;
+
+ kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
+ kbuf.bufsz = size;
+ kbuf.memsz = phdr->p_memsz;
+ kbuf.buf_align = phdr->p_align;
+ kbuf.buf_min = phdr->p_paddr + base;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ load_addr = kbuf.mem;
+
+ if (load_addr < lowest_addr)
+ lowest_addr = load_addr;
+ }
+
+ /* Update entry point to reflect new load address. */
+ ehdr->e_entry += base;
+
+ *lowest_load_addr = lowest_addr;
+ ret = 0;
+ out:
+ return ret;
+}
+
+static void *elf64_load(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ unsigned int fdt_size;
+ unsigned long kernel_load_addr, purgatory_load_addr;
+ unsigned long initrd_load_addr = 0, fdt_load_addr;
+ void *fdt;
+ const void *slave_code;
+ struct elfhdr ehdr;
+ struct elf_info elf_info;
+ struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+ .buf_max = ppc64_rma_size };
+
+ ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ if (ret)
+ goto out;
+
+ ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr);
+ if (ret)
+ goto out;
+
+ pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
+
+ ret = kexec_load_purgatory(image, 0, ppc64_rma_size, true,
+ &purgatory_load_addr);
+ if (ret) {
+ pr_err("Loading purgatory failed.\n");
+ goto out;
+ }
+
+ pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+
+ if (initrd != NULL) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = false;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ initrd_load_addr = kbuf.mem;
+
+ pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
+ }
+
+ fdt_size = fdt_totalsize(initial_boot_params) * 2;
+ fdt = kmalloc(fdt_size, GFP_KERNEL);
+ if (!fdt) {
+ pr_err("Not enough memory for the device tree.\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = setup_new_fdt(fdt, initrd_load_addr, initrd_len, cmdline);
+ if (ret)
+ goto out;
+
+ fdt_pack(fdt);
+
+ kbuf.buffer = fdt;
+ kbuf.bufsz = kbuf.memsz = fdt_size;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = true;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ fdt_load_addr = kbuf.mem;
+
+ pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
+
+ slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
+ ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
+ fdt_load_addr);
+ if (ret)
+ pr_err("Error setting up the purgatory.\n");
+
+out:
+ elf_free_info(&elf_info);
+
+ /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
+ return ret ? ERR_PTR(ret) : fdt;
+}
+
+struct kexec_file_ops kexec_elf64_ops = {
+ .probe = elf64_probe,
+ .load = elf64_load,
+};
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9479d8e..ad108b8 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -140,13 +140,16 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
regs->link = (unsigned long)kretprobe_trampoline;
}
-static int __kprobes kprobe_handler(struct pt_regs *regs)
+int __kprobes kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p;
int ret = 0;
unsigned int *addr = (unsigned int *)regs->nip;
struct kprobe_ctlblk *kcb;
+ if (user_mode(regs))
+ return 0;
+
/*
* We don't want to be preempted for the entire
* duration of kprobe processing
@@ -359,12 +362,12 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
* single-stepped a copy of the instruction. The address of this
* copy is p->ainsn.insn.
*/
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+int __kprobes kprobe_post_handler(struct pt_regs *regs)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (!cur)
+ if (!cur || user_mode(regs))
return 0;
/* make sure we got here for instruction we have a kprobe on */
@@ -470,25 +473,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
- struct die_args *args = (struct die_args *)data;
- int ret = NOTIFY_DONE;
-
- if (args->regs && user_mode(args->regs))
- return ret;
-
- switch (val) {
- case DIE_BPT:
- if (kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- case DIE_SSTEP:
- if (post_kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- default:
- break;
- }
- return ret;
+ return NOTIFY_DONE;
}
unsigned long arch_deref_entry_point(void *entry)
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index a205fa3..5c12e21 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -310,7 +310,7 @@ void default_machine_kexec(struct kimage *image)
if (!kdump_in_progress())
kexec_prepare_cpus();
- pr_debug("kexec: Starting switchover sequence.\n");
+ printk("kexec: Starting switchover sequence.\n");
/* switch to a staticly allocated stack. Based on irq stack code.
* We setup preempt_count to avoid using VMX in memcpy.
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
new file mode 100644
index 0000000..7abc8a7
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -0,0 +1,338 @@
+/*
+ * ppc64 code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+
+#define SLAVE_CODE_SIZE 256
+
+static struct kexec_file_ops *kexec_file_loaders[] = {
+ &kexec_elf64_ops,
+};
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ int i, ret = -ENOEXEC;
+ struct kexec_file_ops *fops;
+
+ /* We don't support crash kernels yet. */
+ if (image->type == KEXEC_TYPE_CRASH)
+ return -ENOTSUPP;
+
+ for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
+ fops = kexec_file_loaders[i];
+ if (!fops || !fops->probe)
+ continue;
+
+ ret = fops->probe(buf, buf_len);
+ if (!ret) {
+ image->fops = fops;
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image)
+{
+ if (!image->fops || !image->fops->load)
+ return ERR_PTR(-ENOEXEC);
+
+ return image->fops->load(image, image->kernel_buf,
+ image->kernel_buf_len, image->initrd_buf,
+ image->initrd_buf_len, image->cmdline_buf,
+ image->cmdline_buf_len);
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ if (!image->fops || !image->fops->cleanup)
+ return 0;
+
+ return image->fops->cleanup(image->image_loader_data);
+}
+
+/**
+ * arch_kexec_walk_mem - call func(data) for each unreserved memory block
+ * @kbuf: Context info for the search. Also passed to @func.
+ * @func: Function to call for each memory block.
+ *
+ * This function is used by kexec_add_buffer and kexec_locate_mem_hole
+ * to find unreserved memory to load kexec segments into.
+ *
+ * Return: The memory walk will stop when func returns a non-zero value
+ * and that value will be returned. If all free regions are visited without
+ * func returning non-zero, then zero will be returned.
+ */
+int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *))
+{
+ int ret = 0;
+ u64 i;
+ phys_addr_t mstart, mend;
+
+ if (kbuf->top_down) {
+ for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0,
+ &mstart, &mend, NULL) {
+ /*
+ * In memblock, end points to the first byte after the
+ * range while in kexec, end points to the last byte
+ * in the range.
+ */
+ ret = func(mstart, mend - 1, kbuf);
+ if (ret)
+ break;
+ }
+ } else {
+ for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend,
+ NULL) {
+ /*
+ * In memblock, end points to the first byte after the
+ * range while in kexec, end points to the last byte
+ * in the range.
+ */
+ ret = func(mstart, mend - 1, kbuf);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * setup_purgatory - initialize the purgatory's global variables
+ * @image: kexec image.
+ * @slave_code: Slave code for the purgatory.
+ * @fdt: Flattened device tree for the next kernel.
+ * @kernel_load_addr: Address where the kernel is loaded.
+ * @fdt_load_addr: Address where the flattened device tree is loaded.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_purgatory(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr)
+{
+ unsigned int *slave_code_buf, master_entry;
+ int ret;
+
+ slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
+ if (!slave_code_buf)
+ return -ENOMEM;
+
+ /* Get the slave code from the new kernel and put it in purgatory. */
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+ slave_code_buf, SLAVE_CODE_SIZE,
+ true);
+ if (ret) {
+ kfree(slave_code_buf);
+ return ret;
+ }
+
+ master_entry = slave_code_buf[0];
+ memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
+ slave_code_buf[0] = master_entry;
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+ slave_code_buf, SLAVE_CODE_SIZE,
+ false);
+ kfree(slave_code_buf);
+
+ ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
+ sizeof(kernel_load_addr), false);
+ if (ret)
+ return ret;
+ ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr,
+ sizeof(fdt_load_addr), false);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+/**
+ * delete_fdt_mem_rsv - delete memory reservation with given address and size
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
+{
+ int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
+
+ for (i = 0; i < num_rsvs; i++) {
+ uint64_t rsv_start, rsv_size;
+
+ ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
+ if (ret) {
+ pr_err("Malformed device tree.\n");
+ return -EINVAL;
+ }
+
+ if (rsv_start == start && rsv_size == size) {
+ ret = fdt_del_mem_rsv(fdt, i);
+ if (ret) {
+ pr_err("Error deleting device tree reservation.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+/*
+ * setup_new_fdt - modify /chosen and memory reservation for the next kernel
+ * @fdt: Flattened device tree for the next kernel.
+ * @initrd_load_addr: Address where the next initrd will be loaded.
+ * @initrd_len: Size of the next initrd, or 0 if there will be none.
+ * @cmdline: Command line for the next kernel, or NULL if there will
+ * be none.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_new_fdt(void *fdt, unsigned long initrd_load_addr,
+ unsigned long initrd_len, const char *cmdline)
+{
+ int ret, chosen_node;
+ const void *prop;
+
+ /* Remove memory reservation for the current device tree. */
+ ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
+ fdt_totalsize(initial_boot_params));
+ if (ret == 0)
+ pr_debug("Removed old device tree reservation.\n");
+ else if (ret != -ENOENT)
+ return ret;
+
+ chosen_node = fdt_path_offset(fdt, "/chosen");
+ if (chosen_node == -FDT_ERR_NOTFOUND) {
+ chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
+ "chosen");
+ if (chosen_node < 0) {
+ pr_err("Error creating /chosen.\n");
+ return -EINVAL;
+ }
+ } else if (chosen_node < 0) {
+ pr_err("Malformed device tree: error reading /chosen.\n");
+ return -EINVAL;
+ }
+
+ /* Did we boot using an initrd? */
+ prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
+ if (prop) {
+ uint64_t tmp_start, tmp_end, tmp_size;
+
+ tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
+
+ prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
+ if (!prop) {
+ pr_err("Malformed device tree.\n");
+ return -EINVAL;
+ }
+ tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
+
+ /*
+ * kexec reserves exact initrd size, while firmware may
+ * reserve a multiple of PAGE_SIZE, so check for both.
+ */
+ tmp_size = tmp_end - tmp_start;
+ ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size);
+ if (ret == -ENOENT)
+ ret = delete_fdt_mem_rsv(fdt, tmp_start,
+ round_up(tmp_size, PAGE_SIZE));
+ if (ret == 0)
+ pr_debug("Removed old initrd reservation.\n");
+ else if (ret != -ENOENT)
+ return ret;
+
+ /* If there's no new initrd, delete the old initrd's info. */
+ if (initrd_len == 0) {
+ ret = fdt_delprop(fdt, chosen_node,
+ "linux,initrd-start");
+ if (ret) {
+ pr_err("Error deleting linux,initrd-start.\n");
+ return -EINVAL;
+ }
+
+ ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end");
+ if (ret) {
+ pr_err("Error deleting linux,initrd-end.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
+ if (initrd_len) {
+ ret = fdt_setprop_u64(fdt, chosen_node,
+ "linux,initrd-start",
+ initrd_load_addr);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+
+ /* initrd-end is the first address after the initrd image. */
+ ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end",
+ initrd_load_addr + initrd_len);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+
+ ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len);
+ if (ret) {
+ pr_err("Error reserving initrd memory: %s\n",
+ fdt_strerror(ret));
+ return -EINVAL;
+ }
+ }
+
+ if (cmdline != NULL) {
+ ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+ } else {
+ ret = fdt_delprop(fdt, chosen_node, "bootargs");
+ if (ret && ret != -FDT_ERR_NOTFOUND) {
+ pr_err("Error deleting bootargs.\n");
+ return -EINVAL;
+ }
+ }
+
+ ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
+ if (ret) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 5e7ece0..c6923ff 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -72,7 +72,6 @@ void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err,
uint64_t nip, uint64_t addr)
{
- uint64_t srr1;
int index = __this_cpu_inc_return(mce_nest_count) - 1;
struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
@@ -99,8 +98,6 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
mce->severity = MCE_SEV_ERROR_SYNC;
- srr1 = regs->msr;
-
/*
* Populate the mce error_type and type-specific error_type.
*/
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 93cf7a5..1863324 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -614,7 +614,7 @@
_GLOBAL(__main)
blr
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* Must be relocatable PIC code callable as a C function.
*/
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 4f17867..32be2a8 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -478,7 +478,7 @@
addi r5,r5,kexec_flag-1b
99: HMT_LOW
-#ifdef CONFIG_KEXEC /* use no memory without kexec */
+#ifdef CONFIG_KEXEC_CORE /* use no memory without kexec */
lwz r4,0(r5)
cmpwi 0,r4,0
beq 99b
@@ -503,7 +503,7 @@
.long 0
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
#ifdef CONFIG_PPC_BOOK3E
/*
* BOOK3E has no real MMU mode, so we have to setup the initial TLB
@@ -716,4 +716,4 @@
mtlr 4
li r5,0
blr /* image->start(physid, image->start, 0); */
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index b60a67d..34aeac5 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -114,11 +114,6 @@ static struct platform_driver of_pci_phb_driver = {
},
};
-static __init int of_pci_phb_init(void)
-{
- return platform_driver_register(&of_pci_phb_driver);
-}
-
-device_initcall(of_pci_phb_init);
+builtin_platform_driver(of_pci_phb_driver);
#endif /* CONFIG_PPC_OF_PLATFORM_PCI */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5b62e8c..9da9a42 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -64,6 +64,12 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
+#ifdef CONFIG_CC_STACKPROTECTOR
+#include <linux/stackprotector.h>
+unsigned long __stack_chk_guard __read_mostly;
+EXPORT_SYMBOL(__stack_chk_guard);
+#endif
+
/* Transactional Memory debug */
#ifdef TM_DEBUG_SW
#define TM_DEBUG(x...) printk(KERN_INFO x)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b0245be..f5d399e 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -156,21 +156,22 @@ static struct ibm_pa_feature {
unsigned char pabit; /* bit number (big-endian) */
unsigned char invert; /* if 1, pa bit set => clear feature */
} ibm_pa_features[] __initdata = {
- {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0, 0},
- {0, 0, PPC_FEATURE_HAS_FPU, 0, 0, 1, 0},
- {CPU_FTR_CTRL, 0, 0, 0, 0, 3, 0},
- {CPU_FTR_NOEXECUTE, 0, 0, 0, 0, 6, 0},
- {CPU_FTR_NODSISRALIGN, 0, 0, 0, 1, 1, 1},
- {0, MMU_FTR_CI_LARGE_PAGE, 0, 0, 1, 2, 0},
- {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0},
+ { .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU },
+ { .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU },
+ { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL },
+ { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE },
+ { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE },
+ { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX },
+ { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN },
+ { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE,
+ .cpu_user_ftrs = PPC_FEATURE_TRUE_LE },
/*
* If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n),
* we don't want to turn on TM here, so we use the *_COMP versions
* which are 0 if the kernel doesn't support TM.
*/
- {CPU_FTR_TM_COMP, 0, 0,
- PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0},
- {0, MMU_FTR_TYPE_RADIX, 0, 0, 40, 0, 0},
+ { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP,
+ .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP },
};
static void __init scan_features(unsigned long node, const unsigned char *ftrs,
@@ -427,7 +428,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
tce_alloc_end = *lprop;
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
if (lprop)
crashk_res.start = *lprop;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 88ac964..ec47a93 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -461,14 +461,14 @@ static int __init prom_next_node(phandle *nodep)
}
}
-static int inline prom_getprop(phandle node, const char *pname,
+static inline int prom_getprop(phandle node, const char *pname,
void *value, size_t valuelen)
{
return call_prom("getprop", 4, 1, node, ADDR(pname),
(u32)(unsigned long) value, (u32) valuelen);
}
-static int inline prom_getproplen(phandle node, const char *pname)
+static inline int prom_getproplen(phandle node, const char *pname)
{
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
@@ -635,13 +635,7 @@ static void __init early_cmdline_parse(void)
*
* See prom.h for the definition of the bits specified in the
* architecture vector.
- *
- * Because the description vector contains a mix of byte and word
- * values, we declare it as an unsigned char array, and use this
- * macro to put word values in.
*/
-#define W(x) ((x) >> 24) & 0xff, ((x) >> 16) & 0xff, \
- ((x) >> 8) & 0xff, (x) & 0xff
/* Firmware expects the value to be n - 1, where n is the # of vectors */
#define NUM_VECTORS(n) ((n) - 1)
@@ -652,92 +646,205 @@ static void __init early_cmdline_parse(void)
*/
#define VECTOR_LENGTH(n) (1 + (n) - 2)
-unsigned char ibm_architecture_vec[] = {
- W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */
- W(0xffff0000), W(0x003e0000), /* POWER6 */
- W(0xffff0000), W(0x003f0000), /* POWER7 */
- W(0xffff0000), W(0x004b0000), /* POWER8E */
- W(0xffff0000), W(0x004c0000), /* POWER8NVL */
- W(0xffff0000), W(0x004d0000), /* POWER8 */
- W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */
- W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */
- W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */
- W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */
- NUM_VECTORS(6), /* 6 option vectors */
+struct option_vector1 {
+ u8 byte1;
+ u8 arch_versions;
+} __packed;
- /* option vector 1: processor architectures supported */
- VECTOR_LENGTH(2), /* length */
- 0, /* don't ignore, don't halt */
- OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
- OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
+struct option_vector2 {
+ u8 byte1;
+ __be16 reserved;
+ __be32 real_base;
+ __be32 real_size;
+ __be32 virt_base;
+ __be32 virt_size;
+ __be32 load_base;
+ __be32 min_rma;
+ __be32 min_load;
+ u8 min_rma_percent;
+ u8 max_pft_size;
+} __packed;
+struct option_vector3 {
+ u8 byte1;
+ u8 byte2;
+} __packed;
+
+struct option_vector4 {
+ u8 byte1;
+ u8 min_vp_cap;
+} __packed;
+
+struct option_vector5 {
+ u8 byte1;
+ u8 byte2;
+ u8 byte3;
+ u8 cmo;
+ u8 associativity;
+ u8 bin_opts;
+ u8 micro_checkpoint;
+ u8 reserved0;
+ __be32 max_cpus;
+ __be16 papr_level;
+ __be16 reserved1;
+ u8 platform_facilities;
+ u8 reserved2;
+ __be16 reserved3;
+ u8 subprocessors;
+} __packed;
+
+struct option_vector6 {
+ u8 reserved;
+ u8 secondary_pteg;
+ u8 os_name;
+} __packed;
+
+struct ibm_arch_vec {
+ struct { u32 mask, val; } pvrs[10];
+
+ u8 num_vectors;
+
+ u8 vec1_len;
+ struct option_vector1 vec1;
+
+ u8 vec2_len;
+ struct option_vector2 vec2;
+
+ u8 vec3_len;
+ struct option_vector3 vec3;
+
+ u8 vec4_len;
+ struct option_vector4 vec4;
+
+ u8 vec5_len;
+ struct option_vector5 vec5;
+
+ u8 vec6_len;
+ struct option_vector6 vec6;
+} __packed;
+
+struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
+ .pvrs = {
+ {
+ .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */
+ .val = cpu_to_be32(0x003a0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER6 */
+ .val = cpu_to_be32(0x003e0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER7 */
+ .val = cpu_to_be32(0x003f0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER8E */
+ .val = cpu_to_be32(0x004b0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER8NVL */
+ .val = cpu_to_be32(0x004c0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER8 */
+ .val = cpu_to_be32(0x004d0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */
+ .val = cpu_to_be32(0x0f000004),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 2.06-compliant */
+ .val = cpu_to_be32(0x0f000003),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 2.05-compliant */
+ .val = cpu_to_be32(0x0f000002),
+ },
+ {
+ .mask = cpu_to_be32(0xfffffffe), /* all 2.04-compliant and earlier */
+ .val = cpu_to_be32(0x0f000001),
+ },
+ },
+
+ .num_vectors = NUM_VECTORS(6),
+
+ .vec1_len = VECTOR_LENGTH(sizeof(struct option_vector1)),
+ .vec1 = {
+ .byte1 = 0,
+ .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
+ OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
+ },
+
+ .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)),
/* option vector 2: Open Firmware options supported */
- VECTOR_LENGTH(33), /* length */
- OV2_REAL_MODE,
- 0, 0,
- W(0xffffffff), /* real_base */
- W(0xffffffff), /* real_size */
- W(0xffffffff), /* virt_base */
- W(0xffffffff), /* virt_size */
- W(0xffffffff), /* load_base */
- W(256), /* 256MB min RMA */
- W(0xffffffff), /* full client load */
- 0, /* min RMA percentage of total RAM */
- 48, /* max log_2(hash table size) */
+ .vec2 = {
+ .byte1 = OV2_REAL_MODE,
+ .reserved = 0,
+ .real_base = cpu_to_be32(0xffffffff),
+ .real_size = cpu_to_be32(0xffffffff),
+ .virt_base = cpu_to_be32(0xffffffff),
+ .virt_size = cpu_to_be32(0xffffffff),
+ .load_base = cpu_to_be32(0xffffffff),
+ .min_rma = cpu_to_be32(256), /* 256MB min RMA */
+ .min_load = cpu_to_be32(0xffffffff), /* full client load */
+ .min_rma_percent = 0, /* min RMA percentage of total RAM */
+ .max_pft_size = 48, /* max log_2(hash table size) */
+ },
+ .vec3_len = VECTOR_LENGTH(sizeof(struct option_vector3)),
/* option vector 3: processor options supported */
- VECTOR_LENGTH(2), /* length */
- 0, /* don't ignore, don't halt */
- OV3_FP | OV3_VMX | OV3_DFP,
+ .vec3 = {
+ .byte1 = 0, /* don't ignore, don't halt */
+ .byte2 = OV3_FP | OV3_VMX | OV3_DFP,
+ },
+ .vec4_len = VECTOR_LENGTH(sizeof(struct option_vector4)),
/* option vector 4: IBM PAPR implementation */
- VECTOR_LENGTH(2), /* length */
- 0, /* don't halt */
- OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */
+ .vec4 = {
+ .byte1 = 0, /* don't halt */
+ .min_vp_cap = OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */
+ },
+ .vec5_len = VECTOR_LENGTH(sizeof(struct option_vector5)),
/* option vector 5: PAPR/OF options */
- VECTOR_LENGTH(21), /* length */
- 0, /* don't ignore, don't halt */
- OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
- OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
+ .vec5 = {
+ .byte1 = 0, /* don't ignore, don't halt */
+ .byte2 = OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
+ OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
#ifdef CONFIG_PCI_MSI
- /* PCIe/MSI support. Without MSI full PCIe is not supported */
- OV5_FEAT(OV5_MSI),
+ /* PCIe/MSI support. Without MSI full PCIe is not supported */
+ OV5_FEAT(OV5_MSI),
#else
- 0,
+ 0,
#endif
- 0,
+ .byte3 = 0,
+ .cmo =
#ifdef CONFIG_PPC_SMLPAR
- OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO),
+ OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO),
#else
- 0,
+ 0,
#endif
- OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN),
- 0,
- 0,
- 0,
- /* WARNING: The offset of the "number of cores" field below
- * must match by the macro below. Update the definition if
- * the structure layout changes.
- */
-#define IBM_ARCH_VEC_NRCORES_OFFSET 133
- W(NR_CPUS), /* number of cores supported */
- 0,
- 0,
- 0,
- 0,
- OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) |
- OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */
- 0, /* Byte 18 */
- 0, /* Byte 19 */
- 0, /* Byte 20 */
- OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */
+ .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN),
+ .bin_opts = 0,
+ .micro_checkpoint = 0,
+ .reserved0 = 0,
+ .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */
+ .papr_level = 0,
+ .reserved1 = 0,
+ .platform_facilities = OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | OV5_FEAT(OV5_PFO_HW_842),
+ .reserved2 = 0,
+ .reserved3 = 0,
+ .subprocessors = 1,
+ },
/* option vector 6: IBM PAPR hints */
- VECTOR_LENGTH(3), /* length */
- 0,
- 0,
- OV6_LINUX,
+ .vec6_len = VECTOR_LENGTH(sizeof(struct option_vector6)),
+ .vec6 = {
+ .reserved = 0,
+ .secondary_pteg = 0,
+ .os_name = OV6_LINUX,
+ },
};
/* Old method - ELF header with PT_NOTE sections only works on BE */
@@ -873,7 +980,6 @@ static void __init prom_send_capabilities(void)
ihandle root;
prom_arg_t ret;
u32 cores;
- unsigned char *ptcores;
root = call_prom("open", 1, 1, ADDR("/"));
if (root != 0) {
@@ -884,37 +990,18 @@ static void __init prom_send_capabilities(void)
* divide NR_CPUS.
*/
- /* The core value may start at an odd address. If such a word
- * access is made at a cache line boundary, this leads to an
- * exception which may not be handled at this time.
- * Forcing a per byte access to avoid exception.
- */
- ptcores = &ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET];
- cores = 0;
- cores |= ptcores[0] << 24;
- cores |= ptcores[1] << 16;
- cores |= ptcores[2] << 8;
- cores |= ptcores[3];
- if (cores != NR_CPUS) {
- prom_printf("WARNING ! "
- "ibm_architecture_vec structure inconsistent: %lu!\n",
- cores);
- } else {
- cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
- prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
- cores, NR_CPUS);
- ptcores[0] = (cores >> 24) & 0xff;
- ptcores[1] = (cores >> 16) & 0xff;
- ptcores[2] = (cores >> 8) & 0xff;
- ptcores[3] = cores & 0xff;
- }
+ cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
+ prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
+ cores, NR_CPUS);
+
+ ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores);
/* try calling the ibm,client-architecture-support method */
prom_printf("Calling ibm,client-architecture-support...");
if (call_prom_ret("call-method", 3, 2, &ret,
ADDR("ibm,client-architecture-support"),
root,
- ADDR(ibm_architecture_vec)) == 0) {
+ ADDR(&ibm_architecture_vec)) == 0) {
/* the call exists... */
if (ret)
prom_printf("\nWARNING: ibm,client-architecture"
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 270ee30..f516ac5 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -915,7 +915,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.context.pte_frag = NULL;
#endif
#ifdef CONFIG_SPAPR_TCE_IOMMU
- mm_iommu_init(&init_mm.context);
+ mm_iommu_init(&init_mm);
#endif
irqstack_early_init();
exc_lvl_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 7ac8e6e..c3e1290 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -346,7 +346,7 @@ void early_setup_secondary(void)
#endif /* CONFIG_SMP */
-#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
static bool use_spinloop(void)
{
if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
@@ -391,7 +391,7 @@ void smp_release_cpus(void)
DBG(" <- smp_release_cpus()\n");
}
-#endif /* CONFIG_SMP || CONFIG_KEXEC */
+#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
/*
* Initialize some remaining members of the ppc64_caches and systemcfg
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9c6f3fd..893bd7f 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -193,7 +193,7 @@ int smp_request_message_ipi(int virq, int msg)
if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
return -EINVAL;
}
-#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
+#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE)
if (msg == PPC_MSG_DEBUGGER_BREAK) {
return 1;
}
@@ -325,7 +325,7 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
void smp_send_debugger_break(void)
{
int cpu;
@@ -340,7 +340,7 @@ void smp_send_debugger_break(void)
}
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
crash_ipi_function_ptr = crash_ipi_callback;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 91d278c..4239aaf 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -64,8 +64,9 @@
#include <asm/asm-prototypes.h>
#include <asm/hmi.h>
#include <sysdev/fsl_pci.h>
+#include <asm/kprobes.h>
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
@@ -122,9 +123,6 @@ static unsigned long oops_begin(struct pt_regs *regs)
int cpu;
unsigned long flags;
- if (debugger(regs))
- return 1;
-
oops_enter();
/* racy, but better than risking deadlock. */
@@ -150,14 +148,15 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
int signr)
{
bust_spinlocks(0);
- die_owner = -1;
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
die_nest_count--;
oops_exit();
printk("\n");
- if (!die_nest_count)
+ if (!die_nest_count) {
/* Nest count reaches zero, release the lock. */
+ die_owner = -1;
arch_spin_unlock(&die_lock);
+ }
raw_local_irq_restore(flags);
crash_fadump(regs, "die oops");
@@ -227,8 +226,12 @@ NOKPROBE_SYMBOL(__die);
void die(const char *str, struct pt_regs *regs, long err)
{
- unsigned long flags = oops_begin(regs);
+ unsigned long flags;
+ if (debugger(regs))
+ return;
+
+ flags = oops_begin(regs);
if (__die(str, regs, err))
err = 0;
oops_end(flags, regs, err);
@@ -824,6 +827,9 @@ void single_step_exception(struct pt_regs *regs)
clear_single_step(regs);
+ if (kprobe_post_handler(regs))
+ return;
+
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
goto bail;
@@ -1177,6 +1183,9 @@ void program_check_exception(struct pt_regs *regs)
if (debugger_bpt(regs))
goto bail;
+ if (kprobe_handler(regs))
+ goto bail;
+
/* trap exception */
if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
== NOTIFY_STOP)
@@ -1510,7 +1519,8 @@ void facility_unavailable_exception(struct pt_regs *regs)
return;
}
- if ((status < ARRAY_SIZE(facility_strings)) &&
+ if ((hv || status >= 2) &&
+ (status < ARRAY_SIZE(facility_strings)) &&
facility_strings[status])
facility = facility_strings[status];
@@ -1518,9 +1528,8 @@ void facility_unavailable_exception(struct pt_regs *regs)
if (!arch_irq_disabled_regs(regs))
local_irq_enable();
- pr_err_ratelimited(
- "%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n",
- hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);
+ pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
+ hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
out:
if (user_mode(regs)) {
@@ -1745,6 +1754,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
return;
}
+ if (kprobe_post_handler(regs))
+ return;
+
if (notify_die(DIE_SSTEP, "block_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP) {
return;
@@ -1759,6 +1771,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
/* Clear the instruction completion event */
mtspr(SPRN_DBSR, DBSR_IC);
+ if (kprobe_post_handler(regs))
+ return;
+
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP) {
return;
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index b64287c..9c78a9c 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -15,6 +15,7 @@
#include <asm/sstep.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
+#include <asm/cpu_has_feature.h>
#include <asm/cputable.h>
extern char system_call_common[];
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3f4d338..7414034 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -43,5 +43,5 @@
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
-obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o \
- dump_hashpagetable.o
+obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o
+obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 362954f..aaa7ec6 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -134,6 +134,9 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
return 1;
}
+ /* Bad address */
+ if (!vsid)
+ return 1;
vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey;
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index d242bc7..49abaf4 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -159,6 +159,7 @@ static const struct flag_info flag_array[] = {
.set = "no cache",
.clear = " ",
}, {
+#ifdef CONFIG_PPC_BOOK3S_64
.mask = H_PAGE_BUSY,
.val = H_PAGE_BUSY,
.set = "busy",
@@ -183,6 +184,7 @@ static const struct flag_info flag_array[] = {
.val = H_PAGE_F_SECOND,
.set = "f_second",
}, {
+#endif
.mask = _PAGE_SPECIAL,
.val = _PAGE_SPECIAL,
.set = "special",
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 73932f4..6fd30ac 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -391,6 +391,20 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
if (is_exec) {
/*
+ * An execution fault + no execute ?
+ *
+ * On CPUs that don't have CPU_FTR_COHERENT_ICACHE we
+ * deliberately create NX mappings, and use the fault to do the
+ * cache flush. This is usually handled in hash_page_do_lazy_icache()
+ * but we could end up here if that races with a concurrent PTE
+ * update. In that case we need to fall through here to the VMA
+ * check below.
+ */
+ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
+ (regs->msr & SRR1_ISI_N_OR_G))
+ goto bad_area;
+
+ /*
* Allow execution from readable areas if the MMU does not
* provide separate controls over reading and executing.
*
@@ -404,6 +418,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
!(vma->vm_flags & (VM_READ | VM_WRITE))))
goto bad_area;
+
#ifdef CONFIG_PPC_STD_MMU
/*
* protfault should only happen due to us
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 9d9b3ef..cc33260 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -223,13 +223,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
return -1;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
i, hpte_v, hpte_r);
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
+ hpte_v = hpte_old_to_new_v(hpte_v);
+ }
+
hptep->r = cpu_to_be64(hpte_r);
/* Guarantee the second dword is visible before the valid bit */
eieio();
@@ -297,6 +302,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
vpn, want_v & HPTE_V_AVPN, slot, newpp);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -311,6 +318,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
native_lock_hpte(hptep);
/* recheck with locks held */
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID))) {
ret = -1;
@@ -352,6 +361,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
for (i = 0; i < HPTES_PER_GROUP; i++) {
hptep = htab_address + slot;
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
/* HPTE matches */
@@ -411,6 +422,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
@@ -469,6 +482,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/* Even if we miss, we need to invalidate the TLB */
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
@@ -506,6 +521,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
/* Look at the 8 bit LP value */
unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
+ hpte_r = hpte_new_to_old_r(hpte_r);
+ }
if (!(hpte_v & HPTE_V_LARGE)) {
size = MMU_PAGE_4K;
a_size = MMU_PAGE_4K;
@@ -514,11 +533,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
a_size = hpte_page_sizes[lp] >> 4;
}
/* This works for all page sizes, and for 256M and 1T segments */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT;
- else
- *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
-
+ *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
shift = mmu_psize_defs[size].shift;
avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
@@ -641,6 +656,9 @@ static void native_flush_hash_range(unsigned long number, int local)
want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v,
+ be64_to_cpu(hptep->r));
if (!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 44d3c3a..b9a062f 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -792,37 +792,17 @@ static void update_hid_for_hash(void)
static void __init hash_init_partition_table(phys_addr_t hash_table,
unsigned long htab_size)
{
- unsigned long ps_field;
- unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+ mmu_partition_table_init();
/*
- * slb llp encoding for the page size used in VPM real mode.
- * We can ignore that for lpid 0
+ * PS field (VRMA page size) is not used for LPID 0, hence set to 0.
+ * For now, UPRT is 0 and we have no segment table.
*/
- ps_field = 0;
htab_size = __ilog2(htab_size) - 18;
-
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
- partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
- MEMBLOCK_ALLOC_ANYWHERE));
-
- /* Initialize the Partition Table with no entries */
- memset((void *)partition_tb, 0, patb_size);
- partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size);
- /*
- * FIXME!! This should be done via update_partition table
- * For now UPRT is 0 for us.
- */
- partition_tb->patb1 = 0;
+ mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
pr_info("Partition table %p\n", partition_tb);
if (cpu_has_feature(CPU_FTR_POWER9_DD1))
update_hid_for_hash();
- /*
- * update partition table control register,
- * 64 K size.
- */
- mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
-
}
static void __init htab_initialize(void)
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index b114f8b..73bf6e1 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -115,7 +115,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
mm->context.pte_frag = NULL;
#endif
#ifdef CONFIG_SPAPR_TCE_IOMMU
- mm_iommu_init(&mm->context);
+ mm_iommu_init(mm);
#endif
return 0;
}
@@ -156,13 +156,11 @@ static inline void destroy_pagetable_page(struct mm_struct *mm)
}
#endif
-
void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_SPAPR_TCE_IOMMU
- mm_iommu_cleanup(&mm->context);
+ WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
-
#ifdef CONFIG_PPC_ICSWX
drop_cop(mm->context.acop, mm);
kfree(mm->context.cop_lockp);
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index e0f1c33..104bad0 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -56,7 +56,7 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
}
pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
- current->pid,
+ current ? current->pid : 0,
incr ? '+' : '-',
npages << PAGE_SHIFT,
mm->locked_vm << PAGE_SHIFT,
@@ -66,12 +66,9 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
return ret;
}
-bool mm_iommu_preregistered(void)
+bool mm_iommu_preregistered(struct mm_struct *mm)
{
- if (!current || !current->mm)
- return false;
-
- return !list_empty(¤t->mm->context.iommu_group_mem_list);
+ return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
@@ -124,19 +121,16 @@ static int mm_iommu_move_page_from_cma(struct page *page)
return 0;
}
-long mm_iommu_get(unsigned long ua, unsigned long entries,
+long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem)
{
struct mm_iommu_table_group_mem_t *mem;
long i, j, ret = 0, locked_entries = 0;
struct page *page = NULL;
- if (!current || !current->mm)
- return -ESRCH; /* process exited */
-
mutex_lock(&mem_list_mutex);
- list_for_each_entry_rcu(mem, ¤t->mm->context.iommu_group_mem_list,
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
next) {
if ((mem->ua == ua) && (mem->entries == entries)) {
++mem->used;
@@ -154,7 +148,7 @@ long mm_iommu_get(unsigned long ua, unsigned long entries,
}
- ret = mm_iommu_adjust_locked_vm(current->mm, entries, true);
+ ret = mm_iommu_adjust_locked_vm(mm, entries, true);
if (ret)
goto unlock_exit;
@@ -215,11 +209,11 @@ long mm_iommu_get(unsigned long ua, unsigned long entries,
mem->entries = entries;
*pmem = mem;
- list_add_rcu(&mem->next, ¤t->mm->context.iommu_group_mem_list);
+ list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
unlock_exit:
if (locked_entries && ret)
- mm_iommu_adjust_locked_vm(current->mm, locked_entries, false);
+ mm_iommu_adjust_locked_vm(mm, locked_entries, false);
mutex_unlock(&mem_list_mutex);
@@ -264,17 +258,13 @@ static void mm_iommu_free(struct rcu_head *head)
static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
list_del_rcu(&mem->next);
- mm_iommu_adjust_locked_vm(current->mm, mem->entries, false);
call_rcu(&mem->rcu, mm_iommu_free);
}
-long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem)
+long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
long ret = 0;
- if (!current || !current->mm)
- return -ESRCH; /* process exited */
-
mutex_lock(&mem_list_mutex);
if (mem->used == 0) {
@@ -297,6 +287,8 @@ long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem)
/* @mapped became 0 so now mappings are disabled, release the region */
mm_iommu_release(mem);
+ mm_iommu_adjust_locked_vm(mm, mem->entries, false);
+
unlock_exit:
mutex_unlock(&mem_list_mutex);
@@ -304,14 +296,12 @@ long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem)
}
EXPORT_SYMBOL_GPL(mm_iommu_put);
-struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
- unsigned long size)
+struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
+ unsigned long ua, unsigned long size)
{
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
- list_for_each_entry_rcu(mem,
- ¤t->mm->context.iommu_group_mem_list,
- next) {
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
if ((mem->ua <= ua) &&
(ua + size <= mem->ua +
(mem->entries << PAGE_SHIFT))) {
@@ -324,14 +314,12 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);
-struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua,
- unsigned long entries)
+struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+ unsigned long ua, unsigned long entries)
{
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
- list_for_each_entry_rcu(mem,
- ¤t->mm->context.iommu_group_mem_list,
- next) {
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
if ((mem->ua == ua) && (mem->entries == entries)) {
ret = mem;
break;
@@ -373,17 +361,7 @@ void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
-void mm_iommu_init(mm_context_t *ctx)
+void mm_iommu_init(struct mm_struct *mm)
{
- INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list);
-}
-
-void mm_iommu_cleanup(mm_context_t *ctx)
-{
- struct mm_iommu_table_group_mem_t *mem, *tmp;
-
- list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) {
- list_del_rcu(&mem->next);
- mm_iommu_do_free(mem);
- }
+ INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index f4f437c..ebf9782 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -35,7 +35,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
#endif
changed = !pmd_same(*(pmdp), entry);
if (changed) {
- __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
+ __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp),
+ pmd_pte(entry), address);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
return changed;
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index e93e384..623a0dc 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -177,23 +177,15 @@ static void __init radix_init_pgtable(void)
static void __init radix_init_partition_table(void)
{
- unsigned long rts_field;
+ unsigned long rts_field, dw0;
+ mmu_partition_table_init();
rts_field = radix__get_tree_size();
+ dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
+ mmu_partition_table_set_entry(0, dw0, 0);
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
- partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
- partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
- RADIX_PGD_INDEX_SIZE | PATB_HR);
pr_info("Initializing Radix MMU\n");
pr_info("Partition table %p\n", partition_tb);
-
- memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
- /*
- * update partition table control register,
- * 64 K size.
- */
- mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
}
void __init radix_init_native(void)
@@ -320,6 +312,38 @@ static void update_hid_for_radix(void)
cpu_relax();
}
+static void radix_init_amor(void)
+{
+ /*
+ * In HV mode, we init AMOR (Authority Mask Override Register) so that
+ * the hypervisor and guest can setup IAMR (Instruction Authority Mask
+ * Register), enable key 0 and set it to 1.
+ *
+ * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
+ */
+ mtspr(SPRN_AMOR, (3ul << 62));
+}
+
+static void radix_init_iamr(void)
+{
+ unsigned long iamr;
+
+ /*
+ * The IAMR should set to 0 on DD1.
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ iamr = 0;
+ else
+ iamr = (1ul << 62);
+
+ /*
+ * Radix always uses key0 of the IAMR to determine if an access is
+ * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
+ * fetch.
+ */
+ mtspr(SPRN_IAMR, iamr);
+}
+
void __init radix__early_init_mmu(void)
{
unsigned long lpcr;
@@ -376,8 +400,12 @@ void __init radix__early_init_mmu(void)
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
+ radix_init_amor();
}
+ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+
+ radix_init_iamr();
radix_init_pgtable();
}
@@ -393,7 +421,9 @@ void radix__early_init_mmu_secondary(void)
mtspr(SPRN_PTCR,
__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ radix_init_amor();
}
+ radix_init_iamr();
}
void radix__mmu_cleanup_all(void)
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 911fdfb..cb39c8b 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -224,7 +224,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
if (changed) {
if (!is_vm_hugetlb_page(vma))
assert_pte_locked(vma->vm_mm, address);
- __ptep_set_access_flags(vma->vm_mm, ptep, entry);
+ __ptep_set_access_flags(vma->vm_mm, ptep, entry, address);
flush_tlb_page(vma, address);
}
return changed;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index f5e8d4e..8bca7f5 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -431,3 +431,37 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
}
}
#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void __init mmu_partition_table_init(void)
+{
+ unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+
+ BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
+ partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
+ MEMBLOCK_ALLOC_ANYWHERE));
+
+ /* Initialize the Partition Table with no entries */
+ memset((void *)partition_tb, 0, patb_size);
+
+ /*
+ * update partition table control register,
+ * 64 K size.
+ */
+ mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+}
+
+void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1)
+{
+ partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+ partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+
+ /* Global flush of TLBs and partition table caches for this lpid */
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index bda8c43..2822a82 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -424,3 +424,21 @@ void radix__flush_tlb_all(void)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+
+void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
+ unsigned long address)
+{
+ /*
+ * We track page size in pte only for DD1, So we can
+ * call this only on DD1.
+ */
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ VM_WARN_ON(1);
+ return;
+ }
+
+ if (old_pte & _PAGE_LARGE)
+ radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M);
+ else
+ radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
+}
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 6143c99..50e598c 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -12,6 +12,40 @@
*/
#include "isa207-common.h"
+PMU_FORMAT_ATTR(event, "config:0-49");
+PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
+PMU_FORMAT_ATTR(mark, "config:8");
+PMU_FORMAT_ATTR(combine, "config:11");
+PMU_FORMAT_ATTR(unit, "config:12-15");
+PMU_FORMAT_ATTR(pmc, "config:16-19");
+PMU_FORMAT_ATTR(cache_sel, "config:20-23");
+PMU_FORMAT_ATTR(sample_mode, "config:24-28");
+PMU_FORMAT_ATTR(thresh_sel, "config:29-31");
+PMU_FORMAT_ATTR(thresh_stop, "config:32-35");
+PMU_FORMAT_ATTR(thresh_start, "config:36-39");
+PMU_FORMAT_ATTR(thresh_cmp, "config:40-49");
+
+struct attribute *isa207_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_pmcxsel.attr,
+ &format_attr_mark.attr,
+ &format_attr_combine.attr,
+ &format_attr_unit.attr,
+ &format_attr_pmc.attr,
+ &format_attr_cache_sel.attr,
+ &format_attr_sample_mode.attr,
+ &format_attr_thresh_sel.attr,
+ &format_attr_thresh_stop.attr,
+ &format_attr_thresh_start.attr,
+ &format_attr_thresh_cmp.attr,
+ NULL,
+};
+
+struct attribute_group isa207_pmu_format_group = {
+ .name = "format",
+ .attrs = isa207_pmu_format_attr,
+};
+
static inline bool event_is_fab_match(u64 event)
{
/* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */
@@ -21,6 +55,48 @@ static inline bool event_is_fab_match(u64 event)
return (event == 0x30056 || event == 0x4f052);
}
+static bool is_event_valid(u64 event)
+{
+ u64 valid_mask = EVENT_VALID_MASK;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+ valid_mask = p9_EVENT_VALID_MASK;
+
+ return !(event & ~valid_mask);
+}
+
+static u64 mmcra_sdar_mode(u64 event)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+ return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+
+ return MMCRA_SDAR_MODE_TLB;
+}
+
+static u64 thresh_cmp_val(u64 value)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+ return value << p9_MMCRA_THR_CMP_SHIFT;
+
+ return value << MMCRA_THR_CMP_SHIFT;
+}
+
+static unsigned long combine_from_event(u64 event)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+ return p9_EVENT_COMBINE(event);
+
+ return EVENT_COMBINE(event);
+}
+
+static unsigned long combine_shift(unsigned long pmc)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+ return p9_MMCR1_COMBINE_SHIFT(pmc);
+
+ return MMCR1_COMBINE_SHIFT(pmc);
+}
+
int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
{
unsigned int unit, pmc, cache, ebb;
@@ -28,7 +104,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
mask = value = 0;
- if (event & ~EVENT_VALID_MASK)
+ if (!is_event_valid(event))
return -1;
pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
@@ -155,15 +231,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
pmc_inuse |= 1 << pmc;
}
- /* In continuous sampling mode, update SDAR on TLB miss */
- mmcra = MMCRA_SDAR_MODE_TLB;
- mmcr1 = mmcr2 = 0;
+ mmcra = mmcr1 = mmcr2 = 0;
/* Second pass: assign PMCs, set all MMCR1 fields */
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
unit = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
- combine = (event[i] >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK;
+ combine = combine_from_event(event[i]);
psel = event[i] & EVENT_PSEL_MASK;
if (!pmc) {
@@ -177,10 +251,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
if (pmc <= 4) {
mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc);
- mmcr1 |= combine << MMCR1_COMBINE_SHIFT(pmc);
+ mmcr1 |= combine << combine_shift(pmc);
mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc);
}
+ /* In continuous sampling mode, update SDAR on TLB miss */
+ mmcra |= mmcra_sdar_mode(event[i]);
+
if (event[i] & EVENT_IS_L1) {
cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT;
@@ -211,7 +288,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
mmcra |= val << MMCRA_THR_SEL_SHIFT;
val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
- mmcra |= val << MMCRA_THR_CMP_SHIFT;
+ mmcra |= thresh_cmp_val(val);
}
if (event[i] & EVENT_WANTS_BHRB) {
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 4d0a4e5..90495f1 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -107,6 +107,7 @@
#define EVENT_UNIT_MASK 0xf
#define EVENT_COMBINE_SHIFT 11 /* Combine bit */
#define EVENT_COMBINE_MASK 0x1
+#define EVENT_COMBINE(v) (((v) >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK)
#define EVENT_MARKED_SHIFT 8 /* Marked bit */
#define EVENT_MARKED_MASK 0x1
#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
@@ -134,6 +135,26 @@
PERF_SAMPLE_BRANCH_KERNEL |\
PERF_SAMPLE_BRANCH_HV)
+/* Contants to support power9 raw encoding format */
+#define p9_EVENT_COMBINE_SHIFT 10 /* Combine bit */
+#define p9_EVENT_COMBINE_MASK 0x3ull
+#define p9_EVENT_COMBINE(v) (((v) >> p9_EVENT_COMBINE_SHIFT) & p9_EVENT_COMBINE_MASK)
+#define p9_SDAR_MODE_SHIFT 50
+#define p9_SDAR_MODE_MASK 0x3ull
+#define p9_SDAR_MODE(v) (((v) >> p9_SDAR_MODE_SHIFT) & p9_SDAR_MODE_MASK)
+
+#define p9_EVENT_VALID_MASK \
+ ((p9_SDAR_MODE_MASK << p9_SDAR_MODE_SHIFT | \
+ (EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
+ (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \
+ (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \
+ (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \
+ (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \
+ (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \
+ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
+ EVENT_LINUX_MASK | \
+ EVENT_PSEL_MASK))
+
/*
* Layout of constraint bits:
*
@@ -210,15 +231,22 @@
#define MMCR1_DC_QUAL_SHIFT 47
#define MMCR1_IC_QUAL_SHIFT 46
+/* MMCR1 Combine bits macro for power9 */
+#define p9_MMCR1_COMBINE_SHIFT(pmc) (38 - ((pmc - 1) * 2))
+
/* Bits in MMCRA for PowerISA v2.07 */
#define MMCRA_SAMP_MODE_SHIFT 1
#define MMCRA_SAMP_ELIG_SHIFT 4
#define MMCRA_THR_CTL_SHIFT 8
#define MMCRA_THR_SEL_SHIFT 16
#define MMCRA_THR_CMP_SHIFT 32
-#define MMCRA_SDAR_MODE_TLB (1ull << 42)
+#define MMCRA_SDAR_MODE_SHIFT 42
+#define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT)
#define MMCRA_IFM_SHIFT 30
+/* MMCR1 Threshold Compare bit constant for power9 */
+#define p9_MMCRA_THR_CMP_SHIFT 45
+
/* Bits in MMCR2 for PowerISA v2.07 */
#define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9)))
#define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9)))
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index ab830d1..d071863 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -30,6 +30,9 @@ enum {
#define POWER8_MMCRA_IFM2 0x0000000080000000UL
#define POWER8_MMCRA_IFM3 0x00000000C0000000UL
+/* PowerISA v2.07 format attribute structure*/
+extern struct attribute_group isa207_pmu_format_group;
+
/* Table of alternatives, sorted by column 0 */
static const unsigned int event_alternatives[][MAX_ALT] = {
{ PM_MRK_ST_CMPL, PM_MRK_ST_CMPL_ALT },
@@ -175,42 +178,8 @@ static struct attribute_group power8_pmu_events_group = {
.attrs = power8_events_attr,
};
-PMU_FORMAT_ATTR(event, "config:0-49");
-PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
-PMU_FORMAT_ATTR(mark, "config:8");
-PMU_FORMAT_ATTR(combine, "config:11");
-PMU_FORMAT_ATTR(unit, "config:12-15");
-PMU_FORMAT_ATTR(pmc, "config:16-19");
-PMU_FORMAT_ATTR(cache_sel, "config:20-23");
-PMU_FORMAT_ATTR(sample_mode, "config:24-28");
-PMU_FORMAT_ATTR(thresh_sel, "config:29-31");
-PMU_FORMAT_ATTR(thresh_stop, "config:32-35");
-PMU_FORMAT_ATTR(thresh_start, "config:36-39");
-PMU_FORMAT_ATTR(thresh_cmp, "config:40-49");
-
-static struct attribute *power8_pmu_format_attr[] = {
- &format_attr_event.attr,
- &format_attr_pmcxsel.attr,
- &format_attr_mark.attr,
- &format_attr_combine.attr,
- &format_attr_unit.attr,
- &format_attr_pmc.attr,
- &format_attr_cache_sel.attr,
- &format_attr_sample_mode.attr,
- &format_attr_thresh_sel.attr,
- &format_attr_thresh_stop.attr,
- &format_attr_thresh_start.attr,
- &format_attr_thresh_cmp.attr,
- NULL,
-};
-
-static struct attribute_group power8_pmu_format_group = {
- .name = "format",
- .attrs = power8_pmu_format_attr,
-};
-
static const struct attribute_group *power8_pmu_attr_groups[] = {
- &power8_pmu_format_group,
+ &isa207_pmu_format_group,
&power8_pmu_events_group,
NULL,
};
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 8e9a819..346010e 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -16,6 +16,78 @@
#include "isa207-common.h"
/*
+ * Raw event encoding for Power9:
+ *
+ * 60 56 52 48 44 40 36 32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * | | [ ] [ ] [ thresh_cmp ] [ thresh_ctl ]
+ * | | | | |
+ * | | *- IFM (Linux) | thresh start/stop OR FAB match -*
+ * | *- BHRB (Linux) *sm
+ * *- EBB (Linux)
+ *
+ * 28 24 20 16 12 8 4 0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * [ ] [ sample ] [cache] [ pmc ] [unit ] [] m [ pmcxsel ]
+ * | | | | |
+ * | | | | *- mark
+ * | | *- L1/L2/L3 cache_sel |
+ * | | |
+ * | *- sampling mode for marked events *- combine
+ * |
+ * *- thresh_sel
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit (PMCxUNIT)
+ * MMCR1[24] = pmc1combine[0]
+ * MMCR1[25] = pmc1combine[1]
+ * MMCR1[26] = pmc2combine[0]
+ * MMCR1[27] = pmc2combine[1]
+ * MMCR1[28] = pmc3combine[0]
+ * MMCR1[29] = pmc3combine[1]
+ * MMCR1[30] = pmc4combine[0]
+ * MMCR1[31] = pmc4combine[1]
+ *
+ * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
+ * # PM_MRK_FAB_RSP_MATCH
+ * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
+ * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
+ * # PM_MRK_FAB_RSP_MATCH_CYC
+ * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
+ * else
+ * MMCRA[48:55] = thresh_ctl (THRESH START/END)
+ *
+ * if thresh_sel:
+ * MMCRA[45:47] = thresh_sel
+ *
+ * if thresh_cmp:
+ * MMCRA[9:11] = thresh_cmp[0:2]
+ * MMCRA[12:18] = thresh_cmp[3:9]
+ *
+ * if unit == 6 or unit == 7
+ * MMCRC[53:55] = cache_sel[1:3] (L2EVENT_SEL)
+ * else if unit == 8 or unit == 9:
+ * if cache_sel[0] == 0: # L3 bank
+ * MMCRC[47:49] = cache_sel[1:3] (L3EVENT_SEL0)
+ * else if cache_sel[0] == 1:
+ * MMCRC[50:51] = cache_sel[2:3] (L3EVENT_SEL1)
+ * else if cache_sel[1]: # L1 event
+ * MMCR1[16] = cache_sel[2]
+ * MMCR1[17] = cache_sel[3]
+ *
+ * if mark:
+ * MMCRA[63] = 1 (SAMPLE_ENABLE)
+ * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG)
+ * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE)
+ *
+ * if EBB and BHRB:
+ * MMCRA[32:33] = IFM
+ *
+ * MMCRA[SDAR_MODE] = sm
+ */
+
+/*
* Some power9 event codes.
*/
#define EVENT(_name, _code) _name = _code,
@@ -31,6 +103,9 @@ enum {
#define POWER9_MMCRA_IFM2 0x0000000080000000UL
#define POWER9_MMCRA_IFM3 0x00000000C0000000UL
+/* PowerISA v2.07 format attribute structure*/
+extern struct attribute_group isa207_pmu_format_group;
+
GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC);
GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL);
@@ -90,10 +165,16 @@ static struct attribute_group power9_pmu_events_group = {
.attrs = power9_events_attr,
};
-PMU_FORMAT_ATTR(event, "config:0-49");
+static const struct attribute_group *power9_isa207_pmu_attr_groups[] = {
+ &isa207_pmu_format_group,
+ &power9_pmu_events_group,
+ NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-51");
PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
PMU_FORMAT_ATTR(mark, "config:8");
-PMU_FORMAT_ATTR(combine, "config:11");
+PMU_FORMAT_ATTR(combine, "config:10-11");
PMU_FORMAT_ATTR(unit, "config:12-15");
PMU_FORMAT_ATTR(pmc, "config:16-19");
PMU_FORMAT_ATTR(cache_sel, "config:20-23");
@@ -102,6 +183,7 @@ PMU_FORMAT_ATTR(thresh_sel, "config:29-31");
PMU_FORMAT_ATTR(thresh_stop, "config:32-35");
PMU_FORMAT_ATTR(thresh_start, "config:36-39");
PMU_FORMAT_ATTR(thresh_cmp, "config:40-49");
+PMU_FORMAT_ATTR(sdar_mode, "config:50-51");
static struct attribute *power9_pmu_format_attr[] = {
&format_attr_event.attr,
@@ -116,6 +198,7 @@ static struct attribute *power9_pmu_format_attr[] = {
&format_attr_thresh_stop.attr,
&format_attr_thresh_start.attr,
&format_attr_thresh_cmp.attr,
+ &format_attr_sdar_mode.attr,
NULL,
};
@@ -291,6 +374,24 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
#undef C
+static struct power_pmu power9_isa207_pmu = {
+ .name = "POWER9",
+ .n_counter = MAX_PMU_COUNTERS,
+ .add_fields = ISA207_ADD_FIELDS,
+ .test_adder = ISA207_TEST_ADDER,
+ .compute_mmcr = isa207_compute_mmcr,
+ .config_bhrb = power9_config_bhrb,
+ .bhrb_filter_map = power9_bhrb_filter_map,
+ .get_constraint = isa207_get_constraint,
+ .disable_pmc = isa207_disable_pmc,
+ .flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
+ .n_generic = ARRAY_SIZE(power9_generic_events),
+ .generic_events = power9_generic_events,
+ .cache_events = &power9_cache_events,
+ .attr_groups = power9_isa207_pmu_attr_groups,
+ .bhrb_nr = 32,
+};
+
static struct power_pmu power9_pmu = {
.name = "POWER9",
.n_counter = MAX_PMU_COUNTERS,
@@ -311,14 +412,19 @@ static struct power_pmu power9_pmu = {
static int __init init_power9_pmu(void)
{
- int rc;
+ int rc = 0;
/* Comes from cpu_specs[] */
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9"))
return -ENODEV;
- rc = register_power_pmu(&power9_pmu);
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ rc = register_power_pmu(&power9_isa207_pmu);
+ } else {
+ rc = register_power_pmu(&power9_pmu);
+ }
+
if (rc)
return rc;
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
index e3257f2..f8d1410 100644
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -102,18 +102,18 @@
bool
select IBM405_ERR77
select IBM405_ERR51
- select IBM_EMAC_ZMII
+ select IBM_EMAC_ZMII if IBM_EMAC
config 405EX
bool
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
config 405EZ
bool
- select IBM_EMAC_NO_FLOW_CTRL
- select IBM_EMAC_MAL_CLR_ICINTSTAT
- select IBM_EMAC_MAL_COMMON_ERR
+ select IBM_EMAC_NO_FLOW_CTRL if IBM_EMAC
+ select IBM_EMAC_MAL_CLR_ICINTSTAT if IBM_EMAC
+ select IBM_EMAC_MAL_COMMON_ERR if IBM_EMAC
config XILINX_VIRTEX
bool
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 48fc180..8d18669 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -26,7 +26,7 @@
select PCI_MSI
select PPC4xx_MSI
select PPC4xx_PCI_EXPRESS
- select IBM_EMAC_RGMII
+ select IBM_EMAC_RGMII if IBM_EMAC
help
This option enables support for the APM APM821xx Evaluation board.
@@ -125,8 +125,8 @@
select PPC4xx_PCI_EXPRESS
select PCI_MSI
select PPC4xx_MSI
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
help
This option enables support for the AMCC PPC460EX evaluation board.
@@ -138,8 +138,8 @@
select 460EX # Odd since it uses 460GT but the effects are the same
select PCI
select PPC4xx_PCI_EXPRESS
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
help
This option enables support for the AMCC PPC460GT evaluation board.
@@ -164,7 +164,7 @@
select 460SX
select PCI
select PPC4xx_PCI_EXPRESS
- select IBM_EMAC_RGMII
+ select IBM_EMAC_RGMII if IBM_EMAC
help
This option enables support for the AMCC PPC460SX evaluation board.
@@ -213,7 +213,7 @@
select NETDEVICES
select ETHERNET
select NET_VENDOR_IBM
- select IBM_EMAC_EMAC4
+ select IBM_EMAC_EMAC4 if IBM_EMAC
select USB if USB_SUPPORT
select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD
select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD
@@ -290,54 +290,54 @@
bool
select PPC_FPU
select IBM440EP_ERR42
- select IBM_EMAC_ZMII
+ select IBM_EMAC_ZMII if IBM_EMAC
config 440EPX
bool
select PPC_FPU
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
select USB_EHCI_BIG_ENDIAN_MMIO
select USB_EHCI_BIG_ENDIAN_DESC
config 440GRX
bool
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
config 440GP
bool
- select IBM_EMAC_ZMII
+ select IBM_EMAC_ZMII if IBM_EMAC
config 440GX
bool
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII #test only
- select IBM_EMAC_TAH #test only
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC #test only
+ select IBM_EMAC_TAH if IBM_EMAC #test only
config 440SP
bool
config 440SPe
bool
- select IBM_EMAC_EMAC4
+ select IBM_EMAC_EMAC4 if IBM_EMAC
config 460EX
bool
select PPC_FPU
- select IBM_EMAC_EMAC4
- select IBM_EMAC_TAH
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_TAH if IBM_EMAC
config 460SX
bool
select PPC_FPU
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII
- select IBM_EMAC_TAH
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
+ select IBM_EMAC_TAH if IBM_EMAC
config 476FPE
bool
@@ -346,8 +346,8 @@
config APM821xx
bool
select PPC_FPU
- select IBM_EMAC_EMAC4
- select IBM_EMAC_TAH
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_TAH if IBM_EMAC
config 476FPE_ERR46
depends on 476FPE
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 1179115..3803b0a 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -220,7 +220,7 @@ define_machine(corenet_generic) {
*
* Likewise, problems have been seen with kexec when coreint is enabled.
*/
-#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE)
.get_irq = mpic_get_irq,
#else
.get_irq = mpic_get_coreint_irq,
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index fe9f19e..a83a6d2 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -349,13 +349,13 @@ struct smp_ops_t smp_85xx_ops = {
.cpu_disable = generic_cpu_disable,
.cpu_die = generic_cpu_die,
#endif
-#if defined(CONFIG_KEXEC) && !defined(CONFIG_PPC64)
+#if defined(CONFIG_KEXEC_CORE) && !defined(CONFIG_PPC64)
.give_timebase = smp_generic_give_timebase,
.take_timebase = smp_generic_take_timebase,
#endif
};
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
#ifdef CONFIG_PPC32
atomic_t kexec_down_cpus = ATOMIC_INIT(0);
@@ -458,7 +458,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image)
default_machine_kexec(image);
}
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
static void smp_85xx_basic_setup(int cpu_nr)
{
@@ -512,7 +512,7 @@ void __init mpc85xx_smp_init(void)
#endif
smp_ops = &smp_85xx_ops;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
ppc_md.kexec_cpu_down = mpc85xx_smp_kexec_cpu_down;
ppc_md.machine_kexec = mpc85xx_smp_machine_kexec;
#endif
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index fbdae83..7e3a2eb 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -168,17 +168,6 @@
well, but enabling it uses about 8KB of memory to keep copies
of the register contents in software.
-config IBMVIO
- depends on PPC_PSERIES
- bool
- default y
-
-config IBMEBUS
- depends on PPC_PSERIES
- bool "Support for GX bus based adapters"
- help
- Bus device driver for GX bus based adapters.
-
config EEH
bool
depends on (PPC_POWERNV || PPC_PSERIES) && PCI
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index d9088f0..a4522f0 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -17,10 +17,10 @@
select PPC_CELL_COMMON
select MPIC
select PPC_IO_WORKAROUNDS
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII #test only
- select IBM_EMAC_TAH #test only
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC #test only
+ select IBM_EMAC_TAH if IBM_EMAC #test only
default n
config PPC_IBM_CELL_BLADE
@@ -46,7 +46,6 @@
default m
depends on PPC_CELL
select SPU_BASE
- select MEMORY_HOTPLUG
help
The SPU file system is used to access Synergistic Processing
Units on machines implementing the Broadband Processor
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index e84d8fb..96c2b8a 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -676,7 +676,7 @@ static ssize_t spu_stat_show(struct device *dev,
static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
struct crash_spu_info {
struct spu *spu;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 2354ea5..6fb5522 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -393,7 +393,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
/* Create PE */
ret = eeh_add_to_parent_pe(edev);
if (ret) {
- pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n",
+ pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%x)\n",
__func__, hose->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret);
return NULL;
@@ -1097,7 +1097,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
bus = eeh_pe_bus_get(pe);
if (!bus) {
- pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
+ pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
return -EIO;
}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index aec85e7..73b155f 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -263,7 +263,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
/* Enable the bypass window */
top = roundup_pow_of_two(top);
- dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
+ dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n",
npe->pe_number);
rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
npe->pe_number, npe->pe_number,
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 44d2d84..3aa40f1 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -304,8 +304,11 @@
OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
+OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR);
OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
+OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI);
OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
+OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR);
OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 893d8ea..2822935 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -886,3 +886,5 @@ EXPORT_SYMBOL_GPL(opal_leds_get_ind);
EXPORT_SYMBOL_GPL(opal_leds_set_ind);
/* Export this symbol for PowerNV Operator Panel class driver */
EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
+/* Export this for KVM */
+EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index d4b33dd..b07680c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -83,7 +83,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
PCI_SLOT(pe->rid), PCI_FUNC(pe->rid));
#endif /* CONFIG_PCI_IOV*/
- printk("%spci %s: [PE# %.3d] %pV",
+ printk("%spci %s: [PE# %.2x] %pV",
level, pfix, pe->pe_number, &vaf);
va_end(args);
@@ -145,8 +145,8 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
*/
rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
- if (rc != OPAL_SUCCESS)
- pr_warn("%s: Error %lld unfreezing PHB#%d-PE#%d\n",
+ if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
+ pr_warn("%s: Error %lld unfreezing PHB#%x-PE#%x\n",
__func__, rc, phb->hose->global_number, pe_no);
return &phb->ioda.pe_array[pe_no];
@@ -155,13 +155,13 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
{
if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) {
- pr_warn("%s: Invalid PE %d on PHB#%x\n",
+ pr_warn("%s: Invalid PE %x on PHB#%x\n",
__func__, pe_no, phb->hose->global_number);
return;
}
if (test_and_set_bit(pe_no, phb->ioda.pe_alloc))
- pr_debug("%s: PE %d was reserved on PHB#%x\n",
+ pr_debug("%s: PE %x was reserved on PHB#%x\n",
__func__, pe_no, phb->hose->global_number);
pnv_ioda_init_pe(phb, pe_no);
@@ -229,7 +229,7 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb)
else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
r->end -= (2 * phb->ioda.m64_segsize);
else
- pr_warn(" Cannot strip M64 segment for reserved PE#%d\n",
+ pr_warn(" Cannot strip M64 segment for reserved PE#%x\n",
phb->ioda.reserved_pe_idx);
return 0;
@@ -291,7 +291,7 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb)
OPAL_M64_WINDOW_TYPE, index, base, 0,
PNV_IODA1_M64_SEGS * segsz);
if (rc != OPAL_SUCCESS) {
- pr_warn(" Error %lld setting M64 PHB#%d-BAR#%d\n",
+ pr_warn(" Error %lld setting M64 PHB#%x-BAR#%d\n",
rc, phb->hose->global_number, index);
goto fail;
}
@@ -300,7 +300,7 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb)
OPAL_M64_WINDOW_TYPE, index,
OPAL_ENABLE_M64_SPLIT);
if (rc != OPAL_SUCCESS) {
- pr_warn(" Error %lld enabling M64 PHB#%d-BAR#%d\n",
+ pr_warn(" Error %lld enabling M64 PHB#%x-BAR#%d\n",
rc, phb->hose->global_number, index);
goto fail;
}
@@ -316,7 +316,7 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb)
else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
r->end -= (2 * phb->ioda.m64_segsize);
else
- WARN(1, "Wrong reserved PE#%d on PHB#%d\n",
+ WARN(1, "Wrong reserved PE#%x on PHB#%x\n",
phb->ioda.reserved_pe_idx, phb->hose->global_number);
return 0;
@@ -414,7 +414,7 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
pe->pe_number / PNV_IODA1_M64_SEGS,
pe->pe_number % PNV_IODA1_M64_SEGS);
if (rc != OPAL_SUCCESS)
- pr_warn("%s: Error %lld mapping M64 for PHB#%d-PE#%d\n",
+ pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n",
__func__, rc, phb->hose->global_number,
pe->pe_number);
}
@@ -941,14 +941,14 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
pe->mve_number = pe->pe_number;
rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number);
if (rc != OPAL_SUCCESS) {
- pe_err(pe, "OPAL error %ld setting up MVE %d\n",
+ pe_err(pe, "OPAL error %ld setting up MVE %x\n",
rc, pe->mve_number);
pe->mve_number = -1;
} else {
rc = opal_pci_set_mve_enable(phb->opal_id,
pe->mve_number, OPAL_ENABLE_MVE);
if (rc) {
- pe_err(pe, "OPAL error %ld enabling MVE %d\n",
+ pe_err(pe, "OPAL error %ld enabling MVE %x\n",
rc, pe->mve_number);
pe->mve_number = -1;
}
@@ -1159,10 +1159,10 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pe->rid = bus->busn_res.start << 8;
if (all)
- pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
+ pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n",
bus->busn_res.start, bus->busn_res.end, pe->pe_number);
else
- pe_info(pe, "Secondary bus %d associated with PE#%d\n",
+ pe_info(pe, "Secondary bus %d associated with PE#%x\n",
bus->busn_res.start, pe->pe_number);
if (pnv_ioda_configure_pe(phb, pe)) {
@@ -1213,7 +1213,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
* peer NPU.
*/
dev_info(&npu_pdev->dev,
- "Associating to existing PE %d\n", pe_num);
+ "Associating to existing PE %x\n", pe_num);
pci_dev_get(npu_pdev);
npu_pdn = pci_get_pdn(npu_pdev);
rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
@@ -1539,7 +1539,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
pci_iov_virtfn_devfn(pdev, vf_index);
- pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n",
+ pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
hose->global_number, pdev->bus->number,
PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
@@ -2844,7 +2844,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
pnv_set_msi_irq_chip(phb, virq);
pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
- " address=%x_%08x data=%x PE# %d\n",
+ " address=%x_%08x data=%x PE# %x\n",
pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
msg->address_hi, msg->address_lo, data, pe->pe_number);
@@ -2993,7 +2993,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
rc = opal_pci_map_pe_mmio_window(phb->opal_id,
pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
if (rc != OPAL_SUCCESS) {
- pr_err("%s: Error %lld mapping IO segment#%d to PE#%d\n",
+ pr_err("%s: Error %lld mapping IO segment#%d to PE#%x\n",
__func__, rc, index, pe->pe_number);
break;
}
@@ -3017,7 +3017,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
rc = opal_pci_map_pe_mmio_window(phb->opal_id,
pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
if (rc != OPAL_SUCCESS) {
- pr_err("%s: Error %lld mapping M32 segment#%d to PE#%d",
+ pr_err("%s: Error %lld mapping M32 segment#%d to PE#%x",
__func__, rc, index, pe->pe_number);
break;
}
@@ -3281,7 +3281,7 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
pnv_pci_ioda2_setup_dma_pe(phb, pe);
break;
default:
- pr_warn("%s: No DMA for PHB#%d (type %d)\n",
+ pr_warn("%s: No DMA for PHB#%x (type %d)\n",
__func__, phb->hose->global_number, phb->type);
}
}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index db7b802..c6d554f 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -234,7 +234,7 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
int i;
data = (struct OpalIoP7IOCPhbErrorData *)common;
- pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n",
+ pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n",
hose->global_number, be32_to_cpu(common->version));
if (data->brdgCtl)
@@ -326,7 +326,7 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
int i;
data = (struct OpalIoPhb3ErrorData*)common;
- pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n",
+ pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n",
hose->global_number, be32_to_cpu(common->version));
if (data->brdgCtl)
pr_info("brdgCtl: %08x\n",
@@ -516,7 +516,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
}
}
- pr_devel(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
+ pr_devel(" -> EEH check, bdfn=%04x PE#%x fstate=%x\n",
(pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
/* Clear the frozen state if applicable */
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index efe8b6b..d50c7d9 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -174,7 +174,7 @@ static void pnv_shutdown(void)
opal_shutdown();
}
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
static void pnv_kexec_wait_secondaries_down(void)
{
int my_cpu, i, notified = -1;
@@ -245,7 +245,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
}
}
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
static unsigned long pnv_memory_block_size(void)
@@ -311,7 +311,7 @@ define_machine(powernv) {
.machine_shutdown = pnv_shutdown,
.power_save = NULL,
.calibrate_decr = generic_calibrate_decr,
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.kexec_cpu_down = pnv_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index cb3c503..cc2b281 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
vflags &= ~HPTE_V_SECONDARY;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags;
+ hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
spin_lock_irqsave(&ps3_htab_lock, flags);
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 3a487e7..6244bc8 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -250,7 +250,7 @@ static int __init ps3_probe(void)
return 1;
}
-#if defined(CONFIG_KEXEC)
+#if defined(CONFIG_KEXEC_CORE)
static void ps3_kexec_cpu_down(int crash_shutdown, int secondary)
{
int cpu = smp_processor_id();
@@ -276,7 +276,7 @@ define_machine(ps3) {
.progress = ps3_progress,
.restart = ps3_restart,
.halt = ps3_halt,
-#if defined(CONFIG_KEXEC)
+#if defined(CONFIG_KEXEC_CORE)
.kexec_cpu_down = ps3_kexec_cpu_down,
#endif
};
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index bec90fb..e1c280a 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -127,3 +127,14 @@
systems. 24x7 is available on Power 8 systems.
If unsure, select Y.
+
+config IBMVIO
+ depends on PPC_PSERIES
+ bool
+ default y
+
+config IBMEBUS
+ depends on PPC_PSERIES && !CPU_LITTLE_ENDIAN
+ bool "Support for GX bus based adapters"
+ help
+ Bus device driver for GX bus based adapters.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index fedc2ccf0..8f4ba08 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -8,7 +8,7 @@
pci.o pci_dlpar.o eeh_pseries.o msi.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCANLOG) += scanlog.o
-obj-$(CONFIG_KEXEC) += kexec.o
+obj-$(CONFIG_KEXEC_CORE) += kexec.o
obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
@@ -21,6 +21,8 @@
obj-$(CONFIG_DTL) += dtl.o
obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
obj-$(CONFIG_LPARCFG) += lparcfg.o
+obj-$(CONFIG_IBMVIO) += vio.o
+obj-$(CONFIG_IBMEBUS) += ibmebus.o
ifeq ($(CONFIG_PPC_PSERIES),y)
obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 423e450..76caa4a 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -418,84 +418,136 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
}
}
+static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog)
+{
+ char *arg;
+
+ arg = strsep(cmd, " ");
+ if (!arg)
+ return -EINVAL;
+
+ if (sysfs_streq(arg, "memory")) {
+ hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
+ } else if (sysfs_streq(arg, "cpu")) {
+ hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU;
+ } else {
+ pr_err("Invalid resource specified.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dlpar_parse_action(char **cmd, struct pseries_hp_errorlog *hp_elog)
+{
+ char *arg;
+
+ arg = strsep(cmd, " ");
+ if (!arg)
+ return -EINVAL;
+
+ if (sysfs_streq(arg, "add")) {
+ hp_elog->action = PSERIES_HP_ELOG_ACTION_ADD;
+ } else if (sysfs_streq(arg, "remove")) {
+ hp_elog->action = PSERIES_HP_ELOG_ACTION_REMOVE;
+ } else {
+ pr_err("Invalid action specified.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dlpar_parse_id_type(char **cmd, struct pseries_hp_errorlog *hp_elog)
+{
+ char *arg;
+ u32 count, index;
+
+ arg = strsep(cmd, " ");
+ if (!arg)
+ return -EINVAL;
+
+ if (sysfs_streq(arg, "index")) {
+ hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+ arg = strsep(cmd, " ");
+ if (!arg) {
+ pr_err("No DRC Index specified.\n");
+ return -EINVAL;
+ }
+
+ if (kstrtou32(arg, 0, &index)) {
+ pr_err("Invalid DRC Index specified.\n");
+ return -EINVAL;
+ }
+
+ hp_elog->_drc_u.drc_index = cpu_to_be32(index);
+ } else if (sysfs_streq(arg, "count")) {
+ hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_COUNT;
+ arg = strsep(cmd, " ");
+ if (!arg) {
+ pr_err("No DRC count specified.\n");
+ return -EINVAL;
+ }
+
+ if (kstrtou32(arg, 0, &count)) {
+ pr_err("Invalid DRC count specified.\n");
+ return -EINVAL;
+ }
+
+ hp_elog->_drc_u.drc_count = cpu_to_be32(count);
+ } else {
+ pr_err("Invalid id_type specified.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
const char *buf, size_t count)
{
struct pseries_hp_errorlog *hp_elog;
struct completion hotplug_done;
- const char *arg;
+ char *argbuf;
+ char *args;
int rc;
+ args = argbuf = kstrdup(buf, GFP_KERNEL);
hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
- if (!hp_elog) {
- rc = -ENOMEM;
- goto dlpar_store_out;
+ if (!hp_elog || !argbuf) {
+ pr_info("Could not allocate resources for DLPAR operation\n");
+ kfree(argbuf);
+ kfree(hp_elog);
+ return -ENOMEM;
}
- /* Parse out the request from the user, this will be in the form
+ /*
+ * Parse out the request from the user, this will be in the form:
* <resource> <action> <id_type> <id>
*/
- arg = buf;
- if (!strncmp(arg, "memory", 6)) {
- hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
- arg += strlen("memory ");
- } else if (!strncmp(arg, "cpu", 3)) {
- hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU;
- arg += strlen("cpu ");
- } else {
- pr_err("Invalid resource specified: \"%s\"\n", buf);
- rc = -EINVAL;
+ rc = dlpar_parse_resource(&args, hp_elog);
+ if (rc)
goto dlpar_store_out;
- }
- if (!strncmp(arg, "add", 3)) {
- hp_elog->action = PSERIES_HP_ELOG_ACTION_ADD;
- arg += strlen("add ");
- } else if (!strncmp(arg, "remove", 6)) {
- hp_elog->action = PSERIES_HP_ELOG_ACTION_REMOVE;
- arg += strlen("remove ");
- } else {
- pr_err("Invalid action specified: \"%s\"\n", buf);
- rc = -EINVAL;
+ rc = dlpar_parse_action(&args, hp_elog);
+ if (rc)
goto dlpar_store_out;
- }
- if (!strncmp(arg, "index", 5)) {
- u32 index;
-
- hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
- arg += strlen("index ");
- if (kstrtou32(arg, 0, &index)) {
- rc = -EINVAL;
- pr_err("Invalid drc_index specified: \"%s\"\n", buf);
- goto dlpar_store_out;
- }
-
- hp_elog->_drc_u.drc_index = cpu_to_be32(index);
- } else if (!strncmp(arg, "count", 5)) {
- u32 count;
-
- hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_COUNT;
- arg += strlen("count ");
- if (kstrtou32(arg, 0, &count)) {
- rc = -EINVAL;
- pr_err("Invalid count specified: \"%s\"\n", buf);
- goto dlpar_store_out;
- }
-
- hp_elog->_drc_u.drc_count = cpu_to_be32(count);
- } else {
- pr_err("Invalid id_type specified: \"%s\"\n", buf);
- rc = -EINVAL;
+ rc = dlpar_parse_id_type(&args, hp_elog);
+ if (rc)
goto dlpar_store_out;
- }
init_completion(&hotplug_done);
queue_hotplug_event(hp_elog, &hotplug_done, &rc);
wait_for_completion(&hotplug_done);
dlpar_store_out:
+ kfree(argbuf);
kfree(hp_elog);
+
+ if (rc)
+ pr_err("Could not handle DLPAR request \"%s\"\n", buf);
+
return rc ? rc : count;
}
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 1c428f0..1eef46d 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -270,7 +270,7 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
eeh_add_flag(EEH_ENABLED);
eeh_add_to_parent_pe(edev);
- pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%d-PE#%x\n",
+ pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n",
__func__, pdn->busno, PCI_SLOT(pdn->devfn),
PCI_FUNC(pdn->devfn), pe.phb->global_number,
pe.addr);
@@ -371,7 +371,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
pe->config_addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid), 0);
if (ret) {
- pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n",
+ pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->config_addr);
return 0;
}
@@ -384,7 +384,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
pe->config_addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid), 0);
if (ret) {
- pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n",
+ pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->config_addr);
return 0;
}
@@ -653,7 +653,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
rtas_busy_delay(ret);
}
- pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
+ pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
__func__, pe->phb->global_number, pe->addr, ret);
return ret;
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 76ec104..2617f9f 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -472,12 +472,15 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
/* Validate that there are enough LMBs to satisfy the request */
for (i = 0; i < num_lmbs; i++) {
- if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
+ if (lmb_is_removable(&lmbs[i]))
lmbs_available++;
}
- if (lmbs_available < lmbs_to_remove)
+ if (lmbs_available < lmbs_to_remove) {
+ pr_info("Not enough LMBs available (%d of %d) to satisfy request\n",
+ lmbs_available, lmbs_to_remove);
return -EINVAL;
+ }
for (i = 0; i < num_lmbs && lmbs_removed < lmbs_to_remove; i++) {
rc = dlpar_remove_lmb(&lmbs[i]);
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
similarity index 66%
rename from arch/powerpc/kernel/ibmebus.c
rename to arch/powerpc/platforms/pseries/ibmebus.c
index 35f5244..614c285 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -423,303 +423,6 @@ static struct device_attribute ibmebus_bus_device_attrs[] = {
__ATTR_NULL
};
-#ifdef CONFIG_PM_SLEEP
-static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg)
-{
- struct platform_device *of_dev = to_platform_device(dev);
- struct platform_driver *drv = to_platform_driver(dev->driver);
- int ret = 0;
-
- if (dev->driver && drv->suspend)
- ret = drv->suspend(of_dev, mesg);
- return ret;
-}
-
-static int ibmebus_bus_legacy_resume(struct device *dev)
-{
- struct platform_device *of_dev = to_platform_device(dev);
- struct platform_driver *drv = to_platform_driver(dev->driver);
- int ret = 0;
-
- if (dev->driver && drv->resume)
- ret = drv->resume(of_dev);
- return ret;
-}
-
-static int ibmebus_bus_pm_prepare(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (drv && drv->pm && drv->pm->prepare)
- ret = drv->pm->prepare(dev);
-
- return ret;
-}
-
-static void ibmebus_bus_pm_complete(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
-
- if (drv && drv->pm && drv->pm->complete)
- drv->pm->complete(dev);
-}
-
-#ifdef CONFIG_SUSPEND
-
-static int ibmebus_bus_pm_suspend(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->suspend)
- ret = drv->pm->suspend(dev);
- } else {
- ret = ibmebus_bus_legacy_suspend(dev, PMSG_SUSPEND);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_suspend_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->suspend_noirq)
- ret = drv->pm->suspend_noirq(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_resume(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->resume)
- ret = drv->pm->resume(dev);
- } else {
- ret = ibmebus_bus_legacy_resume(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_resume_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->resume_noirq)
- ret = drv->pm->resume_noirq(dev);
- }
-
- return ret;
-}
-
-#else /* !CONFIG_SUSPEND */
-
-#define ibmebus_bus_pm_suspend NULL
-#define ibmebus_bus_pm_resume NULL
-#define ibmebus_bus_pm_suspend_noirq NULL
-#define ibmebus_bus_pm_resume_noirq NULL
-
-#endif /* !CONFIG_SUSPEND */
-
-#ifdef CONFIG_HIBERNATE_CALLBACKS
-
-static int ibmebus_bus_pm_freeze(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->freeze)
- ret = drv->pm->freeze(dev);
- } else {
- ret = ibmebus_bus_legacy_suspend(dev, PMSG_FREEZE);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_freeze_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->freeze_noirq)
- ret = drv->pm->freeze_noirq(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_thaw(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->thaw)
- ret = drv->pm->thaw(dev);
- } else {
- ret = ibmebus_bus_legacy_resume(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_thaw_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->thaw_noirq)
- ret = drv->pm->thaw_noirq(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_poweroff(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->poweroff)
- ret = drv->pm->poweroff(dev);
- } else {
- ret = ibmebus_bus_legacy_suspend(dev, PMSG_HIBERNATE);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_poweroff_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->poweroff_noirq)
- ret = drv->pm->poweroff_noirq(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_restore(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->restore)
- ret = drv->pm->restore(dev);
- } else {
- ret = ibmebus_bus_legacy_resume(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_restore_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->restore_noirq)
- ret = drv->pm->restore_noirq(dev);
- }
-
- return ret;
-}
-
-#else /* !CONFIG_HIBERNATE_CALLBACKS */
-
-#define ibmebus_bus_pm_freeze NULL
-#define ibmebus_bus_pm_thaw NULL
-#define ibmebus_bus_pm_poweroff NULL
-#define ibmebus_bus_pm_restore NULL
-#define ibmebus_bus_pm_freeze_noirq NULL
-#define ibmebus_bus_pm_thaw_noirq NULL
-#define ibmebus_bus_pm_poweroff_noirq NULL
-#define ibmebus_bus_pm_restore_noirq NULL
-
-#endif /* !CONFIG_HIBERNATE_CALLBACKS */
-
-static struct dev_pm_ops ibmebus_bus_dev_pm_ops = {
- .prepare = ibmebus_bus_pm_prepare,
- .complete = ibmebus_bus_pm_complete,
- .suspend = ibmebus_bus_pm_suspend,
- .resume = ibmebus_bus_pm_resume,
- .freeze = ibmebus_bus_pm_freeze,
- .thaw = ibmebus_bus_pm_thaw,
- .poweroff = ibmebus_bus_pm_poweroff,
- .restore = ibmebus_bus_pm_restore,
- .suspend_noirq = ibmebus_bus_pm_suspend_noirq,
- .resume_noirq = ibmebus_bus_pm_resume_noirq,
- .freeze_noirq = ibmebus_bus_pm_freeze_noirq,
- .thaw_noirq = ibmebus_bus_pm_thaw_noirq,
- .poweroff_noirq = ibmebus_bus_pm_poweroff_noirq,
- .restore_noirq = ibmebus_bus_pm_restore_noirq,
-};
-
-#define IBMEBUS_BUS_PM_OPS_PTR (&ibmebus_bus_dev_pm_ops)
-
-#else /* !CONFIG_PM_SLEEP */
-
-#define IBMEBUS_BUS_PM_OPS_PTR NULL
-
-#endif /* !CONFIG_PM_SLEEP */
-
struct bus_type ibmebus_bus_type = {
.name = "ibmebus",
.uevent = of_device_uevent_modalias,
@@ -729,7 +432,6 @@ struct bus_type ibmebus_bus_type = {
.remove = ibmebus_bus_device_remove,
.shutdown = ibmebus_bus_device_shutdown,
.dev_attrs = ibmebus_bus_device_attrs,
- .pm = IBMEBUS_BUS_PM_OPS_PTR,
};
EXPORT_SYMBOL(ibmebus_bus_type);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 24ad43a..486e570 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
hpte_group, vpn, pa, rflags, vflags, psize);
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
if (!(vflags & HPTE_V_BOLTED))
pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 97aa3f3..7736352 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -367,7 +367,7 @@ void pseries_disable_reloc_on_exc(void)
}
EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
static void pSeries_machine_kexec(struct kimage *image)
{
if (firmware_has_feature(FW_FEATURE_SET_MODE))
@@ -725,7 +725,7 @@ define_machine(pseries) {
.progress = rtas_progress,
.system_reset_exception = pSeries_system_reset_exception,
.machine_check_exception = pSeries_machine_check_exception,
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
.machine_kexec = pSeries_machine_kexec,
.kexec_cpu_down = pseries_kexec_cpu_down,
#endif
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/platforms/pseries/vio.c
similarity index 100%
rename from arch/powerpc/kernel/vio.c
rename to arch/powerpc/platforms/pseries/vio.c
diff --git a/arch/powerpc/purgatory/.gitignore b/arch/powerpc/purgatory/.gitignore
new file mode 100644
index 0000000..e9e66f1
--- /dev/null
+++ b/arch/powerpc/purgatory/.gitignore
@@ -0,0 +1,2 @@
+kexec-purgatory.c
+purgatory.ro
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
new file mode 100644
index 0000000..ac8793c
--- /dev/null
+++ b/arch/powerpc/purgatory/Makefile
@@ -0,0 +1,15 @@
+targets += trampoline.o purgatory.ro kexec-purgatory.c
+
+LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined
+
+$(obj)/purgatory.ro: $(obj)/trampoline.o FORCE
+ $(call if_changed,ld)
+
+CMD_BIN2C = $(objtree)/scripts/basic/bin2c
+quiet_cmd_bin2c = BIN2C $@
+ cmd_bin2c = $(CMD_BIN2C) kexec_purgatory < $< > $@
+
+$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
+ $(call if_changed,bin2c)
+
+obj-y += kexec-purgatory.o
diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S
new file mode 100644
index 0000000..f9760cc
--- /dev/null
+++ b/arch/powerpc/purgatory/trampoline.S
@@ -0,0 +1,128 @@
+/*
+ * kexec trampoline
+ *
+ * Based on code taken from kexec-tools and kexec-lite.
+ *
+ * Copyright (C) 2004 - 2005, Milton D Miller II, IBM Corporation
+ * Copyright (C) 2006, Mohan Kumar M, IBM Corporation
+ * Copyright (C) 2013, Anton Blanchard, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation (version 2 of the License).
+ */
+
+#if defined(__LITTLE_ENDIAN__)
+#define STWX_BE stwbrx
+#define LWZX_BE lwbrx
+#elif defined(__BIG_ENDIAN__)
+#define STWX_BE stwx
+#define LWZX_BE lwzx
+#else
+#error no endianness defined!
+#endif
+
+ .machine ppc64
+ .balign 256
+ .globl purgatory_start
+purgatory_start:
+ b master
+
+ /* ABI: possible run_at_load flag at 0x5c */
+ .org purgatory_start + 0x5c
+ .globl run_at_load
+run_at_load:
+ .long 0
+ .size run_at_load, . - run_at_load
+
+ /* ABI: slaves start at 60 with r3=phys */
+ .org purgatory_start + 0x60
+slave:
+ b .
+ /* ABI: end of copied region */
+ .org purgatory_start + 0x100
+ .size purgatory_start, . - purgatory_start
+
+/*
+ * The above 0x100 bytes at purgatory_start are replaced with the
+ * code from the kernel (or next stage) by setup_purgatory().
+ */
+
+master:
+ or %r1,%r1,%r1 /* low priority to let other threads catchup */
+ isync
+ mr %r17,%r3 /* save cpu id to r17 */
+ mr %r15,%r4 /* save physical address in reg15 */
+
+ or %r3,%r3,%r3 /* ok now to high priority, lets boot */
+ lis %r6,0x1
+ mtctr %r6 /* delay a bit for slaves to catch up */
+ bdnz . /* before we overwrite 0-100 again */
+
+ bl 0f /* Work out where we're running */
+0: mflr %r18
+
+ /* load device-tree address */
+ ld %r3, (dt_offset - 0b)(%r18)
+ mr %r16,%r3 /* save dt address in reg16 */
+ li %r4,20
+ LWZX_BE %r6,%r3,%r4 /* fetch __be32 version number at byte 20 */
+ cmpwi %r0,%r6,2 /* v2 or later? */
+ blt 1f
+ li %r4,28
+ STWX_BE %r17,%r3,%r4 /* Store my cpu as __be32 at byte 28 */
+1:
+ /* load the kernel address */
+ ld %r4,(kernel - 0b)(%r18)
+
+ /* load the run_at_load flag */
+ /* possibly patched by kexec */
+ ld %r6,(run_at_load - 0b)(%r18)
+ /* and patch it into the kernel */
+ stw %r6,(0x5c)(%r4)
+
+ mr %r3,%r16 /* restore dt address */
+
+ li %r5,0 /* r5 will be 0 for kernel */
+
+ mfmsr %r11
+ andi. %r10,%r11,1 /* test MSR_LE */
+ bne .Little_endian
+
+ mtctr %r4 /* prepare branch to */
+ bctr /* start kernel */
+
+.Little_endian:
+ mtsrr0 %r4 /* prepare branch to */
+
+ clrrdi %r11,%r11,1 /* clear MSR_LE */
+ mtsrr1 %r11
+
+ rfid /* update MSR and start kernel */
+
+
+ .balign 8
+ .globl kernel
+kernel:
+ .llong 0x0
+ .size kernel, . - kernel
+
+ .balign 8
+ .globl dt_offset
+dt_offset:
+ .llong 0x0
+ .size dt_offset, . - dt_offset
+
+
+ .data
+ .balign 8
+.globl sha256_digest
+sha256_digest:
+ .skip 32
+ .size sha256_digest, . - sha256_digest
+
+ .balign 8
+.globl sha_regions
+sha_regions:
+ .skip 8 * 2 * 16
+ .size sha_regions, . - sha_regions
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 7605455..9c0e17c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -10,6 +10,8 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
+#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/smp.h>
@@ -225,6 +227,7 @@ Commands:\n\
#endif
"\
dr dump stream of raw bytes\n\
+ dt dump the tracing buffers (uses printk)\n\
e print exception information\n\
f flush cache\n\
la lookup symbol+offset of specified address\n\
@@ -2364,6 +2367,9 @@ dump(void)
dump_log_buf();
} else if (c == 'o') {
dump_opal_msglog();
+ } else if (c == 't') {
+ ftrace_dump(DUMP_ALL);
+ tracing_on();
} else if (c == 'r') {
scanhex(&ndump);
if (ndump == 0)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 650830e..3741461 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -631,9 +631,9 @@ static int determine_backup_region(u64 start, u64 end, void *arg)
int crash_load_segments(struct kimage *image)
{
- unsigned long src_start, src_sz, elf_sz;
- void *elf_addr;
int ret;
+ struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+ .buf_max = ULONG_MAX, .top_down = false };
/*
* Determine and load a segment for backup area. First 640K RAM
@@ -647,43 +647,44 @@ int crash_load_segments(struct kimage *image)
if (ret < 0)
return ret;
- src_start = image->arch.backup_src_start;
- src_sz = image->arch.backup_src_sz;
-
/* Add backup segment. */
- if (src_sz) {
+ if (image->arch.backup_src_sz) {
+ kbuf.buffer = &crash_zero_bytes;
+ kbuf.bufsz = sizeof(crash_zero_bytes);
+ kbuf.memsz = image->arch.backup_src_sz;
+ kbuf.buf_align = PAGE_SIZE;
/*
* Ideally there is no source for backup segment. This is
* copied in purgatory after crash. Just add a zero filled
* segment for now to make sure checksum logic works fine.
*/
- ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
- sizeof(crash_zero_bytes), src_sz,
- PAGE_SIZE, 0, -1, 0,
- &image->arch.backup_load_addr);
+ ret = kexec_add_buffer(&kbuf);
if (ret)
return ret;
+ image->arch.backup_load_addr = kbuf.mem;
pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
- image->arch.backup_load_addr, src_start, src_sz);
+ image->arch.backup_load_addr,
+ image->arch.backup_src_start, kbuf.memsz);
}
/* Prepare elf headers and add a segment */
- ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
+ ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
if (ret)
return ret;
- image->arch.elf_headers = elf_addr;
- image->arch.elf_headers_sz = elf_sz;
+ image->arch.elf_headers = kbuf.buffer;
+ image->arch.elf_headers_sz = kbuf.bufsz;
- ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
- ELF_CORE_HEADER_ALIGN, 0, -1, 0,
- &image->arch.elf_load_addr);
+ kbuf.memsz = kbuf.bufsz;
+ kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+ ret = kexec_add_buffer(&kbuf);
if (ret) {
vfree((void *)image->arch.elf_headers);
return ret;
}
+ image->arch.elf_load_addr = kbuf.mem;
pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- image->arch.elf_load_addr, elf_sz, elf_sz);
+ image->arch.elf_load_addr, kbuf.bufsz, kbuf.bufsz);
return ret;
}
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 3407b14..d0a814a 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -331,17 +331,17 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
struct setup_header *header;
int setup_sects, kern16_size, ret = 0;
- unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
+ unsigned long setup_header_size, params_cmdline_sz;
struct boot_params *params;
unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
unsigned long purgatory_load_addr;
- unsigned long kernel_bufsz, kernel_memsz, kernel_align;
- char *kernel_buf;
struct bzimage64_data *ldata;
struct kexec_entry64_regs regs64;
void *stack;
unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
+ struct kexec_buf kbuf = { .image = image, .buf_max = ULONG_MAX,
+ .top_down = true };
header = (struct setup_header *)(kernel + setup_hdr_offset);
setup_sects = header->setup_sects;
@@ -402,11 +402,11 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
MAX_ELFCOREHDR_STR_LEN;
params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
- params_misc_sz = params_cmdline_sz + efi_map_sz +
+ kbuf.bufsz = params_cmdline_sz + efi_map_sz +
sizeof(struct setup_data) +
sizeof(struct efi_setup_data);
- params = kzalloc(params_misc_sz, GFP_KERNEL);
+ params = kzalloc(kbuf.bufsz, GFP_KERNEL);
if (!params)
return ERR_PTR(-ENOMEM);
efi_map_offset = params_cmdline_sz;
@@ -418,37 +418,41 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
/* Is there a limit on setup header size? */
memcpy(¶ms->hdr, (kernel + setup_hdr_offset), setup_header_size);
- ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
- params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
- ULONG_MAX, 1, &bootparam_load_addr);
+ kbuf.buffer = params;
+ kbuf.memsz = kbuf.bufsz;
+ kbuf.buf_align = 16;
+ kbuf.buf_min = MIN_BOOTPARAM_ADDR;
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
+ bootparam_load_addr = kbuf.mem;
pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- bootparam_load_addr, params_misc_sz, params_misc_sz);
+ bootparam_load_addr, kbuf.bufsz, kbuf.bufsz);
/* Load kernel */
- kernel_buf = kernel + kern16_size;
- kernel_bufsz = kernel_len - kern16_size;
- kernel_memsz = PAGE_ALIGN(header->init_size);
- kernel_align = header->kernel_alignment;
-
- ret = kexec_add_buffer(image, kernel_buf,
- kernel_bufsz, kernel_memsz, kernel_align,
- MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
- &kernel_load_addr);
+ kbuf.buffer = kernel + kern16_size;
+ kbuf.bufsz = kernel_len - kern16_size;
+ kbuf.memsz = PAGE_ALIGN(header->init_size);
+ kbuf.buf_align = header->kernel_alignment;
+ kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
+ kernel_load_addr = kbuf.mem;
pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- kernel_load_addr, kernel_memsz, kernel_memsz);
+ kernel_load_addr, kbuf.bufsz, kbuf.memsz);
/* Load initrd high */
if (initrd) {
- ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
- PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
- ULONG_MAX, 1, &initrd_load_addr);
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
+ initrd_load_addr = kbuf.mem;
pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
initrd_load_addr, initrd_len, initrd_len);
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 2e5233b..1b35e33 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -9,18 +9,119 @@
#include <linux/pci.h>
#include <linux/slab.h>
-#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <misc/cxl.h>
-#include <linux/fs.h>
#include <asm/pnv-pci.h>
#include <linux/msi.h>
+#include <linux/module.h>
+#include <linux/mount.h>
#include "cxl.h"
+/*
+ * Since we want to track memory mappings to be able to force-unmap
+ * when the AFU is no longer reachable, we need an inode. For devices
+ * opened through the cxl user API, this is not a problem, but a
+ * userland process can also get a cxl fd through the cxl_get_fd()
+ * API, which is used by the cxlflash driver.
+ *
+ * Therefore we implement our own simple pseudo-filesystem and inode
+ * allocator. We don't use the anonymous inode, as we need the
+ * meta-data associated with it (address_space) and it is shared by
+ * other drivers/processes, so it could lead to cxl unmapping VMAs
+ * from random processes.
+ */
+
+#define CXL_PSEUDO_FS_MAGIC 0x1697697f
+
+static int cxl_fs_cnt;
+static struct vfsmount *cxl_vfs_mount;
+
+static const struct dentry_operations cxl_fs_dops = {
+ .d_dname = simple_dname,
+};
+
+static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
+{
+ return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops,
+ CXL_PSEUDO_FS_MAGIC);
+}
+
+static struct file_system_type cxl_fs_type = {
+ .name = "cxl",
+ .owner = THIS_MODULE,
+ .mount = cxl_fs_mount,
+ .kill_sb = kill_anon_super,
+};
+
+
+void cxl_release_mapping(struct cxl_context *ctx)
+{
+ if (ctx->kernelapi && ctx->mapping)
+ simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
+}
+
+static struct file *cxl_getfile(const char *name,
+ const struct file_operations *fops,
+ void *priv, int flags)
+{
+ struct qstr this;
+ struct path path;
+ struct file *file;
+ struct inode *inode = NULL;
+ int rc;
+
+ /* strongly inspired by anon_inode_getfile() */
+
+ if (fops->owner && !try_module_get(fops->owner))
+ return ERR_PTR(-ENOENT);
+
+ rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt);
+ if (rc < 0) {
+ pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc);
+ file = ERR_PTR(rc);
+ goto err_module;
+ }
+
+ inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
+ if (IS_ERR(inode)) {
+ file = ERR_CAST(inode);
+ goto err_fs;
+ }
+
+ file = ERR_PTR(-ENOMEM);
+ this.name = name;
+ this.len = strlen(name);
+ this.hash = 0;
+ path.dentry = d_alloc_pseudo(cxl_vfs_mount->mnt_sb, &this);
+ if (!path.dentry)
+ goto err_inode;
+
+ path.mnt = mntget(cxl_vfs_mount);
+ d_instantiate(path.dentry, inode);
+
+ file = alloc_file(&path, OPEN_FMODE(flags), fops);
+ if (IS_ERR(file))
+ goto err_dput;
+ file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
+ file->private_data = priv;
+
+ return file;
+
+err_dput:
+ path_put(&path);
+err_inode:
+ iput(inode);
+err_fs:
+ simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
+err_module:
+ module_put(fops->owner);
+ return file;
+}
+
struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
{
- struct address_space *mapping;
struct cxl_afu *afu;
struct cxl_context *ctx;
int rc;
@@ -30,38 +131,20 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
return ERR_CAST(afu);
ctx = cxl_context_alloc();
- if (IS_ERR(ctx)) {
- rc = PTR_ERR(ctx);
- goto err_dev;
- }
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
ctx->kernelapi = true;
- /*
- * Make our own address space since we won't have one from the
- * filesystem like the user api has, and even if we do associate a file
- * with this context we don't want to use the global anonymous inode's
- * address space as that can invalidate unrelated users:
- */
- mapping = kmalloc(sizeof(struct address_space), GFP_KERNEL);
- if (!mapping) {
- rc = -ENOMEM;
- goto err_ctx;
- }
- address_space_init_once(mapping);
-
/* Make it a slave context. We can promote it later? */
- rc = cxl_context_init(ctx, afu, false, mapping);
+ rc = cxl_context_init(ctx, afu, false);
if (rc)
- goto err_mapping;
+ goto err_ctx;
return ctx;
-err_mapping:
- kfree(mapping);
err_ctx:
kfree(ctx);
-err_dev:
return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(cxl_dev_context_init);
@@ -340,6 +423,11 @@ struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
{
struct file *file;
int rc, flags, fdtmp;
+ char *name = NULL;
+
+ /* only allow one per context */
+ if (ctx->mapping)
+ return ERR_PTR(-EEXIST);
flags = O_RDWR | O_CLOEXEC;
@@ -363,12 +451,13 @@ struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
} else /* use default ops */
fops = (struct file_operations *)&afu_fops;
- file = anon_inode_getfile("cxl", fops, ctx, flags);
+ name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe);
+ file = cxl_getfile(name, fops, ctx, flags);
+ kfree(name);
if (IS_ERR(file))
goto err_fd;
- file->f_mapping = ctx->mapping;
-
+ cxl_context_set_mapping(ctx, file->f_mapping);
*fd = fdtmp;
return file;
@@ -541,7 +630,7 @@ int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
if (remaining > 0) {
new_ctx = cxl_dev_context_init(pdev);
- if (!new_ctx) {
+ if (IS_ERR(new_ctx)) {
pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
return -ENOSPC;
}
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 5e506c1..ff5e7e8 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -34,8 +34,7 @@ struct cxl_context *cxl_context_alloc(void)
/*
* Initialises a CXL context.
*/
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
- struct address_space *mapping)
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
{
int i;
@@ -44,7 +43,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
ctx->master = master;
ctx->pid = ctx->glpid = NULL; /* Set in start work ioctl */
mutex_init(&ctx->mapping_lock);
- ctx->mapping = mapping;
+ ctx->mapping = NULL;
/*
* Allocate the segment table before we put it in the IDR so that we
@@ -114,6 +113,14 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
return 0;
}
+void cxl_context_set_mapping(struct cxl_context *ctx,
+ struct address_space *mapping)
+{
+ mutex_lock(&ctx->mapping_lock);
+ ctx->mapping = mapping;
+ mutex_unlock(&ctx->mapping_lock);
+}
+
static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct cxl_context *ctx = vma->vm_file->private_data;
@@ -300,8 +307,6 @@ static void reclaim_ctx(struct rcu_head *rcu)
if (ctx->ff_page)
__free_page(ctx->ff_page);
ctx->sstp = NULL;
- if (ctx->kernelapi)
- kfree(ctx->mapping);
kfree(ctx->irq_bitmap);
@@ -313,6 +318,8 @@ static void reclaim_ctx(struct rcu_head *rcu)
void cxl_context_free(struct cxl_context *ctx)
{
+ if (ctx->kernelapi && ctx->mapping)
+ cxl_release_mapping(ctx);
mutex_lock(&ctx->afu->contexts_lock);
idr_remove(&ctx->afu->contexts_idr, ctx->pe);
mutex_unlock(&ctx->afu->contexts_lock);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a144073..b24d767 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -817,8 +817,9 @@ void cxl_dump_debug_buffer(void *addr, size_t size);
void init_cxl_native(void);
struct cxl_context *cxl_context_alloc(void);
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
- struct address_space *mapping);
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master);
+void cxl_context_set_mapping(struct cxl_context *ctx,
+ struct address_space *mapping);
void cxl_context_free(struct cxl_context *ctx);
int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
@@ -877,6 +878,7 @@ void cxl_native_err_irq_dump_regs(struct cxl *adapter);
void cxl_stop_trace(struct cxl *cxl);
int cxl_pci_vphb_add(struct cxl_afu *afu);
void cxl_pci_vphb_remove(struct cxl_afu *afu);
+void cxl_release_mapping(struct cxl_context *ctx);
extern struct pci_driver cxl_pci_driver;
extern struct platform_driver cxl_of_driver;
diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c
index ec7b8a0..9c06ac8 100644
--- a/drivers/misc/cxl/debugfs.c
+++ b/drivers/misc/cxl/debugfs.c
@@ -43,12 +43,14 @@ static int debugfs_io_u64_set(void *data, u64 val)
out_be64((u64 __iomem *)data, val);
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, "0x%016llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set,
+ "0x%016llx\n");
static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode,
struct dentry *parent, u64 __iomem *value)
{
- return debugfs_create_file(name, mode, parent, (void __force *)value, &fops_io_x64);
+ return debugfs_create_file_unsafe(name, mode, parent,
+ (void __force *)value, &fops_io_x64);
}
void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter, struct dentry *dir)
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 77080cc..859959f 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -86,9 +86,12 @@ static int __afu_open(struct inode *inode, struct file *file, bool master)
goto err_put_afu;
}
- if ((rc = cxl_context_init(ctx, afu, master, inode->i_mapping)))
+ rc = cxl_context_init(ctx, afu, master);
+ if (rc)
goto err_put_afu;
+ cxl_context_set_mapping(ctx, inode->i_mapping);
+
pr_devel("afu_open pe: %i\n", ctx->pe);
file->private_data = ctx;
cxl_ctx_get();
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 3e102cd..e04bc4d 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -887,7 +887,7 @@ static void afu_handle_errstate(struct work_struct *work)
afu_guest->previous_state == H_STATE_PERM_UNAVAILABLE)
return;
- if (afu_guest->handle_err == true)
+ if (afu_guest->handle_err)
schedule_delayed_work(&afu_guest->work_err,
msecs_to_jiffies(3000));
}
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index dec60f5..1a402bb 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -104,7 +104,7 @@ irqreturn_t cxl_irq(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_i
} else {
spin_lock(&ctx->lock);
ctx->afu_err = irq_info->afu_err;
- ctx->pending_afu_err = 1;
+ ctx->pending_afu_err = true;
spin_unlock(&ctx->lock);
wake_up_all(&ctx->wq);
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index a217a74..09505f4 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -10,7 +10,6 @@
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/slab.h>
-#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
@@ -54,7 +53,7 @@ static int afu_control(struct cxl_afu *afu, u64 command, u64 clear,
AFU_Cntl | command);
cpu_relax();
AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
- };
+ }
if (AFU_Cntl & CXL_AFU_Cntl_An_RA) {
/*
@@ -167,7 +166,7 @@ int cxl_psl_purge(struct cxl_afu *afu)
cpu_relax();
}
PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
- };
+ }
end = local_clock();
pr_devel("PSL purged in %lld ns\n", end - start);
@@ -931,9 +930,18 @@ static irqreturn_t native_irq_multiplexed(int irq, void *data)
struct cxl_afu *afu = data;
struct cxl_context *ctx;
struct cxl_irq_info irq_info;
- int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff;
- int ret;
+ u64 phreg = cxl_p2n_read(afu, CXL_PSL_PEHandle_An);
+ int ph, ret;
+ /* check if eeh kicked in while the interrupt was in flight */
+ if (unlikely(phreg == ~0ULL)) {
+ dev_warn(&afu->dev,
+ "Ignoring slice interrupt(%d) due to fenced card",
+ irq);
+ return IRQ_HANDLED;
+ }
+ /* Mask the pe-handle from register value */
+ ph = phreg & 0xffff;
if ((ret = native_get_irq_info(afu, &irq_info))) {
WARN(1, "Unable to get CXL IRQ Info: %i\n", ret);
return fail_psl_irq(afu, &irq_info);
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index e96be9c..80a87ab 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1921,7 +1921,7 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
goto err;
ctx = cxl_dev_context_init(afu_dev);
- if (!ctx)
+ if (IS_ERR(ctx))
goto err;
afu_dev->dev.archdata.cxl_ctx = ctx;
diff --git a/drivers/misc/cxl/phb.c b/drivers/misc/cxl/phb.c
index 0935d44..6ec69ad 100644
--- a/drivers/misc/cxl/phb.c
+++ b/drivers/misc/cxl/phb.c
@@ -20,7 +20,7 @@ bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu
* in the virtual phb, we'll need a default context to attach them to.
*/
ctx = cxl_dev_context_init(dev);
- if (!ctx)
+ if (IS_ERR(ctx))
return false;
dev->dev.archdata.cxl_ctx = ctx;
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 80378dd..c882357 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -31,49 +31,49 @@
static void tce_iommu_detach_group(void *iommu_data,
struct iommu_group *iommu_group);
-static long try_increment_locked_vm(long npages)
+static long try_increment_locked_vm(struct mm_struct *mm, long npages)
{
long ret = 0, locked, lock_limit;
- if (!current || !current->mm)
- return -ESRCH; /* process exited */
+ if (WARN_ON_ONCE(!mm))
+ return -EPERM;
if (!npages)
return 0;
- down_write(¤t->mm->mmap_sem);
- locked = current->mm->locked_vm + npages;
+ down_write(&mm->mmap_sem);
+ locked = mm->locked_vm + npages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
ret = -ENOMEM;
else
- current->mm->locked_vm += npages;
+ mm->locked_vm += npages;
pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
npages << PAGE_SHIFT,
- current->mm->locked_vm << PAGE_SHIFT,
+ mm->locked_vm << PAGE_SHIFT,
rlimit(RLIMIT_MEMLOCK),
ret ? " - exceeded" : "");
- up_write(¤t->mm->mmap_sem);
+ up_write(&mm->mmap_sem);
return ret;
}
-static void decrement_locked_vm(long npages)
+static void decrement_locked_vm(struct mm_struct *mm, long npages)
{
- if (!current || !current->mm || !npages)
- return; /* process exited */
+ if (!mm || !npages)
+ return;
- down_write(¤t->mm->mmap_sem);
- if (WARN_ON_ONCE(npages > current->mm->locked_vm))
- npages = current->mm->locked_vm;
- current->mm->locked_vm -= npages;
+ down_write(&mm->mmap_sem);
+ if (WARN_ON_ONCE(npages > mm->locked_vm))
+ npages = mm->locked_vm;
+ mm->locked_vm -= npages;
pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
npages << PAGE_SHIFT,
- current->mm->locked_vm << PAGE_SHIFT,
+ mm->locked_vm << PAGE_SHIFT,
rlimit(RLIMIT_MEMLOCK));
- up_write(¤t->mm->mmap_sem);
+ up_write(&mm->mmap_sem);
}
/*
@@ -89,6 +89,15 @@ struct tce_iommu_group {
};
/*
+ * A container needs to remember which preregistered region it has
+ * referenced to do proper cleanup at the userspace process exit.
+ */
+struct tce_iommu_prereg {
+ struct list_head next;
+ struct mm_iommu_table_group_mem_t *mem;
+};
+
+/*
* The container descriptor supports only a single group per container.
* Required by the API as the container is not supplied with the IOMMU group
* at the moment of initialization.
@@ -97,24 +106,68 @@ struct tce_container {
struct mutex lock;
bool enabled;
bool v2;
+ bool def_window_pending;
unsigned long locked_pages;
+ struct mm_struct *mm;
struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
struct list_head group_list;
+ struct list_head prereg_list;
};
+static long tce_iommu_mm_set(struct tce_container *container)
+{
+ if (container->mm) {
+ if (container->mm == current->mm)
+ return 0;
+ return -EPERM;
+ }
+ BUG_ON(!current->mm);
+ container->mm = current->mm;
+ atomic_inc(&container->mm->mm_count);
+
+ return 0;
+}
+
+static long tce_iommu_prereg_free(struct tce_container *container,
+ struct tce_iommu_prereg *tcemem)
+{
+ long ret;
+
+ ret = mm_iommu_put(container->mm, tcemem->mem);
+ if (ret)
+ return ret;
+
+ list_del(&tcemem->next);
+ kfree(tcemem);
+
+ return 0;
+}
+
static long tce_iommu_unregister_pages(struct tce_container *container,
__u64 vaddr, __u64 size)
{
struct mm_iommu_table_group_mem_t *mem;
+ struct tce_iommu_prereg *tcemem;
+ bool found = false;
if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
return -EINVAL;
- mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT);
+ mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
if (!mem)
return -ENOENT;
- return mm_iommu_put(mem);
+ list_for_each_entry(tcemem, &container->prereg_list, next) {
+ if (tcemem->mem == mem) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ return tce_iommu_prereg_free(container, tcemem);
}
static long tce_iommu_register_pages(struct tce_container *container,
@@ -122,22 +175,36 @@ static long tce_iommu_register_pages(struct tce_container *container,
{
long ret = 0;
struct mm_iommu_table_group_mem_t *mem = NULL;
+ struct tce_iommu_prereg *tcemem;
unsigned long entries = size >> PAGE_SHIFT;
if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
((vaddr + size) < vaddr))
return -EINVAL;
- ret = mm_iommu_get(vaddr, entries, &mem);
+ mem = mm_iommu_find(container->mm, vaddr, entries);
+ if (mem) {
+ list_for_each_entry(tcemem, &container->prereg_list, next) {
+ if (tcemem->mem == mem)
+ return -EBUSY;
+ }
+ }
+
+ ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
if (ret)
return ret;
+ tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
+ tcemem->mem = mem;
+ list_add(&tcemem->next, &container->prereg_list);
+
container->enabled = true;
return 0;
}
-static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl)
+static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
+ struct mm_struct *mm)
{
unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
tbl->it_size, PAGE_SIZE);
@@ -146,13 +213,13 @@ static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl)
BUG_ON(tbl->it_userspace);
- ret = try_increment_locked_vm(cb >> PAGE_SHIFT);
+ ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT);
if (ret)
return ret;
uas = vzalloc(cb);
if (!uas) {
- decrement_locked_vm(cb >> PAGE_SHIFT);
+ decrement_locked_vm(mm, cb >> PAGE_SHIFT);
return -ENOMEM;
}
tbl->it_userspace = uas;
@@ -160,7 +227,8 @@ static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl)
return 0;
}
-static void tce_iommu_userspace_view_free(struct iommu_table *tbl)
+static void tce_iommu_userspace_view_free(struct iommu_table *tbl,
+ struct mm_struct *mm)
{
unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
tbl->it_size, PAGE_SIZE);
@@ -170,7 +238,7 @@ static void tce_iommu_userspace_view_free(struct iommu_table *tbl)
vfree(tbl->it_userspace);
tbl->it_userspace = NULL;
- decrement_locked_vm(cb >> PAGE_SHIFT);
+ decrement_locked_vm(mm, cb >> PAGE_SHIFT);
}
static bool tce_page_is_contained(struct page *page, unsigned page_shift)
@@ -230,9 +298,6 @@ static int tce_iommu_enable(struct tce_container *container)
struct iommu_table_group *table_group;
struct tce_iommu_group *tcegrp;
- if (!current->mm)
- return -ESRCH; /* process exited */
-
if (container->enabled)
return -EBUSY;
@@ -277,8 +342,12 @@ static int tce_iommu_enable(struct tce_container *container)
if (!table_group->tce32_size)
return -EPERM;
+ ret = tce_iommu_mm_set(container);
+ if (ret)
+ return ret;
+
locked = table_group->tce32_size >> PAGE_SHIFT;
- ret = try_increment_locked_vm(locked);
+ ret = try_increment_locked_vm(container->mm, locked);
if (ret)
return ret;
@@ -296,10 +365,8 @@ static void tce_iommu_disable(struct tce_container *container)
container->enabled = false;
- if (!current->mm)
- return;
-
- decrement_locked_vm(container->locked_pages);
+ BUG_ON(!container->mm);
+ decrement_locked_vm(container->mm, container->locked_pages);
}
static void *tce_iommu_open(unsigned long arg)
@@ -317,6 +384,7 @@ static void *tce_iommu_open(unsigned long arg)
mutex_init(&container->lock);
INIT_LIST_HEAD_RCU(&container->group_list);
+ INIT_LIST_HEAD_RCU(&container->prereg_list);
container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
@@ -326,7 +394,8 @@ static void *tce_iommu_open(unsigned long arg)
static int tce_iommu_clear(struct tce_container *container,
struct iommu_table *tbl,
unsigned long entry, unsigned long pages);
-static void tce_iommu_free_table(struct iommu_table *tbl);
+static void tce_iommu_free_table(struct tce_container *container,
+ struct iommu_table *tbl);
static void tce_iommu_release(void *iommu_data)
{
@@ -351,10 +420,20 @@ static void tce_iommu_release(void *iommu_data)
continue;
tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
- tce_iommu_free_table(tbl);
+ tce_iommu_free_table(container, tbl);
+ }
+
+ while (!list_empty(&container->prereg_list)) {
+ struct tce_iommu_prereg *tcemem;
+
+ tcemem = list_first_entry(&container->prereg_list,
+ struct tce_iommu_prereg, next);
+ WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem));
}
tce_iommu_disable(container);
+ if (container->mm)
+ mmdrop(container->mm);
mutex_destroy(&container->lock);
kfree(container);
@@ -369,13 +448,14 @@ static void tce_iommu_unuse_page(struct tce_container *container,
put_page(page);
}
-static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size,
+static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
+ unsigned long tce, unsigned long size,
unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
long ret = 0;
struct mm_iommu_table_group_mem_t *mem;
- mem = mm_iommu_lookup(tce, size);
+ mem = mm_iommu_lookup(container->mm, tce, size);
if (!mem)
return -EINVAL;
@@ -388,18 +468,18 @@ static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size,
return 0;
}
-static void tce_iommu_unuse_page_v2(struct iommu_table *tbl,
- unsigned long entry)
+static void tce_iommu_unuse_page_v2(struct tce_container *container,
+ struct iommu_table *tbl, unsigned long entry)
{
struct mm_iommu_table_group_mem_t *mem = NULL;
int ret;
unsigned long hpa = 0;
unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
- if (!pua || !current || !current->mm)
+ if (!pua)
return;
- ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl),
+ ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl),
&hpa, &mem);
if (ret)
pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
@@ -429,7 +509,7 @@ static int tce_iommu_clear(struct tce_container *container,
continue;
if (container->v2) {
- tce_iommu_unuse_page_v2(tbl, entry);
+ tce_iommu_unuse_page_v2(container, tbl, entry);
continue;
}
@@ -509,13 +589,19 @@ static long tce_iommu_build_v2(struct tce_container *container,
unsigned long hpa;
enum dma_data_direction dirtmp;
+ if (!tbl->it_userspace) {
+ ret = tce_iommu_userspace_view_alloc(tbl, container->mm);
+ if (ret)
+ return ret;
+ }
+
for (i = 0; i < pages; ++i) {
struct mm_iommu_table_group_mem_t *mem = NULL;
unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
entry + i);
- ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl),
- &hpa, &mem);
+ ret = tce_iommu_prereg_ua_to_hpa(container,
+ tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem);
if (ret)
break;
@@ -536,7 +622,7 @@ static long tce_iommu_build_v2(struct tce_container *container,
ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
if (ret) {
/* dirtmp cannot be DMA_NONE here */
- tce_iommu_unuse_page_v2(tbl, entry + i);
+ tce_iommu_unuse_page_v2(container, tbl, entry + i);
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
__func__, entry << tbl->it_page_shift,
tce, ret);
@@ -544,7 +630,7 @@ static long tce_iommu_build_v2(struct tce_container *container,
}
if (dirtmp != DMA_NONE)
- tce_iommu_unuse_page_v2(tbl, entry + i);
+ tce_iommu_unuse_page_v2(container, tbl, entry + i);
*pua = tce;
@@ -572,7 +658,7 @@ static long tce_iommu_create_table(struct tce_container *container,
if (!table_size)
return -EINVAL;
- ret = try_increment_locked_vm(table_size >> PAGE_SHIFT);
+ ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
if (ret)
return ret;
@@ -582,25 +668,17 @@ static long tce_iommu_create_table(struct tce_container *container,
WARN_ON(!ret && !(*ptbl)->it_ops->free);
WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
- if (!ret && container->v2) {
- ret = tce_iommu_userspace_view_alloc(*ptbl);
- if (ret)
- (*ptbl)->it_ops->free(*ptbl);
- }
-
- if (ret)
- decrement_locked_vm(table_size >> PAGE_SHIFT);
-
return ret;
}
-static void tce_iommu_free_table(struct iommu_table *tbl)
+static void tce_iommu_free_table(struct tce_container *container,
+ struct iommu_table *tbl)
{
unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
- tce_iommu_userspace_view_free(tbl);
+ tce_iommu_userspace_view_free(tbl, container->mm);
tbl->it_ops->free(tbl);
- decrement_locked_vm(pages);
+ decrement_locked_vm(container->mm, pages);
}
static long tce_iommu_create_window(struct tce_container *container,
@@ -663,7 +741,7 @@ static long tce_iommu_create_window(struct tce_container *container,
table_group = iommu_group_get_iommudata(tcegrp->grp);
table_group->ops->unset_window(table_group, num);
}
- tce_iommu_free_table(tbl);
+ tce_iommu_free_table(container, tbl);
return ret;
}
@@ -701,12 +779,41 @@ static long tce_iommu_remove_window(struct tce_container *container,
/* Free table */
tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
- tce_iommu_free_table(tbl);
+ tce_iommu_free_table(container, tbl);
container->tables[num] = NULL;
return 0;
}
+static long tce_iommu_create_default_window(struct tce_container *container)
+{
+ long ret;
+ __u64 start_addr = 0;
+ struct tce_iommu_group *tcegrp;
+ struct iommu_table_group *table_group;
+
+ if (!container->def_window_pending)
+ return 0;
+
+ if (!tce_groups_attached(container))
+ return -ENODEV;
+
+ tcegrp = list_first_entry(&container->group_list,
+ struct tce_iommu_group, next);
+ table_group = iommu_group_get_iommudata(tcegrp->grp);
+ if (!table_group)
+ return -ENODEV;
+
+ ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
+ table_group->tce32_size, 1, &start_addr);
+ WARN_ON_ONCE(!ret && start_addr);
+
+ if (!ret)
+ container->def_window_pending = false;
+
+ return ret;
+}
+
static long tce_iommu_ioctl(void *iommu_data,
unsigned int cmd, unsigned long arg)
{
@@ -727,7 +834,17 @@ static long tce_iommu_ioctl(void *iommu_data,
}
return (ret < 0) ? 0 : ret;
+ }
+ /*
+ * Sanity check to prevent one userspace from manipulating
+ * another userspace mm.
+ */
+ BUG_ON(!container);
+ if (container->mm && container->mm != current->mm)
+ return -EPERM;
+
+ switch (cmd) {
case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
struct vfio_iommu_spapr_tce_info info;
struct tce_iommu_group *tcegrp;
@@ -797,6 +914,10 @@ static long tce_iommu_ioctl(void *iommu_data,
VFIO_DMA_MAP_FLAG_WRITE))
return -EINVAL;
+ ret = tce_iommu_create_default_window(container);
+ if (ret)
+ return ret;
+
num = tce_iommu_find_table(container, param.iova, &tbl);
if (num < 0)
return -ENXIO;
@@ -860,6 +981,10 @@ static long tce_iommu_ioctl(void *iommu_data,
if (param.flags)
return -EINVAL;
+ ret = tce_iommu_create_default_window(container);
+ if (ret)
+ return ret;
+
num = tce_iommu_find_table(container, param.iova, &tbl);
if (num < 0)
return -ENXIO;
@@ -888,6 +1013,10 @@ static long tce_iommu_ioctl(void *iommu_data,
minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
size);
+ ret = tce_iommu_mm_set(container);
+ if (ret)
+ return ret;
+
if (copy_from_user(¶m, (void __user *)arg, minsz))
return -EFAULT;
@@ -911,6 +1040,9 @@ static long tce_iommu_ioctl(void *iommu_data,
if (!container->v2)
break;
+ if (!container->mm)
+ return -EPERM;
+
minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
size);
@@ -969,6 +1101,10 @@ static long tce_iommu_ioctl(void *iommu_data,
if (!container->v2)
break;
+ ret = tce_iommu_mm_set(container);
+ if (ret)
+ return ret;
+
if (!tce_groups_attached(container))
return -ENXIO;
@@ -986,6 +1122,10 @@ static long tce_iommu_ioctl(void *iommu_data,
mutex_lock(&container->lock);
+ ret = tce_iommu_create_default_window(container);
+ if (ret)
+ return ret;
+
ret = tce_iommu_create_window(container, create.page_shift,
create.window_size, create.levels,
&create.start_addr);
@@ -1003,6 +1143,10 @@ static long tce_iommu_ioctl(void *iommu_data,
if (!container->v2)
break;
+ ret = tce_iommu_mm_set(container);
+ if (ret)
+ return ret;
+
if (!tce_groups_attached(container))
return -ENXIO;
@@ -1018,6 +1162,11 @@ static long tce_iommu_ioctl(void *iommu_data,
if (remove.flags)
return -EINVAL;
+ if (container->def_window_pending && !remove.start_addr) {
+ container->def_window_pending = false;
+ return 0;
+ }
+
mutex_lock(&container->lock);
ret = tce_iommu_remove_window(container, remove.start_addr);
@@ -1043,7 +1192,7 @@ static void tce_iommu_release_ownership(struct tce_container *container,
continue;
tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
- tce_iommu_userspace_view_free(tbl);
+ tce_iommu_userspace_view_free(tbl, container->mm);
if (tbl->it_map)
iommu_release_ownership(tbl);
@@ -1062,10 +1211,7 @@ static int tce_iommu_take_ownership(struct tce_container *container,
if (!tbl || !tbl->it_map)
continue;
- rc = tce_iommu_userspace_view_alloc(tbl);
- if (!rc)
- rc = iommu_take_ownership(tbl);
-
+ rc = iommu_take_ownership(tbl);
if (rc) {
for (j = 0; j < i; ++j)
iommu_release_ownership(
@@ -1100,9 +1246,6 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
static long tce_iommu_take_ownership_ddw(struct tce_container *container,
struct iommu_table_group *table_group)
{
- long i, ret = 0;
- struct iommu_table *tbl = NULL;
-
if (!table_group->ops->create_table || !table_group->ops->set_window ||
!table_group->ops->release_ownership) {
WARN_ON_ONCE(1);
@@ -1111,47 +1254,7 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container,
table_group->ops->take_ownership(table_group);
- /*
- * If it the first group attached, check if there is
- * a default DMA window and create one if none as
- * the userspace expects it to exist.
- */
- if (!tce_groups_attached(container) && !container->tables[0]) {
- ret = tce_iommu_create_table(container,
- table_group,
- 0, /* window number */
- IOMMU_PAGE_SHIFT_4K,
- table_group->tce32_size,
- 1, /* default levels */
- &tbl);
- if (ret)
- goto release_exit;
- else
- container->tables[0] = tbl;
- }
-
- /* Set all windows to the new group */
- for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
- tbl = container->tables[i];
-
- if (!tbl)
- continue;
-
- /* Set the default window to a new group */
- ret = table_group->ops->set_window(table_group, i, tbl);
- if (ret)
- goto release_exit;
- }
-
return 0;
-
-release_exit:
- for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
- table_group->ops->unset_window(table_group, i);
-
- table_group->ops->release_ownership(table_group);
-
- return ret;
}
static int tce_iommu_attach_group(void *iommu_data,
@@ -1203,10 +1306,13 @@ static int tce_iommu_attach_group(void *iommu_data,
}
if (!table_group->ops || !table_group->ops->take_ownership ||
- !table_group->ops->release_ownership)
+ !table_group->ops->release_ownership) {
ret = tce_iommu_take_ownership(container, table_group);
- else
+ } else {
ret = tce_iommu_take_ownership_ddw(container, table_group);
+ if (!tce_groups_attached(container) && !container->tables[0])
+ container->def_window_pending = true;
+ }
if (!ret) {
tcegrp->grp = iommu_group;
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 406c33d..a33f633 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -148,7 +148,36 @@ struct kexec_file_ops {
kexec_verify_sig_t *verify_sig;
#endif
};
-#endif
+
+/**
+ * struct kexec_buf - parameters for finding a place for a buffer in memory
+ * @image: kexec image in which memory to search.
+ * @buffer: Contents which will be copied to the allocated memory.
+ * @bufsz: Size of @buffer.
+ * @mem: On return will have address of the buffer in memory.
+ * @memsz: Size for the buffer in memory.
+ * @buf_align: Minimum alignment needed.
+ * @buf_min: The buffer can't be placed below this address.
+ * @buf_max: The buffer can't be placed above this address.
+ * @top_down: Allocate from top of memory.
+ */
+struct kexec_buf {
+ struct kimage *image;
+ void *buffer;
+ unsigned long bufsz;
+ unsigned long mem;
+ unsigned long memsz;
+ unsigned long buf_align;
+ unsigned long buf_min;
+ unsigned long buf_max;
+ bool top_down;
+};
+
+int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
+ int (*func)(u64, u64, void *));
+extern int kexec_add_buffer(struct kexec_buf *kbuf);
+int kexec_locate_mem_hole(struct kexec_buf *kbuf);
+#endif /* CONFIG_KEXEC_FILE */
struct kimage {
kimage_entry_t head;
@@ -212,11 +241,6 @@ extern asmlinkage long sys_kexec_load(unsigned long entry,
struct kexec_segment __user *segments,
unsigned long flags);
extern int kernel_kexec(void);
-extern int kexec_add_buffer(struct kimage *image, char *buffer,
- unsigned long bufsz, unsigned long memsz,
- unsigned long buf_align, unsigned long buf_min,
- unsigned long buf_max, bool top_down,
- unsigned long *load_addr);
extern struct page *kimage_alloc_control_pages(struct kimage *image,
unsigned int order);
extern int kexec_load_purgatory(struct kimage *image, unsigned long min,
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 037c321..0c2df7f 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -428,25 +428,65 @@ static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
return locate_mem_hole_bottom_up(start, end, kbuf);
}
-/*
- * Helper function for placing a buffer in a kexec segment. This assumes
- * that kexec_mutex is held.
+/**
+ * arch_kexec_walk_mem - call func(data) on free memory regions
+ * @kbuf: Context info for the search. Also passed to @func.
+ * @func: Function to call for each memory region.
+ *
+ * Return: The memory walk will stop when func returns a non-zero value
+ * and that value will be returned. If all free regions are visited without
+ * func returning non-zero, then zero will be returned.
*/
-int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
- unsigned long memsz, unsigned long buf_align,
- unsigned long buf_min, unsigned long buf_max,
- bool top_down, unsigned long *load_addr)
+int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
+ int (*func)(u64, u64, void *))
+{
+ if (kbuf->image->type == KEXEC_TYPE_CRASH)
+ return walk_iomem_res_desc(crashk_res.desc,
+ IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+ crashk_res.start, crashk_res.end,
+ kbuf, func);
+ else
+ return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
+}
+
+/**
+ * kexec_locate_mem_hole - find free memory for the purgatory or the next kernel
+ * @kbuf: Parameters for the memory search.
+ *
+ * On success, kbuf->mem will have the start address of the memory region found.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int kexec_locate_mem_hole(struct kexec_buf *kbuf)
+{
+ int ret;
+
+ ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback);
+
+ return ret == 1 ? 0 : -EADDRNOTAVAIL;
+}
+
+/**
+ * kexec_add_buffer - place a buffer in a kexec segment
+ * @kbuf: Buffer contents and memory parameters.
+ *
+ * This function assumes that kexec_mutex is held.
+ * On successful return, @kbuf->mem will have the physical address of
+ * the buffer in memory.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int kexec_add_buffer(struct kexec_buf *kbuf)
{
struct kexec_segment *ksegment;
- struct kexec_buf buf, *kbuf;
int ret;
/* Currently adding segment this way is allowed only in file mode */
- if (!image->file_mode)
+ if (!kbuf->image->file_mode)
return -EINVAL;
- if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+ if (kbuf->image->nr_segments >= KEXEC_SEGMENT_MAX)
return -EINVAL;
/*
@@ -456,45 +496,27 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
* logic goes through list of segments to make sure there are
* no destination overlaps.
*/
- if (!list_empty(&image->control_pages)) {
+ if (!list_empty(&kbuf->image->control_pages)) {
WARN_ON(1);
return -EINVAL;
}
- memset(&buf, 0, sizeof(struct kexec_buf));
- kbuf = &buf;
- kbuf->image = image;
- kbuf->buffer = buffer;
- kbuf->bufsz = bufsz;
-
- kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
- kbuf->buf_align = max(buf_align, PAGE_SIZE);
- kbuf->buf_min = buf_min;
- kbuf->buf_max = buf_max;
- kbuf->top_down = top_down;
+ /* Ensure minimum alignment needed for segments. */
+ kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE);
+ kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
/* Walk the RAM ranges and allocate a suitable range for the buffer */
- if (image->type == KEXEC_TYPE_CRASH)
- ret = walk_iomem_res_desc(crashk_res.desc,
- IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
- crashk_res.start, crashk_res.end, kbuf,
- locate_mem_hole_callback);
- else
- ret = walk_system_ram_res(0, -1, kbuf,
- locate_mem_hole_callback);
- if (ret != 1) {
- /* A suitable memory range could not be found for buffer */
- return -EADDRNOTAVAIL;
- }
+ ret = kexec_locate_mem_hole(kbuf);
+ if (ret)
+ return ret;
/* Found a suitable memory range */
- ksegment = &image->segment[image->nr_segments];
+ ksegment = &kbuf->image->segment[kbuf->image->nr_segments];
ksegment->kbuf = kbuf->buffer;
ksegment->bufsz = kbuf->bufsz;
ksegment->mem = kbuf->mem;
ksegment->memsz = kbuf->memsz;
- image->nr_segments++;
- *load_addr = ksegment->mem;
+ kbuf->image->nr_segments++;
return 0;
}
@@ -616,13 +638,15 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
unsigned long max, int top_down)
{
struct purgatory_info *pi = &image->purgatory_info;
- unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
- unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
+ unsigned long align, bss_align, bss_sz, bss_pad;
+ unsigned long entry, load_addr, curr_load_addr, bss_addr, offset;
unsigned char *buf_addr, *src;
int i, ret = 0, entry_sidx = -1;
const Elf_Shdr *sechdrs_c;
Elf_Shdr *sechdrs = NULL;
- void *purgatory_buf = NULL;
+ struct kexec_buf kbuf = { .image = image, .bufsz = 0, .buf_align = 1,
+ .buf_min = min, .buf_max = max,
+ .top_down = top_down };
/*
* sechdrs_c points to section headers in purgatory and are read
@@ -688,9 +712,7 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
}
/* Determine how much memory is needed to load relocatable object. */
- buf_align = 1;
bss_align = 1;
- buf_sz = 0;
bss_sz = 0;
for (i = 0; i < pi->ehdr->e_shnum; i++) {
@@ -699,10 +721,10 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
align = sechdrs[i].sh_addralign;
if (sechdrs[i].sh_type != SHT_NOBITS) {
- if (buf_align < align)
- buf_align = align;
- buf_sz = ALIGN(buf_sz, align);
- buf_sz += sechdrs[i].sh_size;
+ if (kbuf.buf_align < align)
+ kbuf.buf_align = align;
+ kbuf.bufsz = ALIGN(kbuf.bufsz, align);
+ kbuf.bufsz += sechdrs[i].sh_size;
} else {
/* bss section */
if (bss_align < align)
@@ -714,32 +736,31 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
/* Determine the bss padding required to align bss properly */
bss_pad = 0;
- if (buf_sz & (bss_align - 1))
- bss_pad = bss_align - (buf_sz & (bss_align - 1));
+ if (kbuf.bufsz & (bss_align - 1))
+ bss_pad = bss_align - (kbuf.bufsz & (bss_align - 1));
- memsz = buf_sz + bss_pad + bss_sz;
+ kbuf.memsz = kbuf.bufsz + bss_pad + bss_sz;
/* Allocate buffer for purgatory */
- purgatory_buf = vzalloc(buf_sz);
- if (!purgatory_buf) {
+ kbuf.buffer = vzalloc(kbuf.bufsz);
+ if (!kbuf.buffer) {
ret = -ENOMEM;
goto out;
}
- if (buf_align < bss_align)
- buf_align = bss_align;
+ if (kbuf.buf_align < bss_align)
+ kbuf.buf_align = bss_align;
/* Add buffer to segment list */
- ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
- buf_align, min, max, top_down,
- &pi->purgatory_load_addr);
+ ret = kexec_add_buffer(&kbuf);
if (ret)
goto out;
+ pi->purgatory_load_addr = kbuf.mem;
/* Load SHF_ALLOC sections */
- buf_addr = purgatory_buf;
+ buf_addr = kbuf.buffer;
load_addr = curr_load_addr = pi->purgatory_load_addr;
- bss_addr = load_addr + buf_sz + bss_pad;
+ bss_addr = load_addr + kbuf.bufsz + bss_pad;
for (i = 0; i < pi->ehdr->e_shnum; i++) {
if (!(sechdrs[i].sh_flags & SHF_ALLOC))
@@ -785,11 +806,11 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
* Used later to identify which section is purgatory and skip it
* from checksumming.
*/
- pi->purgatory_buf = purgatory_buf;
+ pi->purgatory_buf = kbuf.buffer;
return ret;
out:
vfree(sechdrs);
- vfree(purgatory_buf);
+ vfree(kbuf.buffer);
return ret;
}
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 0a52315..4cef7e4 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -20,22 +20,6 @@ struct kexec_sha_region {
unsigned long len;
};
-/*
- * Keeps track of buffer parameters as provided by caller for requesting
- * memory placement of buffer.
- */
-struct kexec_buf {
- struct kimage *image;
- char *buffer;
- unsigned long bufsz;
- unsigned long mem;
- unsigned long memsz;
- unsigned long buf_align;
- unsigned long buf_min;
- unsigned long buf_max;
- bool top_down; /* allocate from top of memory hole */
-};
-
void kimage_file_post_load_cleanup(struct kimage *image);
#else /* CONFIG_KEXEC_FILE */
static inline void kimage_file_post_load_cleanup(struct kimage *image) { }