Pull KVM updates from Paolo Bonzini:
"ARM:
- Support for userspace handling of synchronous external aborts
(SEAs), allowing the VMM to potentially handle the abort in a
non-fatal manner
- Large rework of the VGIC's list register handling with the goal of
supporting more active/pending IRQs than available list registers
in hardware. In addition, the VGIC now supports EOImode==1 style
deactivations for IRQs which may occur on a separate vCPU than the
one that acked the IRQ
- Support for FEAT_XNX (user / privileged execute permissions) and
FEAT_HAF (hardware update to the Access Flag) in the software page
table walkers and shadow MMU
- Allow page table destruction to reschedule, fixing long
need_resched latencies observed when destroying a large VM
- Minor fixes to KVM and selftests
LoongArch:
- Get VM PMU capability from HW GCFG register
- Add AVEC basic support
- Use 64-bit register definition for EIOINTC
- Add KVM timer test cases for tools/selftests
RISC-V:
- SBI message passing (MPXY) support for KVM guest
- Give a new, more specific error subcode for the case when in-kernel
AIA virtualization fails to allocate IMSIC VS-file
- Support KVM_DIRTY_LOG_INITIALLY_SET, enabling dirty log gradually
in small chunks
- Fix guest page fault within HLV* instructions
- Flush VS-stage TLB after VCPU migration for Andes cores
s390:
- Always allocate ESCA (Extended System Control Area), instead of
starting with the basic SCA and converting to ESCA with the
addition of the 65th vCPU. The price is an increased number of exits
(and worse performance) on z10 and earlier processors; ESCA was
introduced by z114/z196 in 2010
- VIRT_XFER_TO_GUEST_WORK support
- Operation exception forwarding support
- Cleanups
x86:
- Skip the costly "zap all SPTEs" on an MMIO generation wrap if MMIO
SPTE caching is disabled, as there can't be any relevant SPTEs to
zap
- Relocate a misplaced export
- Fix an async #PF bug where KVM would clear the completion queue
when the guest transitioned in and out of paging mode, e.g. when
handling an SMI and then returning to paged mode via RSM
- Leave KVM's user-return notifier registered even when disabling
virtualization, as long as kvm.ko is loaded. On reboot/shutdown,
keeping the notifier registered is ok; the kernel does not use the
MSRs and the callback will run cleanly and restore host MSRs if the
CPU manages to return to userspace before the system goes down
- Use the checked version of {get,put}_user()
- Fix a long-lurking bug where KVM's lack of catch-up logic for
periodic APIC timers can result in a hard lockup in the host (see
the sketch after this list)
- Revert the periodic kvmclock sync logic now that KVM doesn't use a
clocksource that's subject to NTP corrections
- Clean up KVM's handling of MMIO Stale Data and L1TF, and bury the
latter behind CONFIG_CPU_MITIGATIONS
- Context switch XCR0, XSS, and PKRU outside of the entry/exit fast
path; the only reason they were handled in the fast path was to
paper over a bug in the core #MC code, and that has long since been
fixed
- Add emulator support for AVX MOV instructions, to play nice with
emulated devices whose guest drivers like to access PCI BARs with
large multi-byte instructions
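
A minimal sketch of the periodic-timer catch-up idea mentioned above
(illustrative names only, not KVM's actual implementation):

    #include <stdint.h>

    /*
     * Advance a periodic timer's deadline past "now" in one step instead
     * of re-arming one period at a time; without catch-up logic, a timer
     * that fires faster than it can be serviced starves the CPU.
     */
    static uint64_t advance_periodic_deadline(uint64_t deadline,
                                              uint64_t period, uint64_t now)
    {
        uint64_t missed;

        if (!period || now < deadline)
            return deadline;
        missed = (now - deadline) / period + 1;    /* periods to skip */
        return deadline + missed * period;
    }
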
x86 (AMD):
- Fix a few missing "VMCB dirty" bugs
- Fix the worst of KVM's lack of EFER.LMSLE emulation
- Add AVIC support for addressing 4k vCPUs in x2AVIC mode
- Fix incorrect handling of selective CR0 writes when checking
intercepts during emulation of L2 instructions
- Fix a currently-benign bug where KVM would clobber SPEC_CTRL[63:32]
on VMRUN and #VMEXIT
- Fix a bug where KVM would corrupt the guest code stream when re-injecting
a soft interrupt if the guest patched the underlying code after the
VM-Exit, e.g. when Linux patches code with a temporary INT3
- Add KVM_X86_SNP_POLICY_BITS to advertise supported SNP policy bits
to userspace, and extend KVM "support" to all policy bits that
don't require any actual support from KVM
x86 (Intel):
- Use the root role from kvm_mmu_page to construct EPTPs instead of
the current vCPU state, partly as worthwhile cleanup, but mostly to
pave the way for tracking per-root TLB flushes, and elide EPT
flushes on pCPU migration if the root is clean from a previous
flush
- Add a few missing nested consistency checks
- Rip out support for doing "early" consistency checks via hardware
as the functionality hasn't been used in years and is no longer
useful in general; replace it with an off-by-default module param
to WARN if hardware fails a check that KVM does not perform
- Fix a currently-benign bug where KVM would drop the guest's
SPEC_CTRL[63:32] on VM-Enter
- Misc cleanups
- Overhaul the TDX code to address systemic races where KVM (acting
on behalf of userspace) could inadvertently trigger lock contention
in the TDX-Module; KVM was either working around these in weird,
ugly ways, or was simply oblivious to them (though even Yan's
devilish selftests could only break individual VMs, not the host
kernel)
- Fix a bug where KVM could corrupt a vCPU's cpu_list when freeing a
TDX vCPU, if creating said vCPU failed partway through
- Fix a few sparse warnings (bad annotation, 0 != NULL)
- Use struct_size() to simplify copying TDX capabilities to
userspace (see the sketch after this list)
- Fix a bug where TDX would effectively corrupt user-return MSR
values if the TDX Module rejects VP.ENTER and thus doesn't clobber
host MSRs as expected
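
For reference, a small sketch of the struct_size() pattern mentioned in
the TDX capabilities item above; the structure is made up for
illustration and is not the actual TDX ABI:

    #include <linux/overflow.h>
    #include <linux/slab.h>
    #include <linux/string.h>
    #include <linux/types.h>

    struct caps {            /* hypothetical flexible-array layout */
        u32 nr_entries;
        u64 entries[];
    };

    static struct caps *copy_caps(const struct caps *src)
    {
        /* sizeof(*src) + nr_entries * sizeof(*entries), overflow-checked */
        size_t sz = struct_size(src, entries, src->nr_entries);
        struct caps *dst = kmalloc(sz, GFP_KERNEL);

        if (dst)
            memcpy(dst, src, sz);
        return dst;
    }
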
Selftests:
- Fix a math goof in mmu_stress_test when running on a single-CPU
system/VM
- Forcefully override ARCH from x86_64 to x86 to play nice with
specifying ARCH=x86_64 on the command line
- Extend a bunch of nested VMX tests to validate nested SVM as well
- Add support for LA57 in the core VM_MODE_xxx macro, and add a test
to verify KVM can save/restore nested VMX state when L1 is using
5-level paging, but L2 is not
- Clean up the guest paging code in anticipation of sharing the core
logic for nested EPT and nested NPT
guest_memfd:
- Add NUMA mempolicy support for guest_memfd, and clean up a variety
of rough edges in guest_memfd along the way
- Define a CLASS to automatically handle get+put when grabbing a
guest_memfd from a memslot to make it harder to leak references
(see the sketch after this list)
- Enhance KVM selftests to make it easier to develop and debug
selftests like those added for guest_memfd NUMA support, e.g. where
test and/or KVM bugs often result in hard-to-debug SIGBUS errors
- Misc cleanups
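
A rough sketch of the cleanup.h CLASS() pattern referred to in the
get+put item above; struct gmem_ref and the gmem_get()/gmem_put()
helpers are invented for illustration:

    #include <linux/cleanup.h>
    #include <linux/errno.h>

    struct kvm_memory_slot;
    struct gmem_ref;
    struct gmem_ref *gmem_get(struct kvm_memory_slot *slot); /* hypothetical */
    void gmem_put(struct gmem_ref *ref);                     /* hypothetical */

    /* Tie the put to the variable's scope: it runs automatically when
     * the CLASS() variable goes out of scope. */
    DEFINE_CLASS(gmem_ref, struct gmem_ref *,
                 if (_T) gmem_put(_T),            /* exit */
                 gmem_get(slot),                  /* init */
                 struct kvm_memory_slot *slot);

    static int use_gmem(struct kvm_memory_slot *slot)
    {
        CLASS(gmem_ref, ref)(slot);               /* dropped on return */

        if (!ref)
            return -ENOENT;
        /* ... use ref ... */
        return 0;
    }
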
Generic:
- Use the recently-added WQ_PERCPU when creating the per-CPU
workqueue for irqfd cleanup
- Fix a goof in the dirty ring documentation
- Fix choice of target for directed yield across different calls to
kvm_vcpu_on_spin(); the function was always starting from the first
vCPU instead of continuing the round-robin search (see the sketch
below)"
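
A minimal sketch of the round-robin continuation described in the last
item; the helper and state below are illustrative, not KVM's actual
code:

    #include <stdbool.h>

    bool vcpu_is_runnable(int idx);    /* hypothetical predicate */

    /* Continue the scan where the previous call left off instead of
     * restarting from vCPU 0 on every invocation. */
    static int pick_yield_target(int nr_vcpus, int *last_boosted)
    {
        int i, idx;

        for (i = 1; i <= nr_vcpus; i++) {
            idx = (*last_boosted + i) % nr_vcpus;
            if (vcpu_is_runnable(idx)) {
                *last_boosted = idx;
                return idx;
            }
        }
        return -1;    /* no suitable target */
    }
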
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (260 commits)
KVM: arm64: at: Update AF on software walk only if VM has FEAT_HAFDBS
KVM: arm64: at: Use correct HA bit in TCR_EL2 when regime is EL2
KVM: arm64: Document KVM_PGTABLE_PROT_{UX,PX}
KVM: arm64: Fix spelling mistake "Unexpeced" -> "Unexpected"
KVM: arm64: Add break to default case in kvm_pgtable_stage2_pte_prot()
KVM: arm64: Add endian casting to kvm_swap_s[12]_desc()
KVM: arm64: Fix compilation when CONFIG_ARM64_USE_LSE_ATOMICS=n
KVM: arm64: selftests: Add test for AT emulation
KVM: arm64: nv: Expose hardware access flag management to NV guests
KVM: arm64: nv: Implement HW access flag management in stage-2 SW PTW
KVM: arm64: Implement HW access flag management in stage-1 SW PTW
KVM: arm64: Propagate PTW errors up to AT emulation
KVM: arm64: Add helper for swapping guest descriptor
KVM: arm64: nv: Use pgtable definitions in stage-2 walk
KVM: arm64: Handle endianness in read helper for emulated PTW
KVM: arm64: nv: Stop passing vCPU through void ptr in S2 PTW
KVM: arm64: Call helper for reading descriptors directly
KVM: arm64: nv: Advertise support for FEAT_XNX
KVM: arm64: Teach ptdump about FEAT_XNX permissions
KVM: s390: Use generic VIRT_XFER_TO_GUEST_WORK functions
...
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * S390 low-level entry points.
 *
 * Copyright IBM Corp. 1999, 2012
 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
 *	      Hartmut Penner (hp@de.ibm.com),
 *	      Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
 */

#include <linux/export.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/asm-extable.h>
#include <asm/alternative.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/dwarf.h>
#include <asm/errno.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/page.h>
#include <asm/sigp.h>
#include <asm/irq.h>
#include <asm/fpu-insn.h>
#include <asm/setup.h>
#include <asm/nmi.h>
#include <asm/nospec-insn.h>
#include <asm/lowcore.h>
#include <asm/machine.h>

_LPP_OFFSET = __LC_LPP

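/*
 * STBEAR and LBEAR store/load the breaking-event-address register and
 * MBEAR copies the saved last-break value into pt_regs; all three are
 * patched in via ALTERNATIVE only when facility 193 (BEAR enhancement)
 * is installed and are nops otherwise. LPSWEY loads the return PSW,
 * falling back to a branch to the traditional lpswe code when the
 * facility is not available; a second alternative handles the
 * relocated lowcore (MFEATURE_LOWCORE).
 */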
	.macro STBEAR address
	ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193)
	.endm

	.macro LBEAR address
	ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193)
	.endm

	.macro LPSWEY address, lpswe
	ALTERNATIVE_2 "b \lpswe;nopr", \
		".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \
		__stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \
		ALT_FEATURE(MFEATURE_LOWCORE)
	.endm

	.macro MBEAR reg, lowcore
	ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\
		ALT_FACILITY(193)
	.endm

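/*
 * CHECK_VMAP_STACK verifies that %r15 points into one of the known
 * kernel stacks (kernel, async, machine check, nodat or restart
 * stack). If it does, control continues at \oklabel; otherwise the
 * address of \savearea is loaded into %r14 and we branch to
 * stack_invalid.
 */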
	.macro CHECK_VMAP_STACK savearea, lowcore, oklabel
	lgr	%r14,%r15
	nill	%r14,0x10000 - THREAD_SIZE
	oill	%r14,STACK_INIT_OFFSET
	clg	%r14,__LC_KERNEL_STACK(\lowcore)
	je	\oklabel
	clg	%r14,__LC_ASYNC_STACK(\lowcore)
	je	\oklabel
	clg	%r14,__LC_MCCK_STACK(\lowcore)
	je	\oklabel
	clg	%r14,__LC_NODAT_STACK(\lowcore)
	je	\oklabel
	clg	%r14,__LC_RESTART_STACK(\lowcore)
	je	\oklabel
	la	%r14,\savearea(\lowcore)
	j	stack_invalid
	.endm

/*
 * The TSTMSK macro generates a test-under-mask instruction by
 * calculating the memory offset for the specified mask value.
 * Mask value can be any constant. The macro shifts the mask
 * value to calculate the memory offset for the test-under-mask
 * instruction.
 */
	.macro TSTMSK addr, mask, size=8, bytepos=0
		.if (\bytepos < \size) && (\mask >> 8)
			.if (\mask & 0xff)
				.error "Mask exceeds byte boundary"
			.endif
			TSTMSK \addr, "(\mask >> 8)", \size, "(\bytepos + 1)"
			.exitm
		.endif
		.ifeq \mask
			.error "Mask must not be zero"
		.endif
		off = \size - \bytepos - 1
		tm	off+\addr, \mask
	.endm
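/*
 * Example (illustrative mask value): with the default size of 8,
 *	TSTMSK	__LC_MCCK_CODE(%r13),0x0400
 * recurses once (mask >> 8, bytepos + 1) and expands to
 *	tm	6+__LC_MCCK_CODE(%r13),4
 * i.e. it tests bit 0x04 in the byte at offset 6 of the 8-byte field.
 */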
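/*
 * BPOFF, BPON, BPENTER and BPEXIT use the perform-processor-assist
 * instruction (opcode 0xb2e8) to control branch prediction as a
 * Spectre mitigation: BPOFF/BPON do so unconditionally, while
 * BPENTER/BPEXIT first check the _TIF_ISOLATE_BP_GUEST flag. Which
 * variant is emitted is decided at patch time via ALT_SPEC(82).
 */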
	.macro BPOFF
	ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82)
	.endm

	.macro BPON
	ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
	.endm

	.macro BPENTER tif_ptr,tif_mask
	ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
		    "j .+12; nop; nop", ALT_SPEC(82)
	.endm

	.macro BPEXIT tif_ptr,tif_mask
	TSTMSK	\tif_ptr,\tif_mask
	ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \
		    "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
	.endm

#if IS_ENABLED(CONFIG_KVM)
	.macro SIEEXIT sie_control,lowcore
	lg	%r9,\sie_control			# get control block pointer
	ni	__SIE_PROG0C+3(%r9),0xfe		# no longer in SIE
	lctlg	%c1,%c1,__LC_USER_ASCE(\lowcore)	# load primary asce
	lg	%r9,__LC_CURRENT(\lowcore)
	mvi	__TI_sie(%r9),0
	larl	%r9,sie_exit				# skip forward to sie_exit
	.endm
#endif

	.macro STACKLEAK_ERASE
#ifdef CONFIG_KSTACK_ERASE
	brasl	%r14,stackleak_erase_on_task_stack
#endif
	.endm

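/*
 * Generate the expoline thunk for %r14; BR_EX below branches through
 * this thunk when expolines (CONFIG_EXPOLINE) are enabled, to avoid
 * speculative execution of indirect branches.
 */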
	GEN_BR_THUNK %r14

	.section .kprobes.text, "ax"
.Ldummy:
	/*
	 * The following nop exists only in order to avoid that the next
	 * symbol starts at the beginning of the kprobes text section.
	 * In that case there would be several symbols at the same address.
	 * E.g. objdump would take an arbitrary symbol when disassembling
	 * the code.
	 * With the added nop in between this cannot happen.
	 */
	nop	0

/*
 * Scheduler resume function, called by __switch_to
 *  gpr2 = (task_struct *)prev
 *  gpr3 = (task_struct *)next
 * Returns:
 *  gpr2 = prev
 */
SYM_FUNC_START(__switch_to_asm)
	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
	lghi	%r4,__TASK_stack
	lghi	%r1,__TASK_thread
	llill	%r5,STACK_INIT_OFFSET
	stg	%r15,__THREAD_ksp(%r1,%r2)	# store kernel stack of prev
	lg	%r15,0(%r4,%r3)			# start of kernel stack of next
	agr	%r15,%r5			# end of kernel stack of next
	GET_LC	%r13
	stg	%r3,__LC_CURRENT(%r13)		# store task struct of next
	stg	%r15,__LC_KERNEL_STACK(%r13)	# store end of kernel stack
	lg	%r15,__THREAD_ksp(%r1,%r3)	# load kernel stack of next
	lay	%r4,__TASK_pid(%r3)
	mvc	__LC_CURRENT_PID(4,%r13),0(%r4)	# store pid of next
	ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40)
#ifdef CONFIG_STACKPROTECTOR
	lg	%r3,__TASK_stack_canary(%r3)
	stg	%r3,__LC_STACK_CANARY(%r13)
#endif
	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
	BR_EX	%r14
SYM_FUNC_END(__switch_to_asm)

#if IS_ENABLED(CONFIG_KVM)
/*
 * __sie64a calling convention:
 * %r2 pointer to sie control block phys
 * %r3 pointer to sie control block virt
 * %r4 guest register save area
 * %r5 guest asce
 */
SYM_FUNC_START(__sie64a)
	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
	GET_LC	%r13
	lg	%r14,__LC_CURRENT(%r13)
	stg	%r2,__SF_SIE_CONTROL_PHYS(%r15)	# save sie block physical..
	stg	%r3,__SF_SIE_CONTROL(%r15)	# ...and virtual addresses
	stg	%r4,__SF_SIE_SAVEAREA(%r15)	# save guest register save area
	stg	%r5,__SF_SIE_GUEST_ASCE(%r15)	# save guest asce
	xc	__SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
	mvc	__SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
	lmg	%r0,%r13,0(%r4)			# load guest gprs 0-13
	mvi	__TI_sie(%r14),1
	stosm	__SF_SIE_IRQ(%r15),0x03		# enable interrupts
	lctlg	%c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
	tm	__SIE_PROG20+3(%r14),3		# last exit...
	jnz	.Lsie_skip
	lg	%r14,__SF_SIE_CONTROL_PHYS(%r15)	# get sie block phys addr
	BPEXIT	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
.Lsie_entry:
	sie	0(%r14)
	# Let the next instruction be NOP to avoid triggering a machine check
	# and handling it in a guest as result of the instruction execution.
	nopr	7
.Lsie_leave:
	BPOFF
	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
.Lsie_skip:
	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
	GET_LC	%r14
	lctlg	%c1,%c1,__LC_USER_ASCE(%r14)	# load primary asce
	lg	%r14,__LC_CURRENT(%r14)
	mvi	__TI_sie(%r14),0
SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
	stnsm	__SF_SIE_IRQ(%r15),0xfc		# disable interrupts
	lg	%r14,__SF_SIE_SAVEAREA(%r15)	# load guest register save area
	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
	xgr	%r0,%r0				# clear guest registers to
	xgr	%r1,%r1				# prevent speculative use
	xgr	%r3,%r3
	xgr	%r4,%r4
	xgr	%r5,%r5
	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
	lg	%r2,__SF_SIE_REASON(%r15)	# return exit reason code
	BR_EX	%r14
SYM_FUNC_END(__sie64a)
EXPORT_SYMBOL(__sie64a)
EXPORT_SYMBOL(sie_exit)
#endif

/*
 * SVC interrupt handler routine. System calls are synchronous events and
 * are entered with interrupts disabled.
 */

SYM_CODE_START(system_call)
	STMG_LC	%r8,%r15,__LC_SAVE_AREA
	GET_LC	%r13
	stpt	__LC_SYS_ENTER_TIMER(%r13)
	BPOFF
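	# %r14 is handed to __do_syscall as the second argument (via %r3
	# below): zero for a normal svc, one when entering through
	# .Lsysc_per after a single-stepped svc (see .Lpgm_svcper)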
	lghi	%r14,0
.Lsysc_per:
	STBEAR	__LC_LAST_BREAK(%r13)
	lg	%r15,__LC_KERNEL_STACK(%r13)
	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
	stmg	%r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
	# clear user controlled register to prevent speculative use
	xgr	%r0,%r0
	xgr	%r1,%r1
	xgr	%r4,%r4
	xgr	%r5,%r5
	xgr	%r6,%r6
	xgr	%r7,%r7
	xgr	%r8,%r8
	xgr	%r9,%r9
	xgr	%r10,%r10
	xgr	%r11,%r11
	la	%r2,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
	mvc	__PT_R8(64,%r2),__LC_SAVE_AREA(%r13)
	MBEAR	%r2,%r13
	lgr	%r3,%r14
	brasl	%r14,__do_syscall
	STACKLEAK_ERASE
	mvc	__LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
	BPON
	LBEAR	STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
	stpt	__LC_EXIT_TIMER(%r13)
	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(system_call)

#
# a new process exits the kernel with ret_from_fork
#
SYM_CODE_START(ret_from_fork)
	lgr	%r3,%r11
	brasl	%r14,__ret_from_fork
	STACKLEAK_ERASE
	GET_LC	%r13
	mvc	__LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
	BPON
	LBEAR	STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
	stpt	__LC_EXIT_TIMER(%r13)
	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(ret_from_fork)

/*
 * Program check handler routine
 */

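/*
 * Register usage: %r8/%r9 hold the program check old PSW, %r10
 * collects the pt_regs flags (_PIF_GUEST_FAULT if the fault happened
 * while a KVM guest was running in SIE), and %r11 points to pt_regs
 * once the stack has been set up.
 */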
SYM_CODE_START(pgm_check_handler)
	STMG_LC	%r8,%r15,__LC_SAVE_AREA
	GET_LC	%r13
	stpt	__LC_SYS_ENTER_TIMER(%r13)
	BPOFF
	lmg	%r8,%r9,__LC_PGM_OLD_PSW(%r13)
	xgr	%r10,%r10
	tmhh	%r8,0x0001			# coming from user space?
	jo	3f				# -> fault in user space
#if IS_ENABLED(CONFIG_KVM)
	lg	%r11,__LC_CURRENT(%r13)
	tm	__TI_sie(%r11),0xff
	jz	1f
	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
	SIEEXIT	__SF_SIE_CONTROL(%r15),%r13
	lghi	%r10,_PIF_GUEST_FAULT
#endif
1:	tmhh	%r8,0x4000			# PER bit set in old PSW ?
	jnz	2f				# -> enabled, can't be a double fault
	tm	__LC_PGM_ILC+3(%r13),0x80	# check for per exception
	jnz	.Lpgm_svcper			# -> single stepped svc
2:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
	# CHECK_VMAP_STACK branches to stack_invalid or 4f
	CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
3:	lg	%r15,__LC_KERNEL_STACK(%r13)
4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
	stg	%r10,__PT_FLAGS(%r11)
	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
	stmg	%r0,%r7,__PT_R0(%r11)
	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
	mvc	__PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
	stmg	%r8,%r9,__PT_PSW(%r11)
	# clear user controlled registers to prevent speculative use
	xgr	%r0,%r0
	xgr	%r1,%r1
	xgr	%r3,%r3
	xgr	%r4,%r4
	xgr	%r5,%r5
	xgr	%r6,%r6
	xgr	%r7,%r7
	xgr	%r12,%r12
	lgr	%r2,%r11
	brasl	%r14,__do_pgm_check
	tmhh	%r8,0x0001			# returning to user space?
	jno	.Lpgm_exit_kernel
	STACKLEAK_ERASE
	BPON
	stpt	__LC_EXIT_TIMER(%r13)
.Lpgm_exit_kernel:
	mvc	__LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
	LBEAR	STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE

#
# single stepped system call
#
.Lpgm_svcper:
	mvc	__LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13)
	larl	%r14,.Lsysc_per
	stg	%r14,__LC_RETURN_PSW+8(%r13)
	lghi	%r14,1
	LBEAR	__LC_PGM_LAST_BREAK(%r13)
	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
SYM_CODE_END(pgm_check_handler)

/*
 * Interrupt handler macro used for external and IO interrupts.
 */
	.macro INT_HANDLER name,lc_old_psw,handler
SYM_CODE_START(\name)
	STMG_LC	%r8,%r15,__LC_SAVE_AREA
	GET_LC	%r13
	stckf	__LC_INT_CLOCK(%r13)
	stpt	__LC_SYS_ENTER_TIMER(%r13)
	STBEAR	__LC_LAST_BREAK(%r13)
	BPOFF
	lmg	%r8,%r9,\lc_old_psw(%r13)
	tmhh	%r8,0x0001			# interrupting from user ?
	jnz	1f
#if IS_ENABLED(CONFIG_KVM)
	lg	%r10,__LC_CURRENT(%r13)
	tm	__TI_sie(%r10),0xff
	jz	0f
	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
	SIEEXIT	__SF_SIE_CONTROL(%r15),%r13
#endif
0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
	j	2f
1:	lg	%r15,__LC_KERNEL_STACK(%r13)
2:	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
	la	%r11,STACK_FRAME_OVERHEAD(%r15)
	stmg	%r0,%r7,__PT_R0(%r11)
	# clear user controlled registers to prevent speculative use
	xgr	%r0,%r0
	xgr	%r1,%r1
	xgr	%r3,%r3
	xgr	%r4,%r4
	xgr	%r5,%r5
	xgr	%r6,%r6
	xgr	%r7,%r7
	xgr	%r10,%r10
	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
	MBEAR	%r11,%r13
	stmg	%r8,%r9,__PT_PSW(%r11)
	lgr	%r2,%r11			# pass pointer to pt_regs
	brasl	%r14,\handler
	mvc	__LC_RETURN_PSW(16,%r13),__PT_PSW(%r11)
	tmhh	%r8,0x0001			# returning to user ?
	jno	2f
	STACKLEAK_ERASE
	BPON
	stpt	__LC_EXIT_TIMER(%r13)
2:	LBEAR	__PT_LAST_BREAK(%r11)
	lmg	%r0,%r15,__PT_R0(%r11)
	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(\name)
	.endm

	.section .irqentry.text, "ax"

INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq
INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq

	.section .kprobes.text, "ax"

/*
 * Machine check handler routines
 */
SYM_CODE_START(mcck_int_handler)
	BPOFF
	GET_LC	%r13
	lmg	%r8,%r9,__LC_MCK_OLD_PSW(%r13)
	TSTMSK	__LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE
	jo	.Lmcck_panic			# yes -> rest of mcck code invalid
	TSTMSK	__LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID
	jno	.Lmcck_panic			# control registers invalid -> panic
	ptlb
	lay	%r14,__LC_CPU_TIMER_SAVE_AREA(%r13)
	mvc	__LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
	TSTMSK	__LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID
	jo	3f
	la	%r14,__LC_SYS_ENTER_TIMER(%r13)
	clc	0(8,%r14),__LC_EXIT_TIMER(%r13)
	jl	1f
	la	%r14,__LC_EXIT_TIMER(%r13)
1:	clc	0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13)
	jl	2f
	la	%r14,__LC_LAST_UPDATE_TIMER(%r13)
2:	spt	0(%r14)
	mvc	__LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
3:	TSTMSK	__LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID
	jno	.Lmcck_panic
	tmhh	%r8,0x0001			# interrupting from user ?
	jnz	.Lmcck_user
	TSTMSK	__LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID
	jno	.Lmcck_panic
#if IS_ENABLED(CONFIG_KVM)
	lg	%r10,__LC_CURRENT(%r13)
	tm	__TI_sie(%r10),0xff
	jz	.Lmcck_user
	# Need to compare the address instead of __TI_SIE flag.
	# Otherwise there would be a race between setting the flag
	# and entering SIE (or leaving and clearing the flag). This
	# would cause machine checks targeted at the guest to be
	# handled by the host.
	larl	%r14,.Lsie_entry
	clgrjl	%r9,%r14, 4f
	larl	%r14,.Lsie_leave
	clgrjhe	%r9,%r14, 4f
	lg	%r10,__LC_PCPU(%r13)
	oi	__PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST
4:	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
	SIEEXIT	__SF_SIE_CONTROL(%r15),%r13
#endif
.Lmcck_user:
	lg	%r15,__LC_MCCK_STACK(%r13)
	la	%r11,STACK_FRAME_OVERHEAD(%r15)
	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
	lay	%r14,__LC_GPREGS_SAVE_AREA(%r13)
	mvc	__PT_R0(128,%r11),0(%r14)
	# clear user controlled registers to prevent speculative use
	xgr	%r0,%r0
	xgr	%r1,%r1
	xgr	%r3,%r3
	xgr	%r4,%r4
	xgr	%r5,%r5
	xgr	%r6,%r6
	xgr	%r7,%r7
	xgr	%r10,%r10
	stmg	%r8,%r9,__PT_PSW(%r11)
	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
	lgr	%r2,%r11			# pass pointer to pt_regs
	brasl	%r14,s390_do_machine_check
	lmg	%r0,%r10,__PT_R0(%r11)
	mvc	__LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
	tm	__LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
	jno	0f
	BPON
	stpt	__LC_EXIT_TIMER(%r13)
0:	ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\
		ALT_FACILITY(193)
	LBEAR	0(%r12)
	lmg	%r11,%r15,__PT_R11(%r11)
	LPSWEY	__LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE

.Lmcck_panic:
	/*
	 * Iterate over all possible CPU addresses in the range 0..0xffff
	 * and stop each CPU using signal processor. Use compare and swap
	 * to allow just one CPU-stopper and prevent concurrent CPUs from
	 * stopping each other while leaving the others running.
	 */
	lhi	%r5,0
	lhi	%r6,1
	larl	%r7,stop_lock
	cs	%r5,%r6,0(%r7)			# single CPU-stopper only
	jnz	4f
	larl	%r7,this_cpu
	stap	0(%r7)				# this CPU address
	lh	%r4,0(%r7)
	nilh	%r4,0
	lhi	%r0,1
	sll	%r0,16				# CPU counter
	lhi	%r3,0				# next CPU address
0:	cr	%r3,%r4
	je	2f
1:	sigp	%r1,%r3,SIGP_STOP		# stop next CPU
	brc	SIGP_CC_BUSY,1b
2:	ahi	%r3,1
	brct	%r0,0b
3:	sigp	%r1,%r4,SIGP_STOP		# stop this CPU
	brc	SIGP_CC_BUSY,3b
4:	j	4b
SYM_CODE_END(mcck_int_handler)

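/*
 * Restart interrupt handler: switch to the restart stack, call the
 * function stored in the lowcore restart members (__LC_RESTART_FN
 * with __LC_RESTART_DATA and __LC_RESTART_SOURCE as arguments) and
 * finally stop the current CPU.
 */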
SYM_CODE_START(restart_int_handler)
	ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40)
	stg	%r15,__LC_SAVE_AREA_RESTART
	TSTMSK	__LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
	jz	0f
	lctlg	%c0,%c15,__LC_CREGS_SAVE_AREA
0:	larl	%r15,daton_psw
	lpswe	0(%r15)				# turn dat on, keep irqs off
.Ldaton:
	GET_LC	%r15
	lg	%r15,__LC_RESTART_STACK(%r15)
	xc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
	stmg	%r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
	GET_LC	%r13
	mvc	STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13)
	mvc	STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13)
	xc	0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
	lg	%r1,__LC_RESTART_FN(%r13)	# load fn, parm & source cpu
	lg	%r2,__LC_RESTART_DATA(%r13)
	lgf	%r3,__LC_RESTART_SOURCE(%r13)
	ltgr	%r3,%r3				# test source cpu address
	jm	1f				# negative -> skip source stop
0:	sigp	%r4,%r3,SIGP_SENSE		# sigp sense to source cpu
	brc	10,0b				# wait for status stored
1:	basr	%r14,%r1			# call function
	stap	__SF_EMPTY(%r15)		# store cpu address
	llgh	%r3,__SF_EMPTY(%r15)
2:	sigp	%r4,%r3,SIGP_STOP		# sigp stop to current cpu
	brc	2,2b
3:	j	3b
SYM_CODE_END(restart_int_handler)

	__INIT
SYM_CODE_START(early_pgm_check_handler)
	STMG_LC	%r8,%r15,__LC_SAVE_AREA
	GET_LC	%r13
	aghi	%r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
	la	%r11,STACK_FRAME_OVERHEAD(%r15)
	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
	stmg	%r0,%r7,__PT_R0(%r11)
	mvc	__PT_PSW(16,%r11),__LC_PGM_OLD_PSW(%r13)
	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
	lgr	%r2,%r11
	brasl	%r14,__do_early_pgm_check
	mvc	__LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(early_pgm_check_handler)
	__FINIT

	.section .kprobes.text, "ax"

/*
 * The synchronous or the asynchronous stack pointer is invalid. We are dead.
 * No need to properly save the registers, we are going to panic anyway.
 * Setup a pt_regs so that show_trace can provide a good call trace.
 */
SYM_CODE_START(stack_invalid)
	GET_LC	%r15
	lg	%r15,__LC_NODAT_STACK(%r15)	# change to panic stack
	la	%r11,STACK_FRAME_OVERHEAD(%r15)
	stmg	%r0,%r7,__PT_R0(%r11)
	stmg	%r8,%r9,__PT_PSW(%r11)
	mvc	__PT_R8(64,%r11),0(%r14)
	GET_LC	%r2
	mvc	__PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2)
	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
	lgr	%r2,%r11			# pass pointer to pt_regs
	jg	kernel_stack_invalid
SYM_CODE_END(stack_invalid)

	.section .data, "aw"
	.balign	4
SYM_DATA_LOCAL(stop_lock, .long 0)
SYM_DATA_LOCAL(this_cpu, .short 0)
	.balign	8
SYM_DATA_START_LOCAL(daton_psw)
	.quad	PSW_KERNEL_BITS
	.quad	.Ldaton
SYM_DATA_END(daton_psw)