From 231fb999a9acd17b1335e79f0fd6fc627353a6bc Mon Sep 17 00:00:00 2001
From: Anup Patel
Date: Mon, 17 Nov 2025 21:19:10 -0700
Subject: [PATCH 01/18] RISC-V: Enable HOTPLUG_PARALLEL for secondary CPUs

The core kernel already supports parallel bringup of secondary CPUs
(aka HOTPLUG_PARALLEL). The x86 and MIPS architectures already use
HOTPLUG_PARALLEL, and ARM is also moving toward it.

On RISC-V, no arch-specific global data is accessed in the secondary
CPU bringup path, so enabling HOTPLUG_PARALLEL only requires:
1) Providing a RISC-V specific arch_cpuhp_kick_ap_alive()
2) Calling cpuhp_ap_sync_alive() from smp_callin()

This patch was tested natively with OpenSBI on a QEMU RV64 virt
machine with 64 cores, and also with a KVM RISC-V guest with 32 VCPUs.

Signed-off-by: Anup Patel
Reviewed-by: Atish Patra
Link: https://patch.msgid.link/20250905122512.71684-1-apatel@ventanamicro.com
Signed-off-by: Paul Walmsley
---
 arch/riscv/Kconfig          |  2 +-
 arch/riscv/kernel/smpboot.c | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index fadec20b87a8..34c68bb3b857 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -198,7 +198,7 @@ config RISCV
 	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
-	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
+	select HOTPLUG_PARALLEL if HOTPLUG_CPU
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select KASAN_VMALLOC if KASAN
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 601a321e0f17..d85916a3660c 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -39,7 +39,9 @@

 #include "head.h"

+#ifndef CONFIG_HOTPLUG_PARALLEL
 static DECLARE_COMPLETION(cpu_running);
+#endif

 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
@@ -179,6 +181,12 @@ static int start_secondary_cpu(int cpu, struct task_struct *tidle)
 	return -EOPNOTSUPP;
 }

+#ifdef CONFIG_HOTPLUG_PARALLEL
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
+{
+	return start_secondary_cpu(cpu, tidle);
+}
+#else
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int ret = 0;
@@ -199,6 +207,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)

 	return ret;
 }
+#endif

 void __init smp_cpus_done(unsigned int max_cpus)
 {
@@ -225,6 +234,10 @@ asmlinkage __visible void smp_callin(void)
 	mmgrab(mm);
 	current->active_mm = mm;

+#ifdef CONFIG_HOTPLUG_PARALLEL
+	cpuhp_ap_sync_alive();
+#endif
+
 	store_cpu_topology(curr_cpuid);
 	notify_cpu_starting(curr_cpuid);

@@ -243,7 +256,9 @@ asmlinkage __visible void smp_callin(void)
 	 */
 	local_flush_icache_all();
 	local_flush_tlb_all();
+#ifndef CONFIG_HOTPLUG_PARALLEL
 	complete(&cpu_running);
+#endif
 	/*
 	 * Disable preemption before enabling interrupts, so we don't try to
 	 * schedule a CPU that hasn't actually started yet.

From c9a71610448843b6178375547627b2272f0d6193 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)"
Date: Mon, 17 Nov 2025 21:19:14 -0700
Subject: [PATCH 02/18] riscv: Remove __GFP_HIGHMEM masking

Remove unnecessary __GFP_HIGHMEM masking, which was introduced with
commit 380f2c1ae9d4 ("riscv: convert alloc_{pmd, pte}_late() to use
ptdescs"). GFP_KERNEL doesn't contain __GFP_HIGHMEM.
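As a quick sanity check (an illustrative sketch, not part of this
patch): GFP_KERNEL is defined as (__GFP_RECLAIM | __GFP_IO | __GFP_FS),
so the mask was a no-op, which a build-time assertion makes visible:

	/* illustrative only: __GFP_HIGHMEM is not among GFP_KERNEL's bits */
	static_assert((GFP_KERNEL & __GFP_HIGHMEM) == 0);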
Signed-off-by: Vishal Moola (Oracle) Link: https://patch.msgid.link/20251107182620.95844-1-vishal.moola@gmail.com Signed-off-by: Paul Walmsley --- arch/riscv/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d85efe74a4b6..addb8a9305be 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -441,7 +441,7 @@ static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va) static phys_addr_t __meminit alloc_pte_late(uintptr_t va) { - struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); /* * We do not know which mm the PTE page is associated to at this point. @@ -526,7 +526,7 @@ static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) static phys_addr_t __meminit alloc_pmd_late(uintptr_t va) { - struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); /* See comment in alloc_pte_late() regarding NULL passed the ctor */ BUG_ON(!ptdesc || !pagetable_pmd_ctor(NULL, ptdesc)); From 0a067ae21bf26e245f2687ef24cfb7302393a8a0 Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 17 Nov 2025 21:19:21 -0700 Subject: [PATCH 03/18] riscv: pgtable: Use riscv_has_extension_unlikely Use riscv_has_extension_unlikely() to check for RISCV_ISA_EXT_SVVPTC, replacing the use of asm goto with ALTERNATIVE. The "unlikely" variant is used to match the behavior of the original implementation using ALTERNATIVE("nop", "j %l[svvptc]", ...). Note that this makes the check for RISCV_ISA_EXT_SVVPTC a runtime one if RISCV_ALTERNATIVE=n, but it should still be worthwhile to do so given that TLB flushes are relatively slow. Signed-off-by: Vivian Wang Link: https://patch.msgid.link/20251020-riscv-altn-helper-wip-v4-1-ef941c87669a@iscas.ac.cn Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/pgtable.h | 15 +++++++-------- arch/riscv/mm/pgtable.c | 22 ++++++++++------------ 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 5a08eb5fe99f..45b2021eb2c1 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -496,8 +496,13 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, unsigned int nr) { - asm goto(ALTERNATIVE("nop", "j %l[svvptc]", 0, RISCV_ISA_EXT_SVVPTC, 1) - : : : : svvptc); + /* + * Svvptc guarantees that the new valid pte will be visible within + * a bounded timeframe, so when the uarch does not cache invalid + * entries, we don't have to do anything. + */ + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SVVPTC)) + return; /* * The kernel assumes that TLBs don't cache invalid entries, but @@ -509,12 +514,6 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, while (nr--) local_flush_tlb_page(address + nr * PAGE_SIZE); -svvptc:; - /* - * Svvptc guarantees that the new valid pte will be visible within - * a bounded timeframe, so when the uarch does not cache invalid - * entries, we don't have to do anything. 
- */ } #define update_mmu_cache(vma, addr, ptep) \ update_mmu_cache_range(NULL, vma, addr, ptep, 1) diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index 8b6c0a112a8d..807c0a0de182 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -9,8 +9,16 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty) { - asm goto(ALTERNATIVE("nop", "j %l[svvptc]", 0, RISCV_ISA_EXT_SVVPTC, 1) - : : : : svvptc); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SVVPTC)) { + if (!pte_same(ptep_get(ptep), entry)) { + __set_pte_at(vma->vm_mm, ptep, entry); + /* Here only not svadu is impacted */ + flush_tlb_page(vma, address); + return true; + } + + return false; + } if (!pte_same(ptep_get(ptep), entry)) __set_pte_at(vma->vm_mm, ptep, entry); @@ -19,16 +27,6 @@ int ptep_set_access_flags(struct vm_area_struct *vma, * the case that the PTE changed and the spurious fault case. */ return true; - -svvptc: - if (!pte_same(ptep_get(ptep), entry)) { - __set_pte_at(vma->vm_mm, ptep, entry); - /* Here only not svadu is impacted */ - flush_tlb_page(vma, address); - return true; - } - - return false; } int ptep_test_and_clear_young(struct vm_area_struct *vma, From 1c7d491d8678ee056ee91de5cb14c9afa50175cb Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 17 Nov 2025 21:19:21 -0700 Subject: [PATCH 04/18] riscv: checksum: Use riscv_has_extension_likely Use riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB, replacing the use of asm goto with ALTERNATIVE. The "likely" variant is used to match the behavior of the original implementation using ALTERNATIVE("j %l[no_zbb]", "nop", ...). While we're at it, also remove bogus comment about Zbb being likely available. We have to choose between "likely" and "unlikely" due to limitations of the asm goto feature, but that does not mean we should put a bad comment on why we pick "likely" over "unlikely". Signed-off-by: Vivian Wang Link: https://patch.msgid.link/20251020-riscv-altn-helper-wip-v4-2-ef941c87669a@iscas.ac.cn Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/checksum.h | 13 +++----- arch/riscv/lib/csum.c | 53 +++++++------------------------ 2 files changed, 16 insertions(+), 50 deletions(-) diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h index da378856f1d5..945cce34be92 100644 --- a/arch/riscv/include/asm/checksum.h +++ b/arch/riscv/include/asm/checksum.h @@ -49,16 +49,11 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * ZBB only saves three instructions on 32-bit and five on 64-bit so not * worth checking if supported without Alternatives. 
*/ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) && + riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) { unsigned long fold_temp; - asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, - RISCV_ISA_EXT_ZBB, 1) - : - : - : - : no_zbb); - if (IS_ENABLED(CONFIG_32BIT)) { asm(".option push \n\ .option arch,+zbb \n\ @@ -81,7 +76,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) } return (__force __sum16)(csum >> 16); } -no_zbb: + #ifndef CONFIG_32BIT csum += ror64(csum, 32); csum >>= 32; diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c index 9408f50ca59a..75bd0abffd63 100644 --- a/arch/riscv/lib/csum.c +++ b/arch/riscv/lib/csum.c @@ -40,20 +40,11 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, uproto = (__force unsigned int)htonl(proto); sum += uproto; - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) && + riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) { unsigned long fold_temp; - /* - * Zbb is likely available when the kernel is compiled with Zbb - * support, so nop when Zbb is available and jump when Zbb is - * not available. - */ - asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, - RISCV_ISA_EXT_ZBB, 1) - : - : - : - : no_zbb); asm(".option push \n\ .option arch,+zbb \n\ rori %[fold_temp], %[sum], 32 \n\ @@ -66,7 +57,7 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, : [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp)); return (__force __sum16)(sum >> 16); } -no_zbb: + sum += ror64(sum, 32); sum >>= 32; return csum_fold((__force __wsum)sum); @@ -152,21 +143,11 @@ do_csum_with_alignment(const unsigned char *buff, int len) csum = do_csum_common(ptr, end, data); #ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) && + riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) { unsigned long fold_temp; - /* - * Zbb is likely available when the kernel is compiled with Zbb - * support, so nop when Zbb is available and jump when Zbb is - * not available. - */ - asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, - RISCV_ISA_EXT_ZBB, 1) - : - : - : - : no_zbb); - #ifdef CONFIG_32BIT asm_goto_output(".option push \n\ .option arch,+zbb \n\ @@ -204,7 +185,7 @@ do_csum_with_alignment(const unsigned char *buff, int len) end: return csum >> 16; } -no_zbb: + #endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */ #ifndef CONFIG_32BIT csum += ror64(csum, 32); @@ -234,21 +215,11 @@ do_csum_no_alignment(const unsigned char *buff, int len) end = (const unsigned long *)(buff + len); csum = do_csum_common(ptr, end, data); - if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) { + if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) && + riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) { unsigned long fold_temp; - /* - * Zbb is likely available when the kernel is compiled with Zbb - * support, so nop when Zbb is available and jump when Zbb is - * not available. 
- */ - asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, - RISCV_ISA_EXT_ZBB, 1) - : - : - : - : no_zbb); - #ifdef CONFIG_32BIT asm (".option push \n\ .option arch,+zbb \n\ @@ -274,7 +245,7 @@ do_csum_no_alignment(const unsigned char *buff, int len) #endif /* !CONFIG_32BIT */ return csum >> 16; } -no_zbb: + #ifndef CONFIG_32BIT csum += ror64(csum, 32); csum >>= 32; From 8261a9d167e8b3007c0e830a1f7ab8c8b8c24b79 Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 17 Nov 2025 21:19:22 -0700 Subject: [PATCH 05/18] riscv: hweight: Use riscv_has_extension_likely Use riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB, replacing the use of asm goto with ALTERNATIVE. The "likely" variant is used to match the behavior of the original implementation using ALTERNATIVE("j %l[legacy]", "nop", ...). Signed-off-by: Vivian Wang Link: https://patch.msgid.link/20251020-riscv-altn-helper-wip-v4-3-ef941c87669a@iscas.ac.cn Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/arch_hweight.h | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h index 0e7cdbbec8ef..f3c0831beefc 100644 --- a/arch/riscv/include/asm/arch_hweight.h +++ b/arch/riscv/include/asm/arch_hweight.h @@ -19,10 +19,10 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { -#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBB, 1) - : : : : legacy); + if (!(IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) && + riscv_has_extension_likely(RISCV_ISA_EXT_ZBB))) + return __sw_hweight32(w); asm (".option push\n" ".option arch,+zbb\n" @@ -31,10 +31,6 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) : "=r" (w) : "r" (w) :); return w; - -legacy: -#endif - return __sw_hweight32(w); } static inline unsigned int __arch_hweight16(unsigned int w) @@ -50,10 +46,10 @@ static inline unsigned int __arch_hweight8(unsigned int w) #if BITS_PER_LONG == 64 static __always_inline unsigned long __arch_hweight64(__u64 w) { -#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB) - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBB, 1) - : : : : legacy); + if (!(IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && + IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) && + riscv_has_extension_likely(RISCV_ISA_EXT_ZBB))) + return __sw_hweight64(w); asm (".option push\n" ".option arch,+zbb\n" @@ -62,10 +58,6 @@ static __always_inline unsigned long __arch_hweight64(__u64 w) : "=r" (w) : "r" (w) :); return w; - -legacy: -#endif - return __sw_hweight64(w); } #else /* BITS_PER_LONG == 64 */ static inline unsigned long __arch_hweight64(__u64 w) From 6b85e9ac4a25c5e69f8121e95eea14da6e3df2fa Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 17 Nov 2025 21:19:22 -0700 Subject: [PATCH 06/18] riscv: bitops: Use riscv_has_extension_likely Use riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZBB, replacing the use of asm goto with ALTERNATIVE. The "likely" variant is used to match the behavior of the original implementation using ALTERNATIVE("j %l[legacy]", "nop", ...). 
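For reference, the helper used throughout this series is roughly the
following sketch (based on arch/riscv/include/asm/cpufeature.h,
reproduced from memory, so treat the details as approximate). It keeps
the same ALTERNATIVE-patched static branch when RISCV_ALTERNATIVE=y and
falls back to a runtime ISA-bitmap lookup otherwise:

	static __always_inline bool
	riscv_has_extension_likely(const unsigned long ext)
	{
		compiletime_assert(ext < RISCV_ISA_EXT_MAX,
				   "ext must be < RISCV_ISA_EXT_MAX");

		if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
			asm goto(ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1)
				 : : [ext] "i" (ext) : : l_no);
		} else {
			if (!__riscv_isa_extension_available(NULL, ext))
				goto l_no;
		}

		return true;
	l_no:
		return false;
	}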
Signed-off-by: Vivian Wang Link: https://patch.msgid.link/20251020-riscv-altn-helper-wip-v4-4-ef941c87669a@iscas.ac.cn Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/bitops.h | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 77880677b06e..238092125c11 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -47,9 +47,8 @@ static __always_inline __attribute_const__ unsigned long variable__ffs(unsigned long word) { - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBB, 1) - : : : : legacy); + if (!riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) + return generic___ffs(word); asm volatile (".option push\n" ".option arch,+zbb\n" @@ -58,9 +57,6 @@ static __always_inline __attribute_const__ unsigned long variable__ffs(unsigned : "=r" (word) : "r" (word) :); return word; - -legacy: - return generic___ffs(word); } /** @@ -76,9 +72,8 @@ legacy: static __always_inline __attribute_const__ unsigned long variable__fls(unsigned long word) { - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBB, 1) - : : : : legacy); + if (!riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) + return generic___fls(word); asm volatile (".option push\n" ".option arch,+zbb\n" @@ -87,9 +82,6 @@ static __always_inline __attribute_const__ unsigned long variable__fls(unsigned : "=r" (word) : "r" (word) :); return BITS_PER_LONG - 1 - word; - -legacy: - return generic___fls(word); } /** @@ -105,9 +97,8 @@ legacy: static __always_inline __attribute_const__ int variable_ffs(int x) { - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBB, 1) - : : : : legacy); + if (!riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) + return generic_ffs(x); if (!x) return 0; @@ -119,9 +110,6 @@ static __always_inline __attribute_const__ int variable_ffs(int x) : "=r" (x) : "r" (x) :); return x + 1; - -legacy: - return generic_ffs(x); } /** @@ -137,9 +125,8 @@ legacy: static __always_inline int variable_fls(unsigned int x) { - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBB, 1) - : : : : legacy); + if (!riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) + return generic_fls(x); if (!x) return 0; @@ -151,9 +138,6 @@ static __always_inline int variable_fls(unsigned int x) : "=r" (x) : "r" (x) :); return 32 - x; - -legacy: - return generic_fls(x); } /** From 724c69447975e1e0854082c324b6871c8e2214f8 Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 17 Nov 2025 21:19:23 -0700 Subject: [PATCH 07/18] riscv: cmpxchg: Use riscv_has_extension_likely Use riscv_has_extension_likely() to check for RISCV_ISA_EXT_ZAWRS, replacing the use of asm goto with ALTERNATIVE. The "likely" variant is used to match the behavior of the original implementation using ALTERNATIVE("j %l[no_zawrs]", "nop", ...). 
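A minimal usage sketch (illustrative; wait_for_change() is a made-up
consumer, not kernel code): callers such as smp_cond_load_relaxed()
end up in a loop of this shape, where __cmpwait_relaxed() now expands
to a wrs.nto-based wait when Zawrs is present and to ALT_RISCV_PAUSE()
when it is not:

	static inline u32 wait_for_change(u32 *ptr, u32 old)
	{
		u32 val;

		for (;;) {
			val = READ_ONCE(*ptr);
			if (val != old)
				return val;
			__cmpwait_relaxed(ptr, old);
		}
	}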
Signed-off-by: Vivian Wang
Link: https://patch.msgid.link/20251020-riscv-altn-helper-wip-v4-5-ef941c87669a@iscas.ac.cn
Signed-off-by: Paul Walmsley
---
 arch/riscv/include/asm/cmpxchg.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 122e1485d39a..8712cf9c69dc 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -373,9 +373,10 @@ static __always_inline void __cmpwait(volatile void *ptr,
 	u32 *__ptr32b;
 	ulong __s, __val, __mask;

-	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
-			     0, RISCV_ISA_EXT_ZAWRS, 1)
-		 : : : : no_zawrs);
+	if (!riscv_has_extension_likely(RISCV_ISA_EXT_ZAWRS)) {
+		ALT_RISCV_PAUSE();
+		return;
+	}

 	switch (size) {
 	case 1:
@@ -437,11 +438,6 @@ static __always_inline void __cmpwait(volatile void *ptr,
 	default:
 		BUILD_BUG();
 	}
-
-	return;
-
-no_zawrs:
-	ALT_RISCV_PAUSE();
 }

 #define __cmpwait_relaxed(ptr, val) \

From 6fcce9f028fb75ce03e4221f5f4f652741d178c4 Mon Sep 17 00:00:00 2001
From: Chunyan Zhang
Date: Mon, 17 Nov 2025 21:19:24 -0700
Subject: [PATCH 08/18] raid6: riscv: Prevent compiler from breaking inline vector assembly code

To prevent the compiler from breaking the inline vector assembly code,
this code must be built without compiler support for vector.

Signed-off-by: Chunyan Zhang
Link: https://patch.msgid.link/20250718072711.3865118-4-zhangchunyan@iscas.ac.cn
[pjw@kernel.org: cleaned up commit message]
Signed-off-by: Paul Walmsley
---
 lib/raid6/rvv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c
index 89da5fc247aa..015f3ee4da25 100644
--- a/lib/raid6/rvv.c
+++ b/lib/raid6/rvv.c
@@ -20,6 +20,10 @@ static int rvv_has_vector(void)
 	return has_vector();
 }

+#ifdef __riscv_vector
+#error "This code must be built without compiler support for vector"
+#endif
+
 static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
 {
 	u8 **dptr = (u8 **)ptrs;

From 3c58d7a51341fa032eda45d8b9904dbfae120ef5 Mon Sep 17 00:00:00 2001
From: Chunyan Zhang
Date: Mon, 17 Nov 2025 21:19:25 -0700
Subject: [PATCH 09/18] raid6: riscv: Allow code to be compiled in userspace

To support the userspace raid6test program, add a __KERNEL__ ifdef
around the kernel header inclusions, along with userspace wrapper
definitions, so that the code can also be compiled in userspace.

This patch also drops the NSIZE macro in favor of reading the vector
length at runtime, which works in both kernel and user space.

Reviewed-by: Alexandre Ghiti
Signed-off-by: Chunyan Zhang
Link: https://patch.msgid.link/20250718072711.3865118-5-zhangchunyan@iscas.ac.cn
Signed-off-by: Paul Walmsley
---
 lib/raid6/recov_rvv.c |   7 +-
 lib/raid6/rvv.c       | 297 +++++++++++++++++++++---------------------
 lib/raid6/rvv.h       |  17 +++
 3 files changed, 170 insertions(+), 151 deletions(-)

diff --git a/lib/raid6/recov_rvv.c b/lib/raid6/recov_rvv.c
index 5f779719c3d3..40c393206b6a 100644
--- a/lib/raid6/recov_rvv.c
+++ b/lib/raid6/recov_rvv.c
@@ -4,13 +4,8 @@
  * Author: Chunyan Zhang
  */

-#include
 #include
-
-static int rvv_has_vector(void)
-{
-	return has_vector();
-}
+#include "rvv.h"

 static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp,
 				    u8 *dq, const u8 *pbmul,
diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c
index 015f3ee4da25..75c9dafedb28 100644
--- a/lib/raid6/rvv.c
+++ b/lib/raid6/rvv.c
@@ -9,17 +9,8 @@
 * Copyright 2002-2004 H.
Peter Anvin */ -#include -#include #include "rvv.h" -#define NSIZE (riscv_v_vsize / 32) /* NSIZE = vlenb */ - -static int rvv_has_vector(void) -{ - return has_vector(); -} - #ifdef __riscv_vector #error "This code must be built without compiler support for vector" #endif @@ -28,7 +19,7 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = disks - 3; /* Highest data disk */ @@ -42,8 +33,10 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** : "=&r" (vl) ); + nsize = vl; + /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ - for (d = 0; d < bytes; d += NSIZE * 1) { + for (d = 0; d < bytes; d += nsize * 1) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -51,7 +44,7 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** "vmv.v.v v1, v0\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]) ); for (z = z0 - 1 ; z >= 0 ; z--) { @@ -75,7 +68,7 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** "vxor.vv v0, v0, v2\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), [x1d]"r"(0x1d) ); } @@ -90,8 +83,8 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** "vse8.v v1, (%[wq0])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]) ); } } @@ -101,7 +94,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -115,8 +108,10 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, : "=&r" (vl) ); + nsize = vl; + /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ - for (d = 0 ; d < bytes ; d += NSIZE * 1) { + for (d = 0 ; d < bytes ; d += nsize * 1) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -124,7 +119,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, "vmv.v.v v1, v0\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]) ); /* P/Q data pages */ @@ -149,7 +144,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, "vxor.vv v0, v0, v2\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), [x1d]"r"(0x1d) ); } @@ -189,8 +184,8 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, "vse8.v v3, (%[wq0])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]) ); } } @@ -199,7 +194,7 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = disks - 3; /* Highest data disk */ @@ -213,11 +208,13 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 */ - for (d = 0; d < bytes; d += NSIZE * 2) { + for (d = 0; d < bytes; d += nsize * 2) { /* wq$$ = wp$$ = *(unative_t 
*)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -227,8 +224,8 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** "vmv.v.v v5, v4\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]) ); for (z = z0 - 1; z >= 0; z--) { @@ -260,8 +257,8 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** "vxor.vv v4, v4, v6\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), [x1d]"r"(0x1d) ); } @@ -278,10 +275,10 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** "vse8.v v5, (%[wq1])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]) ); } } @@ -291,7 +288,7 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -305,11 +302,13 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 */ - for (d = 0; d < bytes; d += NSIZE * 2) { + for (d = 0; d < bytes; d += nsize * 2) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -319,8 +318,8 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, "vmv.v.v v5, v4\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]) ); /* P/Q data pages */ @@ -353,8 +352,8 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, "vxor.vv v4, v4, v6\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), [x1d]"r"(0x1d) ); } @@ -407,10 +406,10 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, "vse8.v v7, (%[wq1])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]) ); } } @@ -419,7 +418,7 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = disks - 3; /* Highest data disk */ @@ -433,13 +432,15 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 */ - for (d = 0; d < bytes; d += NSIZE * 4) { + for (d = 0; d < bytes; d += nsize * 4) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -453,10 +454,10 @@ static void 
raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** "vmv.v.v v13, v12\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]), + [wp2]"r"(&dptr[z0][d + 2 * nsize]), + [wp3]"r"(&dptr[z0][d + 3 * nsize]) ); for (z = z0 - 1; z >= 0; z--) { @@ -504,10 +505,10 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** "vxor.vv v12, v12, v14\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), + [wd2]"r"(&dptr[z][d + 2 * nsize]), + [wd3]"r"(&dptr[z][d + 3 * nsize]), [x1d]"r"(0x1d) ); } @@ -528,14 +529,14 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** "vse8.v v13, (%[wq3])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]), - [wp2]"r"(&p[d + NSIZE * 2]), - [wq2]"r"(&q[d + NSIZE * 2]), - [wp3]"r"(&p[d + NSIZE * 3]), - [wq3]"r"(&q[d + NSIZE * 3]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]), + [wp2]"r"(&p[d + nsize * 2]), + [wq2]"r"(&q[d + nsize * 2]), + [wp3]"r"(&p[d + nsize * 3]), + [wq3]"r"(&q[d + nsize * 3]) ); } } @@ -545,7 +546,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -559,13 +560,15 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 */ - for (d = 0; d < bytes; d += NSIZE * 4) { + for (d = 0; d < bytes; d += nsize * 4) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -579,10 +582,10 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, "vmv.v.v v13, v12\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]), + [wp2]"r"(&dptr[z0][d + 2 * nsize]), + [wp3]"r"(&dptr[z0][d + 3 * nsize]) ); /* P/Q data pages */ @@ -631,10 +634,10 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, "vxor.vv v12, v12, v14\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), + [wd2]"r"(&dptr[z][d + 2 * nsize]), + [wd3]"r"(&dptr[z][d + 3 * nsize]), [x1d]"r"(0x1d) ); } @@ -713,14 +716,14 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, "vse8.v v15, (%[wq3])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]), - [wp2]"r"(&p[d + NSIZE * 2]), - [wq2]"r"(&q[d + NSIZE * 2]), - [wp3]"r"(&p[d + NSIZE * 
3]), - [wq3]"r"(&q[d + NSIZE * 3]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]), + [wp2]"r"(&p[d + nsize * 2]), + [wq2]"r"(&q[d + nsize * 2]), + [wp3]"r"(&p[d + nsize * 3]), + [wq3]"r"(&q[d + nsize * 3]) ); } } @@ -729,7 +732,7 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = disks - 3; /* Highest data disk */ @@ -743,6 +746,8 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 @@ -753,7 +758,7 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 */ - for (d = 0; d < bytes; d += NSIZE * 8) { + for (d = 0; d < bytes; d += nsize * 8) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -775,14 +780,14 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** "vmv.v.v v29, v28\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]), - [wp4]"r"(&dptr[z0][d + 4 * NSIZE]), - [wp5]"r"(&dptr[z0][d + 5 * NSIZE]), - [wp6]"r"(&dptr[z0][d + 6 * NSIZE]), - [wp7]"r"(&dptr[z0][d + 7 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]), + [wp2]"r"(&dptr[z0][d + 2 * nsize]), + [wp3]"r"(&dptr[z0][d + 3 * nsize]), + [wp4]"r"(&dptr[z0][d + 4 * nsize]), + [wp5]"r"(&dptr[z0][d + 5 * nsize]), + [wp6]"r"(&dptr[z0][d + 6 * nsize]), + [wp7]"r"(&dptr[z0][d + 7 * nsize]) ); for (z = z0 - 1; z >= 0; z--) { @@ -862,14 +867,14 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** "vxor.vv v28, v28, v30\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), - [wd4]"r"(&dptr[z][d + 4 * NSIZE]), - [wd5]"r"(&dptr[z][d + 5 * NSIZE]), - [wd6]"r"(&dptr[z][d + 6 * NSIZE]), - [wd7]"r"(&dptr[z][d + 7 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), + [wd2]"r"(&dptr[z][d + 2 * nsize]), + [wd3]"r"(&dptr[z][d + 3 * nsize]), + [wd4]"r"(&dptr[z][d + 4 * nsize]), + [wd5]"r"(&dptr[z][d + 5 * nsize]), + [wd6]"r"(&dptr[z][d + 6 * nsize]), + [wd7]"r"(&dptr[z][d + 7 * nsize]), [x1d]"r"(0x1d) ); } @@ -898,22 +903,22 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** "vse8.v v29, (%[wq7])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]), - [wp2]"r"(&p[d + NSIZE * 2]), - [wq2]"r"(&q[d + NSIZE * 2]), - [wp3]"r"(&p[d + NSIZE * 3]), - [wq3]"r"(&q[d + NSIZE * 3]), - [wp4]"r"(&p[d + NSIZE * 4]), - [wq4]"r"(&q[d + NSIZE * 4]), - [wp5]"r"(&p[d + NSIZE * 5]), - [wq5]"r"(&q[d + NSIZE * 5]), - [wp6]"r"(&p[d + NSIZE * 6]), - [wq6]"r"(&q[d + NSIZE * 6]), - [wp7]"r"(&p[d + NSIZE * 7]), - [wq7]"r"(&q[d + NSIZE * 7]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]), + [wp2]"r"(&p[d + nsize * 2]), + [wq2]"r"(&q[d + nsize * 2]), + [wp3]"r"(&p[d + nsize * 3]), + [wq3]"r"(&q[d + nsize * 
3]), + [wp4]"r"(&p[d + nsize * 4]), + [wq4]"r"(&q[d + nsize * 4]), + [wp5]"r"(&p[d + nsize * 5]), + [wq5]"r"(&q[d + nsize * 5]), + [wp6]"r"(&p[d + nsize * 6]), + [wq6]"r"(&q[d + nsize * 6]), + [wp7]"r"(&p[d + nsize * 7]), + [wq7]"r"(&q[d + nsize * 7]) ); } } @@ -923,7 +928,7 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -937,6 +942,8 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 @@ -947,7 +954,7 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 */ - for (d = 0; d < bytes; d += NSIZE * 8) { + for (d = 0; d < bytes; d += nsize * 8) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -969,14 +976,14 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, "vmv.v.v v29, v28\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]), - [wp4]"r"(&dptr[z0][d + 4 * NSIZE]), - [wp5]"r"(&dptr[z0][d + 5 * NSIZE]), - [wp6]"r"(&dptr[z0][d + 6 * NSIZE]), - [wp7]"r"(&dptr[z0][d + 7 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]), + [wp2]"r"(&dptr[z0][d + 2 * nsize]), + [wp3]"r"(&dptr[z0][d + 3 * nsize]), + [wp4]"r"(&dptr[z0][d + 4 * nsize]), + [wp5]"r"(&dptr[z0][d + 5 * nsize]), + [wp6]"r"(&dptr[z0][d + 6 * nsize]), + [wp7]"r"(&dptr[z0][d + 7 * nsize]) ); /* P/Q data pages */ @@ -1057,14 +1064,14 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, "vxor.vv v28, v28, v30\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), - [wd4]"r"(&dptr[z][d + 4 * NSIZE]), - [wd5]"r"(&dptr[z][d + 5 * NSIZE]), - [wd6]"r"(&dptr[z][d + 6 * NSIZE]), - [wd7]"r"(&dptr[z][d + 7 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), + [wd2]"r"(&dptr[z][d + 2 * nsize]), + [wd3]"r"(&dptr[z][d + 3 * nsize]), + [wd4]"r"(&dptr[z][d + 4 * nsize]), + [wd5]"r"(&dptr[z][d + 5 * nsize]), + [wd6]"r"(&dptr[z][d + 6 * nsize]), + [wd7]"r"(&dptr[z][d + 7 * nsize]), [x1d]"r"(0x1d) ); } @@ -1195,22 +1202,22 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, "vse8.v v31, (%[wq7])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]), - [wp2]"r"(&p[d + NSIZE * 2]), - [wq2]"r"(&q[d + NSIZE * 2]), - [wp3]"r"(&p[d + NSIZE * 3]), - [wq3]"r"(&q[d + NSIZE * 3]), - [wp4]"r"(&p[d + NSIZE * 4]), - [wq4]"r"(&q[d + NSIZE * 4]), - [wp5]"r"(&p[d + NSIZE * 5]), - [wq5]"r"(&q[d + NSIZE * 5]), - [wp6]"r"(&p[d + NSIZE * 6]), - [wq6]"r"(&q[d + NSIZE * 6]), - [wp7]"r"(&p[d + NSIZE * 7]), - [wq7]"r"(&q[d + NSIZE * 7]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]), + [wp2]"r"(&p[d + nsize * 2]), + [wq2]"r"(&q[d + nsize * 2]), + [wp3]"r"(&p[d + nsize * 3]), + [wq3]"r"(&q[d + nsize * 3]), + [wp4]"r"(&p[d + nsize * 4]), + [wq4]"r"(&q[d + nsize * 4]), + 
[wp5]"r"(&p[d + nsize * 5]),
+			[wq5]"r"(&q[d + nsize * 5]),
+			[wp6]"r"(&p[d + nsize * 6]),
+			[wq6]"r"(&q[d + nsize * 6]),
+			[wp7]"r"(&p[d + nsize * 7]),
+			[wq7]"r"(&q[d + nsize * 7])
 		);
 	}
 }
diff --git a/lib/raid6/rvv.h b/lib/raid6/rvv.h
index 94044a1b707b..6d0708a2c8a4 100644
--- a/lib/raid6/rvv.h
+++ b/lib/raid6/rvv.h
@@ -7,6 +7,23 @@
  * Definitions for RISC-V RAID-6 code
  */

+#ifdef __KERNEL__
+#include
+#else
+#define kernel_vector_begin()
+#define kernel_vector_end()
+#include
+#include
+#define has_vector() (getauxval(AT_HWCAP) & COMPAT_HWCAP_ISA_V)
+#endif
+
+#include
+
+static int rvv_has_vector(void)
+{
+	return has_vector();
+}
+
 #define RAID6_RVV_WRAPPER(_n)						\
 	static void raid6_rvv ## _n ## _gen_syndrome(int disks,		\
 						     size_t bytes, void **ptrs) \

From 3ac022bf389d60e696e4d3156b72f3ccd6e6368b Mon Sep 17 00:00:00 2001
From: Chunyan Zhang
Date: Mon, 17 Nov 2025 21:19:25 -0700
Subject: [PATCH 10/18] raid6: test: Add support for RISC-V

Add RISC-V code to be compiled to allow the userspace raid6test
program to be built and run on RISC-V.

Signed-off-by: Chunyan Zhang
Reviewed-by: Alexandre Ghiti
Tested-by: Alexandre Ghiti
Link: https://patch.msgid.link/20250718072711.3865118-6-zhangchunyan@iscas.ac.cn
Signed-off-by: Paul Walmsley
---
 lib/raid6/test/Makefile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 8f2dd2210ba8..09bbe2b14cce 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -35,6 +35,11 @@ ifeq ($(ARCH),aarch64)
         HAS_NEON = yes
 endif

+ifeq ($(findstring riscv,$(ARCH)),riscv)
+        CFLAGS += -I../../../arch/riscv/include -DCONFIG_RISCV=1
+        HAS_RVV = yes
+endif
+
 ifeq ($(findstring ppc,$(ARCH)),ppc)
         CFLAGS += -I../../../arch/powerpc/include
         HAS_ALTIVEC := $(shell printf '$(pound)include \nvector int a;\n' |\
@@ -63,6 +68,9 @@ else ifeq ($(HAS_ALTIVEC),yes)
                 vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 else ifeq ($(ARCH),loongarch64)
         OBJS += loongarch_simd.o recov_loongarch_simd.o
+else ifeq ($(HAS_RVV),yes)
+        OBJS += rvv.o recov_rvv.o
+        CFLAGS += -DCONFIG_RISCV_ISA_V=1
 endif

 .c.o:

From 6efb1a9462ef0023f3f96a7b88542e1f878e31a0 Mon Sep 17 00:00:00 2001
From: Yong-Xuan Wang
Date: Mon, 17 Nov 2025 21:19:29 -0700
Subject: [PATCH 11/18] riscv: ptrace: Optimize the allocation of vector regset

The vector regset uses the maximum possible vlen value to size its .n
field, but not all hardware supports the maximum vlen, so Linux may
waste time preparing a large memory buffer (about 2^6 pages) for the
vector regset, even though the regset only needs to copy vector
registers when the process is actually using vector.

Add an .active callback and determine the .n field of the vector
regset in riscv_v_setup_ctx_cache(). This doesn't affect the ptrace
syscall or coredump, avoids oversized allocations, and better matches
real hardware limits.
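For a concrete sense of scale (illustrative arithmetic, assuming
RISCV_MAX_VLENB is 8192 bytes): the old static .n implied a payload of
32 * 8192 = 256 KiB, i.e. 2^6 4-KiB pages, whereas a machine with
VLEN = 128 (vlenb = 16) only needs 32 * 16 bytes plus the
__riscv_v_regset_state header.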
Signed-off-by: Yong-Xuan Wang
Reviewed-by: Greentime Hu
Reviewed-by: Andy Chiu
Tested-by: Andy Chiu
Link: https://patch.msgid.link/20251013091318.467864-2-yongxuan.wang@sifive.com
Signed-off-by: Paul Walmsley
---
 arch/riscv/include/asm/vector.h |  1 +
 arch/riscv/kernel/ptrace.c      | 24 +++++++++++++++++++++---
 arch/riscv/kernel/vector.c      |  2 ++
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index b61786d43c20..e7aa449368ad 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -51,6 +51,7 @@ void put_cpu_vector_context(void);
 void riscv_v_thread_free(struct task_struct *tsk);
 void __init riscv_v_setup_ctx_cache(void);
 void riscv_v_thread_alloc(struct task_struct *tsk);
+void __init update_regset_vector_info(unsigned long size);

 static inline u32 riscv_v_flags(void)
 {
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 8e86305831ea..e6272d74572f 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -153,6 +153,17 @@ static int riscv_vr_set(struct task_struct *target,
 				 0, riscv_v_vsize);
 	return ret;
 }
+
+static int riscv_vr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!(has_vector() || has_xtheadvector()))
+		return -ENODEV;
+
+	if (!riscv_v_vstate_query(task_pt_regs(target)))
+		return 0;
+
+	return regset->n;
+}
 #endif

 #ifdef CONFIG_RISCV_ISA_SUPM
@@ -184,7 +195,7 @@ static int tagged_addr_ctrl_set(struct task_struct *target,
 }
 #endif

-static const struct user_regset riscv_user_regset[] = {
+static struct user_regset riscv_user_regset[] __ro_after_init = {
 	[REGSET_X] = {
 		USER_REGSET_NOTE_TYPE(PRSTATUS),
 		.n = ELF_NGREG,
@@ -207,11 +218,10 @@ static const struct user_regset riscv_user_regset[] = {
 	[REGSET_V] = {
 		USER_REGSET_NOTE_TYPE(RISCV_VECTOR),
 		.align = 16,
-		.n = ((32 * RISCV_MAX_VLENB) +
-		      sizeof(struct __riscv_v_regset_state)) / sizeof(__u32),
 		.size = sizeof(__u32),
 		.regset_get = riscv_vr_get,
 		.set = riscv_vr_set,
+		.active = riscv_vr_active,
 	},
 #endif
 #ifdef CONFIG_RISCV_ISA_SUPM
@@ -233,6 +243,14 @@ static const struct user_regset_view riscv_user_native_view = {
 	.n = ARRAY_SIZE(riscv_user_regset),
 };

+#ifdef CONFIG_RISCV_ISA_V
+void __init update_regset_vector_info(unsigned long size)
+{
+	riscv_user_regset[REGSET_V].n = (size + sizeof(struct __riscv_v_regset_state)) /
+					sizeof(__u32);
+}
+#endif
+
 struct pt_regs_offset {
 	const char *name;
 	int offset;
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 901e67adf576..3ed071dab9d8 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -66,6 +66,8 @@ void __init riscv_v_setup_ctx_cache(void)
 	if (!(has_vector() || has_xtheadvector()))
 		return;

+	update_regset_vector_info(riscv_v_vsize);
+
 	riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
 							 riscv_v_vsize, 16, SLAB_PANIC,
 							 0, riscv_v_vsize, NULL);

From f0ae09a89267552bde346c8cb27acb838a21eb1c Mon Sep 17 00:00:00 2001
From: Yong-Xuan Wang
Date: Mon, 17 Nov 2025 21:19:30 -0700
Subject: [PATCH 12/18] selftests: riscv: Add test for the Vector ptrace interface

Add a test case that performs basic verification of the Vector ptrace
interface: it forks a child process and then uses ptrace to inspect
and manipulate the child's v31 register.
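For reference when reading the test (the uapi layout from
arch/riscv/include/uapi/asm/ptrace.h, quoted here as an aid): the
NT_RISCV_VECTOR regset is this header followed by the 32 vector
registers, vlenb bytes each, so v31 lives at offset
sizeof(struct __riscv_v_regset_state) + 31 * vlenb:

	struct __riscv_v_regset_state {
		unsigned long vstart;
		unsigned long vl;
		unsigned long vtype;
		unsigned long vcsr;
		unsigned long vlenb;
		char vreg[];	/* 32 registers, vlenb bytes each */
	};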
Signed-off-by: Yong-Xuan Wang Reviewed-by: Andy Chiu Tested-by: Andy Chiu Link: https://patch.msgid.link/20251013091318.467864-3-yongxuan.wang@sifive.com Signed-off-by: Paul Walmsley --- tools/testing/selftests/riscv/vector/Makefile | 5 +- .../selftests/riscv/vector/vstate_ptrace.c | 134 ++++++++++++++++++ 2 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/riscv/vector/vstate_ptrace.c diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile index 6f7497f4e7b3..2c2a33fc083e 100644 --- a/tools/testing/selftests/riscv/vector/Makefile +++ b/tools/testing/selftests/riscv/vector/Makefile @@ -2,7 +2,7 @@ # Copyright (C) 2021 ARM Limited # Originally tools/testing/arm64/abi/Makefile -TEST_GEN_PROGS := v_initval vstate_prctl +TEST_GEN_PROGS := v_initval vstate_prctl vstate_ptrace TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc include ../../lib.mk @@ -26,3 +26,6 @@ $(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o $(OUTPUT)/v_exec_initval_nolibc: v_exec_initval_nolibc.c $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc + +$(OUTPUT)/vstate_ptrace: vstate_ptrace.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/vector/vstate_ptrace.c b/tools/testing/selftests/riscv/vector/vstate_ptrace.c new file mode 100644 index 000000000000..1479abc0c9cb --- /dev/null +++ b/tools/testing/selftests/riscv/vector/vstate_ptrace.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include +#include "../../kselftest.h" +#include "v_helpers.h" + +int parent_set_val, child_set_val; + +static long do_ptrace(enum __ptrace_request op, pid_t pid, long type, size_t size, void *data) +{ + struct iovec v_iovec = { + .iov_len = size, + .iov_base = data + }; + + return ptrace(op, pid, type, &v_iovec); +} + +static int do_child(void) +{ + int out; + + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) { + ksft_perror("PTRACE_TRACEME failed\n"); + return EXIT_FAILURE; + } + + asm volatile (".option push\n\t" + ".option arch, +v\n\t" + ".option norvc\n\t" + "vsetivli x0, 1, e32, m1, ta, ma\n\t" + "vmv.s.x v31, %[in]\n\t" + "ebreak\n\t" + "vmv.x.s %[out], v31\n\t" + ".option pop\n\t" + : [out] "=r" (out) + : [in] "r" (child_set_val)); + + if (out != parent_set_val) + return EXIT_FAILURE; + + return EXIT_SUCCESS; +} + +static void do_parent(pid_t child) +{ + int status; + void *data = NULL; + + /* Attach to the child */ + while (waitpid(child, &status, 0)) { + if (WIFEXITED(status)) { + ksft_test_result(WEXITSTATUS(status) == 0, "SETREGSET vector\n"); + goto out; + } else if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) { + size_t size; + void *data, *v31; + struct __riscv_v_regset_state *v_regset_hdr; + struct user_regs_struct *gpreg; + + size = sizeof(*v_regset_hdr); + data = malloc(size); + if (!data) + goto out; + v_regset_hdr = (struct __riscv_v_regset_state *)data; + + if (do_ptrace(PTRACE_GETREGSET, child, NT_RISCV_VECTOR, size, data)) + goto out; + + ksft_print_msg("vlenb %ld\n", v_regset_hdr->vlenb); + data = realloc(data, size + v_regset_hdr->vlenb * 32); + if (!data) + goto out; + v_regset_hdr = (struct __riscv_v_regset_state *)data; + v31 = (void *)(data + size + v_regset_hdr->vlenb * 31); + size += v_regset_hdr->vlenb * 32; + + if (do_ptrace(PTRACE_GETREGSET, child, 
NT_RISCV_VECTOR, size, data)) + goto out; + + ksft_test_result(*(int *)v31 == child_set_val, "GETREGSET vector\n"); + + *(int *)v31 = parent_set_val; + if (do_ptrace(PTRACE_SETREGSET, child, NT_RISCV_VECTOR, size, data)) + goto out; + + /* move the pc forward */ + size = sizeof(*gpreg); + data = realloc(data, size); + gpreg = (struct user_regs_struct *)data; + + if (do_ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, size, data)) + goto out; + + gpreg->pc += 4; + if (do_ptrace(PTRACE_SETREGSET, child, NT_PRSTATUS, size, data)) + goto out; + } + + ptrace(PTRACE_CONT, child, NULL, NULL); + } + +out: + free(data); +} + +int main(void) +{ + pid_t child; + + ksft_set_plan(2); + if (!is_vector_supported() && !is_xtheadvector_supported()) + ksft_exit_skip("Vector not supported\n"); + + srandom(getpid()); + parent_set_val = rand(); + child_set_val = rand(); + + child = fork(); + if (child < 0) + ksft_exit_fail_msg("Fork failed %d\n", child); + + if (!child) + return do_child(); + + do_parent(child); + + ksft_finished(); +} From c9651fbc6051d305765f9c7528b90ad6c88ba4f4 Mon Sep 17 00:00:00 2001 From: Xu Lu Date: Mon, 20 Oct 2025 12:20:47 +0800 Subject: [PATCH 13/18] riscv: Add ISA extension parsing for Zalasr Add parsing for Zalasr ISA extension. Signed-off-by: Xu Lu Link: https://patch.msgid.link/20251020042056.30283-2-luxu.kernel@bytedance.com [pjw@kernel.org: updated to apply] Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/hwcap.h | 1 + arch/riscv/kernel/cpufeature.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index affd63e11b0a..ae3852c4f2ca 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -106,6 +106,7 @@ #define RISCV_ISA_EXT_ZAAMO 97 #define RISCV_ISA_EXT_ZALRSC 98 #define RISCV_ISA_EXT_ZICBOP 99 +#define RISCV_ISA_EXT_ZALASR 100 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 72ca768f4e91..53842b79610e 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -472,6 +472,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zaamo, RISCV_ISA_EXT_ZAAMO), __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA), __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), + __RISCV_ISA_EXT_DATA(zalasr, RISCV_ISA_EXT_ZALASR), __RISCV_ISA_EXT_DATA(zalrsc, RISCV_ISA_EXT_ZALRSC), __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA_VALIDATE(zfa, RISCV_ISA_EXT_ZFA, riscv_ext_f_depends), From 4640be2588713766b3141998d2e396121b337e41 Mon Sep 17 00:00:00 2001 From: Xu Lu Date: Mon, 20 Oct 2025 12:20:48 +0800 Subject: [PATCH 14/18] dt-bindings: riscv: Add Zalasr ISA extension description Add description for the Zalasr ISA extension Signed-off-by: Xu Lu Acked-by: Conor Dooley Link: https://patch.msgid.link/20251020042056.30283-3-luxu.kernel@bytedance.com Signed-off-by: Paul Walmsley --- Documentation/devicetree/bindings/riscv/extensions.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index 543ac94718e8..6dc73f2674c9 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -242,6 +242,11 @@ properties: is supported as ratified at commit 5059e0ca641c ("update to ratified") of the riscv-zacas. 
+ - const: zalasr + description: | + The standard Zalasr extension for load-acquire/store-release as frozen + at commit 194f0094 ("Version 0.9 for freeze") of riscv-zalasr. + - const: zalrsc description: | The standard Zalrsc extension for load-reserved/store-conditional as From f4922b69165735e81752ee47d174f873e989a449 Mon Sep 17 00:00:00 2001 From: Xu Lu Date: Mon, 20 Oct 2025 12:20:49 +0800 Subject: [PATCH 15/18] riscv: hwprobe: Export Zalasr extension Export the Zalasr extension to userspace using hwprobe. Signed-off-by: Xu Lu Link: https://patch.msgid.link/20251020042056.30283-4-luxu.kernel@bytedance.com Signed-off-by: Paul Walmsley --- Documentation/arch/riscv/hwprobe.rst | 5 ++++- arch/riscv/include/uapi/asm/hwprobe.h | 1 + arch/riscv/kernel/sys_hwprobe.c | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 2f449c9b15bd..8b36eaa9d5d8 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -249,6 +249,9 @@ The following keys are defined: defined in the in the RISC-V ISA manual starting from commit e87412e621f1 ("integrate Zaamo and Zalrsc text (#1304)"). + * :c:macro:`RISCV_HWPROBE_EXT_ZALASR`: The Zalasr extension is supported as + frozen at commit 194f0094 ("Version 0.9 for freeze") of riscv-zalasr. + * :c:macro:`RISCV_HWPROBE_EXT_ZALRSC`: The Zalrsc extension is supported as defined in the in the RISC-V ISA manual starting from commit e87412e621f1 ("integrate Zaamo and Zalrsc text (#1304)"). @@ -369,4 +372,4 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ`: The Xsfvfwmaccqqq vendor extension is supported in version 1.0 of Matrix Multiply Accumulate - Instruction Extensions Specification. \ No newline at end of file + Instruction Extensions Specification. diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 5d30a4fae37a..906896780999 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -82,6 +82,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZAAMO (1ULL << 56) #define RISCV_HWPROBE_EXT_ZALRSC (1ULL << 57) #define RISCV_HWPROBE_EXT_ZABHA (1ULL << 58) +#define RISCV_HWPROBE_EXT_ZALASR (1ULL << 59) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 199d13f86f31..55004d81631a 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -109,6 +109,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZAAMO); EXT_KEY(ZABHA); EXT_KEY(ZACAS); + EXT_KEY(ZALASR); EXT_KEY(ZALRSC); EXT_KEY(ZAWRS); EXT_KEY(ZBA); From ad1bb4b8525e497c95d48a2d3fd2e81fba4b5ad7 Mon Sep 17 00:00:00 2001 From: Xu Lu Date: Mon, 20 Oct 2025 12:20:50 +0800 Subject: [PATCH 16/18] riscv: Introduce Zalasr instructions Introduce l{b|h|w|d}.{aq|aqrl} and s{b|h|w|d}.{rl|aqrl} instruction encodings. 
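As an illustration of intended use (a hypothetical wrapper, not part
of this patch): the encodings follow the usual insn-def pattern, so a
32-bit load-acquire could be emitted from inline assembly as:

	static inline int load_acquire_int(const int *p)
	{
		int ret;

		asm volatile(LW_AQ(%[ret], %[p])
			     : [ret] "=r" (ret)
			     : [p] "r" (p)
			     : "memory");
		return ret;
	}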
Signed-off-by: Xu Lu Reviewed-by: Guo Ren Link: https://patch.msgid.link/20251020042056.30283-5-luxu.kernel@bytedance.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/insn-def.h | 79 +++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index d29da6ccd3dd..7c6daf116756 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -179,6 +179,7 @@ #define RV___RS1(v) __RV_REG(v) #define RV___RS2(v) __RV_REG(v) +#define RV_OPCODE_AMO RV_OPCODE(47) #define RV_OPCODE_MISC_MEM RV_OPCODE(15) #define RV_OPCODE_OP_IMM RV_OPCODE(19) #define RV_OPCODE_SYSTEM RV_OPCODE(115) @@ -208,6 +209,84 @@ __ASM_STR(.error "hlv.d requires 64-bit support") #endif +#define LB_AQ(dest, addr) \ + INSN_R(OPCODE_AMO, FUNC3(0), FUNC7(26), \ + RD(dest), RS1(addr), __RS2(0)) + +#define LB_AQRL(dest, addr) \ + INSN_R(OPCODE_AMO, FUNC3(0), FUNC7(27), \ + RD(dest), RS1(addr), __RS2(0)) + +#define LH_AQ(dest, addr) \ + INSN_R(OPCODE_AMO, FUNC3(1), FUNC7(26), \ + RD(dest), RS1(addr), __RS2(0)) + +#define LH_AQRL(dest, addr) \ + INSN_R(OPCODE_AMO, FUNC3(1), FUNC7(27), \ + RD(dest), RS1(addr), __RS2(0)) + +#define LW_AQ(dest, addr) \ + INSN_R(OPCODE_AMO, FUNC3(2), FUNC7(26), \ + RD(dest), RS1(addr), __RS2(0)) + +#define LW_AQRL(dest, addr) \ + INSN_R(OPCODE_AMO, FUNC3(2), FUNC7(27), \ + RD(dest), RS1(addr), __RS2(0)) + +#define SB_RL(src, addr) \ + INSN_R(OPCODE_AMO, FUNC3(0), FUNC7(29), \ + __RD(0), RS1(addr), RS2(src)) + +#define SB_AQRL(src, addr) \ + INSN_R(OPCODE_AMO, FUNC3(0), FUNC7(31), \ + __RD(0), RS1(addr), RS2(src)) + +#define SH_RL(src, addr) \ + INSN_R(OPCODE_AMO, FUNC3(1), FUNC7(29), \ + __RD(0), RS1(addr), RS2(src)) + +#define SH_AQRL(src, addr) \ + INSN_R(OPCODE_AMO, FUNC3(1), FUNC7(31), \ + __RD(0), RS1(addr), RS2(src)) + +#define SW_RL(src, addr) \ + INSN_R(OPCODE_AMO, FUNC3(2), FUNC7(29), \ + __RD(0), RS1(addr), RS2(src)) + +#define SW_AQRL(src, addr) \ + INSN_R(OPCODE_AMO, FUNC3(2), FUNC7(31), \ + __RD(0), RS1(addr), RS2(src)) + +#ifdef CONFIG_64BIT +#define LD_AQ(dest, addr) \ + INSN_R(OPCODE_AMO, FUNC3(3), FUNC7(26), \ + RD(dest), RS1(addr), __RS2(0)) + +#define LD_AQRL(dest, addr) \ + INSN_R(OPCODE_AMO, FUNC3(3), FUNC7(27), \ + RD(dest), RS1(addr), __RS2(0)) + +#define SD_RL(src, addr) \ + INSN_R(OPCODE_AMO, FUNC3(3), FUNC7(29), \ + __RD(0), RS1(addr), RS2(src)) + +#define SD_AQRL(src, addr) \ + INSN_R(OPCODE_AMO, FUNC3(3), FUNC7(31), \ + __RD(0), RS1(addr), RS2(src)) +#else +#define LD_AQ(dest, addr) \ + __ASM_STR(.error "ld.aq requires 64-bit support") + +#define LD_AQRL(dest, addr) \ + __ASM_STR(.error "ld.aqrl requires 64-bit support") + +#define SD_RL(dest, addr) \ + __ASM_STR(.error "sd.rl requires 64-bit support") + +#define SD_AQRL(dest, addr) \ + __ASM_STR(.error "sd.aqrl requires 64-bit support") +#endif + #define SINVAL_VMA(vaddr, asid) \ INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(11), \ __RD(0), RS1(vaddr), RS2(asid)) From e0a504984a88a2f1c0131aca5115fd529fc9974a Mon Sep 17 00:00:00 2001 From: Yao Zihong Date: Wed, 19 Nov 2025 00:23:24 +0800 Subject: [PATCH 17/18] riscv: hwprobe: Expose Zicbop extension and its block size - Add `RISCV_HWPROBE_EXT_ZICBOP` to report the presence of the Zicbop extension. - Add `RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE` to expose the block size (in bytes) when Zicbop is supported. - Update hwprobe.rst to document the new extension bit and block size key, following the existing Zicbom/Zicboz style. 
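A userspace query could look like the following sketch (illustrative;
error handling elided), pairing the extension bit with the new block
size key:

	struct riscv_hwprobe pairs[] = {
		{ .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
		{ .key = RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE },
	};

	syscall(__NR_riscv_hwprobe, pairs, 2, 0, NULL, 0);
	if (pairs[0].value & RISCV_HWPROBE_EXT_ZICBOP)
		printf("Zicbop block size: %llu bytes\n",
		       (unsigned long long)pairs[1].value);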
Reviewed-by: Andrew Jones Signed-off-by: Yao Zihong Link: https://patch.msgid.link/20251118162436.15485-2-zihong.plct@isrc.iscas.ac.cn [pjw@kernel.org: updated to apply] Signed-off-by: Paul Walmsley --- Documentation/arch/riscv/hwprobe.rst | 6 ++++++ arch/riscv/include/asm/hwprobe.h | 2 +- arch/riscv/include/uapi/asm/hwprobe.h | 2 ++ arch/riscv/kernel/sys_hwprobe.c | 6 ++++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 8b36eaa9d5d8..06c5280b728a 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -278,6 +278,9 @@ The following keys are defined: ratified in commit 49f49c842ff9 ("Update to Rafified state") of riscv-zabha. + * :c:macro:`RISCV_HWPROBE_EXT_ZICBOP`: The Zicbop extension is supported, as + ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + * :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was mistakenly classified as a bitmask rather than a value. @@ -373,3 +376,6 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ`: The Xsfvfwmaccqqq vendor extension is supported in version 1.0 of Matrix Multiply Accumulate Instruction Extensions Specification. + +* :c:macro:`RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE`: An unsigned int which + represents the size of the Zicbop block in bytes. diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 58f8dda73259..8c572a464719 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include -#define RISCV_HWPROBE_MAX_KEY 14 +#define RISCV_HWPROBE_MAX_KEY 15 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 906896780999..1edea2331b8b 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -83,6 +83,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZALRSC (1ULL << 57) #define RISCV_HWPROBE_EXT_ZABHA (1ULL << 58) #define RISCV_HWPROBE_EXT_ZALASR (1ULL << 59) +#define RISCV_HWPROBE_EXT_ZICBOP (1ULL << 60) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) @@ -108,6 +109,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE 12 #define RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0 13 #define RISCV_HWPROBE_KEY_VENDOR_EXT_MIPS_0 14 +#define RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE 15 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. 
*/ /* Flags */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 55004d81631a..0f701ace3bb9 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -123,6 +123,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZCB); EXT_KEY(ZCMOP); EXT_KEY(ZICBOM); + EXT_KEY(ZICBOP); EXT_KEY(ZICBOZ); EXT_KEY(ZICNTR); EXT_KEY(ZICOND); @@ -303,6 +304,11 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOM)) pair->value = riscv_cbom_block_size; break; + case RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE: + pair->value = 0; + if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOP)) + pair->value = riscv_cbop_block_size; + break; case RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS: pair->value = user_max_virt_addr(); break; From a131fd60796dbfaa6297c0c8ca8e2a7610a64281 Mon Sep 17 00:00:00 2001 From: Yao Zihong Date: Wed, 19 Nov 2025 00:23:25 +0800 Subject: [PATCH 18/18] selftests/riscv: Add Zicbop prefetch test Add selftests to cbo.c to verify Zicbop extension behavior, and split the previous `--sigill` mode into two options so they can be tested independently. The test checks: - That hwprobe correctly reports Zicbop presence and block size. - That prefetch instructions execute without exception on valid and NULL addresses when Zicbop is present. Signed-off-by: Yao Zihong Reviewed-by: Andrew Jones Link: https://patch.msgid.link/20251118162436.15485-3-zihong.plct@isrc.iscas.ac.cn Signed-off-by: Paul Walmsley --- tools/testing/selftests/riscv/hwprobe/cbo.c | 163 ++++++++++++++++---- 1 file changed, 135 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c index 5e96ef785d0d..6d99726aceac 100644 --- a/tools/testing/selftests/riscv/hwprobe/cbo.c +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -15,24 +15,31 @@ #include #include #include +#include #include "hwprobe.h" #include "../../kselftest.h" #define MK_CBO(fn) le32_bswap((uint32_t)(fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15) +#define MK_PREFETCH(fn) \ + le32_bswap(0 << 25 | (uint32_t)(fn) << 20 | 10 << 15 | 6 << 12 | 0 << 7 | 19) static char mem[4096] __aligned(4096) = { [0 ... 
4095] = 0xa5 }; -static bool illegal_insn; +static bool got_fault; -static void sigill_handler(int sig, siginfo_t *info, void *context) +static void fault_handler(int sig, siginfo_t *info, void *context) { unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext; uint32_t insn = *(uint32_t *)regs[0]; - assert(insn == MK_CBO(regs[11])); + if (sig == SIGILL) + assert(insn == MK_CBO(regs[11])); - illegal_insn = true; + if (sig == SIGSEGV || sig == SIGBUS) + assert(insn == MK_PREFETCH(regs[11])); + + got_fault = true; regs[0] += 4; } @@ -45,39 +52,51 @@ static void sigill_handler(int sig, siginfo_t *info, void *context) : : "r" (base), "i" (fn), "i" (MK_CBO(fn)) : "a0", "a1", "memory"); \ }) +#define prefetch_insn(base, fn) \ +({ \ + asm volatile( \ + "mv a0, %0\n" \ + "li a1, %1\n" \ + ".4byte %2\n" \ + : : "r" (base), "i" (fn), "i" (MK_PREFETCH(fn)) : "a0", "a1"); \ +}) + static void cbo_inval(char *base) { cbo_insn(base, 0); } static void cbo_clean(char *base) { cbo_insn(base, 1); } static void cbo_flush(char *base) { cbo_insn(base, 2); } static void cbo_zero(char *base) { cbo_insn(base, 4); } +static void prefetch_i(char *base) { prefetch_insn(base, 0); } +static void prefetch_r(char *base) { prefetch_insn(base, 1); } +static void prefetch_w(char *base) { prefetch_insn(base, 3); } static void test_no_cbo_inval(void *arg) { ksft_print_msg("Testing cbo.inval instruction remain privileged\n"); - illegal_insn = false; + got_fault = false; cbo_inval(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.inval\n"); + ksft_test_result(got_fault, "No cbo.inval\n"); } static void test_no_zicbom(void *arg) { ksft_print_msg("Testing Zicbom instructions remain privileged\n"); - illegal_insn = false; + got_fault = false; cbo_clean(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.clean\n"); + ksft_test_result(got_fault, "No cbo.clean\n"); - illegal_insn = false; + got_fault = false; cbo_flush(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.flush\n"); + ksft_test_result(got_fault, "No cbo.flush\n"); } static void test_no_zicboz(void *arg) { ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n"); - illegal_insn = false; + got_fault = false; cbo_zero(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.zero\n"); + ksft_test_result(got_fault, "No cbo.zero\n"); } static bool is_power_of_2(__u64 n) @@ -85,6 +104,51 @@ static bool is_power_of_2(__u64 n) return n != 0 && (n & (n - 1)) == 0; } +static void test_zicbop(void *arg) +{ + struct riscv_hwprobe pair = { + .key = RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE, + }; + struct sigaction act = { + .sa_sigaction = &fault_handler, + .sa_flags = SA_SIGINFO + }; + struct sigaction dfl = { + .sa_handler = SIG_DFL + }; + cpu_set_t *cpus = (cpu_set_t *)arg; + __u64 block_size; + long rc; + + rc = sigaction(SIGSEGV, &act, NULL); + assert(rc == 0); + rc = sigaction(SIGBUS, &act, NULL); + assert(rc == 0); + + rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0); + block_size = pair.value; + ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE && + is_power_of_2(block_size), "Zicbop block size\n"); + ksft_print_msg("Zicbop block size: %llu\n", block_size); + + got_fault = false; + prefetch_i(&mem[0]); + prefetch_r(&mem[0]); + prefetch_w(&mem[0]); + ksft_test_result(!got_fault, "Zicbop prefetch.* on valid address\n"); + + got_fault = false; + prefetch_i(NULL); + prefetch_r(NULL); + prefetch_w(NULL); + ksft_test_result(!got_fault, "Zicbop prefetch.* on NULL\n"); + + rc = sigaction(SIGBUS, &dfl, NULL); + 
assert(rc == 0); + rc = sigaction(SIGSEGV, &dfl, NULL); + assert(rc == 0); +} + static void test_zicbom(void *arg) { struct riscv_hwprobe pair = { @@ -100,13 +164,13 @@ static void test_zicbom(void *arg) is_power_of_2(block_size), "Zicbom block size\n"); ksft_print_msg("Zicbom block size: %llu\n", block_size); - illegal_insn = false; + got_fault = false; cbo_clean(&mem[block_size]); - ksft_test_result(!illegal_insn, "cbo.clean\n"); + ksft_test_result(!got_fault, "cbo.clean\n"); - illegal_insn = false; + got_fault = false; cbo_flush(&mem[block_size]); - ksft_test_result(!illegal_insn, "cbo.flush\n"); + ksft_test_result(!got_fault, "cbo.flush\n"); } static void test_zicboz(void *arg) @@ -125,11 +189,11 @@ static void test_zicboz(void *arg) is_power_of_2(block_size), "Zicboz block size\n"); ksft_print_msg("Zicboz block size: %llu\n", block_size); - illegal_insn = false; + got_fault = false; cbo_zero(&mem[block_size]); - ksft_test_result(!illegal_insn, "cbo.zero\n"); + ksft_test_result(!got_fault, "cbo.zero\n"); - if (illegal_insn || !is_power_of_2(block_size)) { + if (got_fault || !is_power_of_2(block_size)) { ksft_test_result_skip("cbo.zero check\n"); return; } @@ -177,7 +241,19 @@ static void check_no_zicbo_cpus(cpu_set_t *cpus, __u64 cbo) rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0); assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0); - cbostr = cbo == RISCV_HWPROBE_EXT_ZICBOZ ? "Zicboz" : "Zicbom"; + switch (cbo) { + case RISCV_HWPROBE_EXT_ZICBOZ: + cbostr = "Zicboz"; + break; + case RISCV_HWPROBE_EXT_ZICBOM: + cbostr = "Zicbom"; + break; + case RISCV_HWPROBE_EXT_ZICBOP: + cbostr = "Zicbop"; + break; + default: + ksft_exit_fail_msg("Internal error: invalid cbo %llu\n", cbo); + } if (pair.value & cbo) ksft_exit_fail_msg("%s is only present on a subset of harts.\n" @@ -194,6 +270,7 @@ enum { TEST_ZICBOM, TEST_NO_ZICBOM, TEST_NO_CBO_INVAL, + TEST_ZICBOP, }; static struct test_info { @@ -206,26 +283,51 @@ static struct test_info { [TEST_ZICBOM] = { .nr_tests = 3, test_zicbom }, [TEST_NO_ZICBOM] = { .nr_tests = 2, test_no_zicbom }, [TEST_NO_CBO_INVAL] = { .nr_tests = 1, test_no_cbo_inval }, + [TEST_ZICBOP] = { .nr_tests = 3, test_zicbop }, +}; + +static const struct option long_opts[] = { + {"zicbom-raises-sigill", no_argument, 0, 'm'}, + {"zicboz-raises-sigill", no_argument, 0, 'z'}, + {0, 0, 0, 0} }; int main(int argc, char **argv) { struct sigaction act = { - .sa_sigaction = &sigill_handler, + .sa_sigaction = &fault_handler, .sa_flags = SA_SIGINFO, }; struct riscv_hwprobe pair; unsigned int plan = 0; cpu_set_t cpus; long rc; - int i; + int i, opt, long_index; - if (argc > 1 && !strcmp(argv[1], "--sigill")) { - rc = sigaction(SIGILL, &act, NULL); - assert(rc == 0); - tests[TEST_NO_ZICBOZ].enabled = true; - tests[TEST_NO_ZICBOM].enabled = true; - tests[TEST_NO_CBO_INVAL].enabled = true; + long_index = 0; + + while ((opt = getopt_long(argc, argv, "mz", long_opts, &long_index)) != -1) { + switch (opt) { + case 'm': + tests[TEST_NO_ZICBOM].enabled = true; + tests[TEST_NO_CBO_INVAL].enabled = true; + rc = sigaction(SIGILL, &act, NULL); + assert(rc == 0); + break; + case 'z': + tests[TEST_NO_ZICBOZ].enabled = true; + tests[TEST_NO_CBO_INVAL].enabled = true; + rc = sigaction(SIGILL, &act, NULL); + assert(rc == 0); + break; + case '?': + fprintf(stderr, + "Usage: %s [--zicbom-raises-sigill|-m] [--zicboz-raises-sigill|-z]\n", + argv[0]); + exit(1); + default: + break; + } } rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus); @@ -253,6 +355,11 @@ int main(int 
argc, char **argv) check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOM); } + if (pair.value & RISCV_HWPROBE_EXT_ZICBOP) + tests[TEST_ZICBOP].enabled = true; + else + check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOP); + for (i = 0; i < ARRAY_SIZE(tests); ++i) plan += tests[i].enabled ? tests[i].nr_tests : 0;
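With the `--sigill` mode split as above, the negative tests can now be run per extension. An invocation sketch (the binary name `cbo` is assumed from the selftest source file):

  ./cbo                           # probe-driven tests, incl. Zicbop when present
  ./cbo --zicbom-raises-sigill    # or -m: expect SIGILL from Zicbom CBOs
  ./cbo --zicboz-raises-sigill    # or -z: expect SIGILL from cbo.zero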