Files
linux/lib/raid6/recov_rvv.c
Chunyan Zhang 3c58d7a513 raid6: riscv: Allow code to be compiled in userspace
To support the userspace raid6test, this patch adds a __KERNEL__ ifdef around
the kernel header inclusions, and also adds userspace wrapper definitions, so
that the code can be compiled in userspace.

This patch also drops the NSIZE macro and uses the vector length instead,
which works in both kernel and user space.

Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
Link: https://patch.msgid.link/20250718072711.3865118-5-zhangchunyan@iscas.ac.cn
Signed-off-by: Paul Walmsley <pjw@kernel.org>
2025-11-19 09:19:28 -07:00

223 lines
5.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2024 Institute of Software, CAS.
* Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
*/
#include <linux/raid/pq.h>
#include "rvv.h"
/*
 * Vector inner loop for the two-data-block recovery.
 *
 * @bytes: number of bytes to process; the loop consumes 16 per iteration
 *         and only stops when the count reaches exactly zero, so callers
 *         must pass a non-negative multiple of 16.
 * @p:     read-only here; XORed with @dp to form px
 * @q:     read-only here; XORed with @dq to form the qmul index
 * @dp:    read, then overwritten with db ^ px
 * @dq:    read, then overwritten with db = pbmul[px] ^ qx
 * @pbmul: 32-byte table: bytes 0..15 are indexed by the low nibble,
 *         bytes 16..31 by the high nibble; the two lookups are XORed
 * @qmul:  32-byte table with the same low/high nibble layout
 *
 * NOTE(review): every iteration advances the pointers by 16 regardless of
 * the vl granted by vsetvli, so this presumably relies on VLEN >= 128
 * (vl == 16) - confirm against rvv_has_vector().
 */
static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp,
				    u8 *dq, const u8 *pbmul,
				    const u8 *qmul)
{
	/* Select 8-bit elements, LMUL=1, with a requested length of 16. */
	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli x0, %[avl], e8, m1, ta, ma\n"
		      ".option pop\n"
		      : :
		      [avl]"r"(16)
	);

	/*
	 * Scalar equivalent of the loop below:
	 *
	 * while ( bytes-- ) {
	 *	uint8_t px, qx, db;
	 *
	 *	px	  = *p ^ *dp;
	 *	qx	  = qmul[*q ^ *dq];
	 *	*dq++ = db = pbmul[px] ^ qx;
	 *	*dp++ = db ^ px;
	 *	p++; q++;
	 * }
	 */
	while (bytes) {
		/*
		 * Register usage:
		 * v0:px, v1:dp (later db ^ px),
		 * v2:q (later qx), v3:dq (later db),
		 * v4:vx (low nibbles), v5:vy (high nibbles),
		 * v6:qm0, v7:qm1,
		 * v8:pm0, v9:pm1,
		 * v14:p/qm[vx], v15:p/qm[vy]
		 *
		 * The asm reads and writes memory that is not described by
		 * any operand (only the addresses are passed in), hence the
		 * "memory" clobber: it keeps the compiler from caching or
		 * reordering accesses to these buffers around the statement.
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[px])\n"
			      "vle8.v v1, (%[dp])\n"
			      "vxor.vv v0, v0, v1\n"
			      "vle8.v v2, (%[qx])\n"
			      "vle8.v v3, (%[dq])\n"
			      "vxor.vv v4, v2, v3\n"
			      "vsrl.vi v5, v4, 4\n"
			      "vand.vi v4, v4, 0xf\n"
			      "vle8.v v6, (%[qm0])\n"
			      "vle8.v v7, (%[qm1])\n"
			      "vrgather.vv v14, v6, v4\n" /* v14 = qm[vx] */
			      "vrgather.vv v15, v7, v5\n" /* v15 = qm[vy] */
			      "vxor.vv v2, v14, v15\n" /* v2 = qmul[*q ^ *dq] */
			      "vsrl.vi v5, v0, 4\n"
			      "vand.vi v4, v0, 0xf\n"
			      "vle8.v v8, (%[pm0])\n"
			      "vle8.v v9, (%[pm1])\n"
			      "vrgather.vv v14, v8, v4\n" /* v14 = pm[vx] */
			      "vrgather.vv v15, v9, v5\n" /* v15 = pm[vy] */
			      "vxor.vv v4, v14, v15\n" /* v4 = pbmul[px] */
			      "vxor.vv v3, v4, v2\n" /* v3 = db = pbmul[px] ^ qx */
			      "vxor.vv v1, v3, v0\n" /* v1 = db ^ px; */
			      "vse8.v v3, (%[dq])\n"
			      "vse8.v v1, (%[dp])\n"
			      ".option pop\n"
			      : :
			      [px]"r"(p),
			      [dp]"r"(dp),
			      [qx]"r"(q),
			      [dq]"r"(dq),
			      [qm0]"r"(qmul),
			      [qm1]"r"(qmul + 16),
			      [pm0]"r"(pbmul),
			      [pm1]"r"(pbmul + 16)
			      : "memory");

		bytes -= 16;
		p += 16;
		q += 16;
		dp += 16;
		dq += 16;
	}
}
/*
 * Vector inner loop for the data + P recovery.
 *
 * @bytes: number of bytes to process; the loop consumes 16 per iteration
 *         and only stops when the count reaches exactly zero, so callers
 *         must pass a non-negative multiple of 16.
 * @p:     read, then overwritten with p ^ qmul[q ^ dq]
 * @q:     read-only here
 * @dq:    read, then overwritten with qmul[q ^ dq]
 * @qmul:  32-byte table: bytes 0..15 are indexed by the low nibble,
 *         bytes 16..31 by the high nibble; the two lookups are XORed
 *
 * NOTE(review): every iteration advances the pointers by 16 regardless of
 * the vl granted by vsetvli, so this presumably relies on VLEN >= 128
 * (vl == 16) - confirm against rvv_has_vector().
 */
static void __raid6_datap_recov_rvv(int bytes, u8 *p, u8 *q,
				    u8 *dq, const u8 *qmul)
{
	/* Select 8-bit elements, LMUL=1, with a requested length of 16. */
	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli x0, %[avl], e8, m1, ta, ma\n"
		      ".option pop\n"
		      : :
		      [avl]"r"(16)
	);

	/*
	 * Scalar equivalent of the loop below:
	 *
	 * while (bytes--) {
	 *	*p++ ^= *dq = qmul[*q ^ *dq];
	 *	q++; dq++;
	 * }
	 */
	while (bytes) {
		/*
		 * Register usage:
		 * v0: q ^ dq, then its low nibbles (vx), then the new dq
		 * v1: high nibbles (vy), then p, then the new p
		 * v2: dq as loaded
		 * v4:qm0, v5:qm1,
		 * v10:m[vx], v11:m[vy]
		 *
		 * The asm reads and writes memory that is not described by
		 * any operand (only the addresses are passed in), hence the
		 * "memory" clobber: it keeps the compiler from caching or
		 * reordering accesses to these buffers around the statement.
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[vx])\n"
			      "vle8.v v2, (%[dq])\n"
			      "vxor.vv v0, v0, v2\n"
			      "vsrl.vi v1, v0, 4\n"
			      "vand.vi v0, v0, 0xf\n"
			      "vle8.v v4, (%[qm0])\n"
			      "vle8.v v5, (%[qm1])\n"
			      "vrgather.vv v10, v4, v0\n"
			      "vrgather.vv v11, v5, v1\n"
			      "vxor.vv v0, v10, v11\n"
			      "vle8.v v1, (%[vy])\n"
			      "vxor.vv v1, v0, v1\n"
			      "vse8.v v0, (%[dq])\n"
			      "vse8.v v1, (%[vy])\n"
			      ".option pop\n"
			      : :
			      [vx]"r"(q),
			      [vy]"r"(p),
			      [dq]"r"(dq),
			      [qm0]"r"(qmul),
			      [qm1]"r"(qmul + 16)
			      : "memory");

		bytes -= 16;
		p += 16;
		q += 16;
		dq += 16;
	}
}
/*
 * Two-data-block recovery entry point (the .data2 hook of raid6_recov_rvv).
 *
 * @disks: number of entries in @ptrs; the last two slots are P and Q
 * @bytes: length of each buffer in bytes
 * @faila: index of the first failed data block
 * @failb: index of the second failed data block (the table lookup uses
 *         failb - faila as an index, so failb is expected to be the larger)
 * @ptrs:  block pointer table; modified temporarily and restored before
 *         the vector helper runs
 */
static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila,
				  int failb, void **ptrs)
{
	const int p_slot = disks - 2;
	const int q_slot = disks - 1;
	u8 *p_buf, *q_buf, *delta_p, *delta_q;
	const u8 *p_tbl;	/* multiplier table applied to delta P */
	const u8 *q_tbl;	/* multiplier table applied to delta Q */

	/* Remember the real P and Q buffers. */
	p_buf = (u8 *)ptrs[p_slot];
	q_buf = (u8 *)ptrs[q_slot];

	/*
	 * Run the syndrome generator with the zero page standing in for
	 * both failed blocks. The failed blocks' own buffers are parked in
	 * the P and Q slots so that they serve as scratch space for
	 * delta P and delta Q.
	 */
	delta_p = (u8 *)ptrs[faila];
	ptrs[faila] = raid6_get_zero_page();
	ptrs[p_slot] = delta_p;

	delta_q = (u8 *)ptrs[failb];
	ptrs[failb] = raid6_get_zero_page();
	ptrs[q_slot] = delta_q;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Put the pointer table back the way the caller handed it over. */
	ptrs[faila] = delta_p;
	ptrs[failb] = delta_q;
	ptrs[p_slot] = p_buf;
	ptrs[q_slot] = q_buf;

	/* Look up the multiplication tables for this pair of failures. */
	p_tbl = raid6_vgfmul[raid6_gfexi[failb - faila]];
	q_tbl = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
					 raid6_gfexp[failb]]];

	/* The vector unit may only be touched inside this bracket. */
	kernel_vector_begin();
	__raid6_2data_recov_rvv(bytes, p_buf, q_buf, delta_p, delta_q,
				p_tbl, q_tbl);
	kernel_vector_end();
}
/*
 * Data + P recovery entry point (the .datap hook of raid6_recov_rvv).
 *
 * @disks: number of entries in @ptrs; the last two slots are P and Q
 * @bytes: length of each buffer in bytes
 * @faila: index of the failed data block
 * @ptrs:  block pointer table; modified temporarily and restored before
 *         the vector helper runs
 */
static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila,
				  void **ptrs)
{
	const int q_slot = disks - 1;
	u8 *p_buf, *q_buf, *delta_q;
	const u8 *q_tbl;	/* multiplier table applied to delta Q */

	p_buf = (u8 *)ptrs[disks - 2];
	q_buf = (u8 *)ptrs[q_slot];

	/*
	 * Run the syndrome generator with the zero page standing in for
	 * the failed block. The failed block's own buffer is parked in the
	 * Q slot so that it serves as scratch space for delta Q.
	 */
	delta_q = (u8 *)ptrs[faila];
	ptrs[faila] = raid6_get_zero_page();
	ptrs[q_slot] = delta_q;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Put the pointer table back the way the caller handed it over. */
	ptrs[faila] = delta_q;
	ptrs[q_slot] = q_buf;

	/* Look up the multiplication table for this failure. */
	q_tbl = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	/* The vector unit may only be touched inside this bracket. */
	kernel_vector_begin();
	__raid6_datap_recov_rvv(bytes, p_buf, q_buf, delta_q, q_tbl);
	kernel_vector_end();
}
/*
 * Recovery method table for the RISC-V vector implementation: it exposes
 * the two recovery entry points above together with the rvv_has_vector
 * availability hook.
 * NOTE(review): .priority is presumably compared against the other
 * recovery implementations when one is selected - confirm in the raid6 core.
 */
const struct raid6_recov_calls raid6_recov_rvv = {
	.name		= "rvv",
	.priority	= 1,
	.valid		= rvv_has_vector,
	.data2		= raid6_2data_recov_rvv,
	.datap		= raid6_datap_recov_rvv,
};