RISC-V: Bugfix for rvv bool mode precision adjustment

Fix the RVV bool mode precision bug by adjusting the mode precision.
The bit size of vbool*_t will be adjusted to
[1, 2, 4, 8, 16, 32, 64] according to the RVV 1.0 ISA spec. The
adjusted mode precision of vbool*_t will help the underlying passes
make the right decisions for both correctness and optimization.

Given below sample code:

void test_1(int8_t * restrict in, int8_t * restrict out)
{
  vbool8_t v2 = *(vbool8_t*)in;
  vbool16_t v5 = *(vbool16_t*)in;
  *(vbool16_t*)(out + 200) = v5;
  *(vbool8_t*)(out + 100) = v2;
}

Before the precision adjustment:

addi    a4,a1,100
vsetvli a5,zero,e8,m1,ta,ma
addi    a1,a1,200
vlm.v   v24,0(a0)
vsm.v   v24,0(a4)
// Need one vsetvli and vlm.v for correctness here.
vsm.v   v24,0(a1)

After the precision adjustment:

csrr    t0,vlenb
slli    t1,t0,1
csrr    a3,vlenb
sub     sp,sp,t1
slli    a4,a3,1
add     a4,a4,sp
sub     a3,a4,a3
vsetvli a5,zero,e8,m1,ta,ma
addi    a2,a1,200
vlm.v   v24,0(a0)
vsm.v   v24,0(a3)
addi    a1,a1,100
vsetvli a4,zero,e8,mf2,ta,ma
csrr    t0,vlenb
vlm.v   v25,0(a3)
vsm.v   v25,0(a2)
slli    t1,t0,1
vsetvli a5,zero,e8,m1,ta,ma
vsm.v   v24,0(a1)
add     sp,sp,t1
jr      ra

However, there may be some optimization opportunities after
the mode precision adjustment. They can be taken care of in
the RISC-V backend in separate follow-up PR(s).

gcc/ChangeLog:

	PR target/108185
	PR target/108654
	* config/riscv/riscv-modes.def (ADJUST_PRECISION): Adjust VNx*BI
	modes.
	* config/riscv/riscv.cc (riscv_v_adjust_precision): New.
	* config/riscv/riscv.h (riscv_v_adjust_precision): New.
	* genmodes.cc (adj_precision): New.
	(ADJUST_PRECISION): New.
	(emit_mode_adjustments): Handle ADJUST_PRECISION.

gcc/testsuite/ChangeLog:

	PR target/108185
	PR target/108654
	* gcc.target/riscv/rvv/base/pr108185-1.c: New test.
	* gcc.target/riscv/rvv/base/pr108185-2.c: New test.
	* gcc.target/riscv/rvv/base/pr108185-3.c: New test.
	* gcc.target/riscv/rvv/base/pr108185-4.c: New test.
	* gcc.target/riscv/rvv/base/pr108185-5.c: New test.
	* gcc.target/riscv/rvv/base/pr108185-6.c: New test.
	* gcc.target/riscv/rvv/base/pr108185-7.c: New test.
	* gcc.target/riscv/rvv/base/pr108185-8.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
This commit is contained in:
Pan Li 2023-03-07 20:05:15 +08:00 committed by Kito Cheng
parent 59a72acbcc
commit 247cacc9e3
12 changed files with 600 additions and 2 deletions

View file

@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
/* Adjust the precision of every VNx*BI mode through
   riscv_v_adjust_precision.  The second argument is the per-mode scale
   handed to that function (1, 2, 4, 8, 16, 32 and 64 for VNx1BI through
   VNx64BI).  */
ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1));
ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2));
ADJUST_PRECISION (VNx4BI, riscv_v_adjust_precision (VNx4BImode, 4));
ADJUST_PRECISION (VNx8BI, riscv_v_adjust_precision (VNx8BImode, 8));
ADJUST_PRECISION (VNx16BI, riscv_v_adjust_precision (VNx16BImode, 16));
ADJUST_PRECISION (VNx32BI, riscv_v_adjust_precision (VNx32BImode, 32));
ADJUST_PRECISION (VNx64BI, riscv_v_adjust_precision (VNx64BImode, 64));
/*
| Mode | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
| | LMUL | SEW/LMUL | LMUL | SEW/LMUL |

View file

@ -1003,6 +1003,18 @@ riscv_v_adjust_nunits (machine_mode mode, int scale)
return scale;
}
/* Helper invoked by ADJUST_PRECISION in riscv-modes.def.  Compute the
   PRECISION of MODE from SCALE: a mode accepted by
   riscv_v_ext_vector_mode_p gets SCALE multiplied by riscv_vector_chunks,
   any other mode gets SCALE unchanged.  */
poly_int64
riscv_v_adjust_precision (machine_mode mode, int scale)
{
  /* Modes rejected by the predicate keep the plain scale.  */
  if (!riscv_v_ext_vector_mode_p (mode))
    return scale;

  return riscv_vector_chunks * scale;
}
/* Return true if X is a valid address for machine mode MODE. If it is,
fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
effect. */

View file

@ -1025,6 +1025,7 @@ extern unsigned riscv_stack_boundary;
extern unsigned riscv_bytes_per_vector_chunk;
extern poly_uint16 riscv_vector_chunks;
extern poly_int64 riscv_v_adjust_nunits (enum machine_mode, int);
/* Precision helper called from ADJUST_PRECISION in riscv-modes.def.  */
extern poly_int64 riscv_v_adjust_precision (enum machine_mode, int);
/* The number of bits and bytes in a RVV vector. */
#define BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk * 8))
#define BYTES_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk))

View file

@ -114,6 +114,7 @@ static struct mode_adjust *adj_alignment;
static struct mode_adjust *adj_format;
static struct mode_adjust *adj_ibit;
static struct mode_adjust *adj_fbit;
static struct mode_adjust *adj_precision;
/* Mode class operations. */
static enum mode_class
@ -819,6 +820,7 @@ make_vector_mode (enum mode_class bclass,
#define ADJUST_NUNITS(M, X) _ADD_ADJUST (nunits, M, X, RANDOM, RANDOM)
#define ADJUST_BYTESIZE(M, X) _ADD_ADJUST (bytesize, M, X, RANDOM, RANDOM)
#define ADJUST_ALIGNMENT(M, X) _ADD_ADJUST (alignment, M, X, RANDOM, RANDOM)
#define ADJUST_PRECISION(M, X) _ADD_ADJUST (precision, M, X, RANDOM, RANDOM)
#define ADJUST_FLOAT_FORMAT(M, X) _ADD_ADJUST (format, M, X, FLOAT, FLOAT)
#define ADJUST_IBIT(M, X) _ADD_ADJUST (ibit, M, X, ACCUM, UACCUM)
#define ADJUST_FBIT(M, X) _ADD_ADJUST (fbit, M, X, FRACT, UACCUM)
@ -1794,6 +1796,7 @@ emit_real_format_for_mode (void)
static void
emit_mode_adjustments (void)
{
int c;
struct mode_adjust *a;
struct mode_data *m;
@ -1829,8 +1832,9 @@ emit_mode_adjustments (void)
" (mode_precision[E_%smode], mode_nunits[E_%smode]);\n",
m->name, m->name);
printf (" mode_precision[E_%smode] = ps * old_factor;\n", m->name);
printf (" mode_size[E_%smode] = exact_div (mode_precision[E_%smode],"
" BITS_PER_UNIT);\n", m->name, m->name);
printf (" if (!multiple_p (mode_precision[E_%smode],"
" BITS_PER_UNIT, &mode_size[E_%smode]))\n", m->name, m->name);
printf (" mode_size[E_%smode] = -1;\n", m->name);
printf (" mode_nunits[E_%smode] = ps;\n", m->name);
printf (" adjust_mode_mask (E_%smode);\n", m->name);
printf (" }\n");
@ -1963,6 +1967,26 @@ emit_mode_adjustments (void)
printf ("\n /* %s:%d */\n REAL_MODE_FORMAT (E_%smode) = %s;\n",
a->file, a->line, a->mode->name, a->adjustment);
/* Adjust precision to the actual bits size. */
for (a = adj_precision; a; a = a->next)
switch (a->mode->cl)
{
case MODE_VECTOR_BOOL:
printf ("\n /* %s:%d. */\n ps = %s;\n", a->file, a->line,
a->adjustment);
printf (" mode_precision[E_%smode] = ps;\n", a->mode->name);
break;
default:
internal_error ("invalid use of ADJUST_PRECISION for mode %s",
a->mode->name);
/* NOTREACHED. */
}
/* Ensure there is no mode size equals -1. */
for_all_modes (c, m)
printf ("\n gcc_assert (maybe_ne (mode_size[E_%smode], -1));\n",
m->name);
puts ("}");
}

View file

@ -0,0 +1,68 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
#include "riscv_vector.h"
/* Load a vbool1_t value and a vbool2_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool1_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
vbool1_t v1 = *(vbool1_t*)in;
vbool2_t v2 = *(vbool2_t*)in;
*(vbool1_t*)(out + 100) = v1;
*(vbool2_t*)(out + 200) = v2;
}
/* Load a vbool1_t value and a vbool4_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool1_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
vbool1_t v1 = *(vbool1_t*)in;
vbool4_t v2 = *(vbool4_t*)in;
*(vbool1_t*)(out + 100) = v1;
*(vbool4_t*)(out + 200) = v2;
}
/* Load a vbool1_t value and a vbool8_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool1_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
vbool1_t v1 = *(vbool1_t*)in;
vbool8_t v2 = *(vbool8_t*)in;
*(vbool1_t*)(out + 100) = v1;
*(vbool8_t*)(out + 200) = v2;
}
/* Load a vbool1_t value and a vbool16_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool1_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
vbool1_t v1 = *(vbool1_t*)in;
vbool16_t v2 = *(vbool16_t*)in;
*(vbool1_t*)(out + 100) = v1;
*(vbool16_t*)(out + 200) = v2;
}
/* Load a vbool1_t value and a vbool32_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool1_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
vbool1_t v1 = *(vbool1_t*)in;
vbool32_t v2 = *(vbool32_t*)in;
*(vbool1_t*)(out + 100) = v1;
*(vbool32_t*)(out + 200) = v2;
}
/* Load a vbool1_t value and a vbool64_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool1_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
vbool1_t v1 = *(vbool1_t*)in;
vbool64_t v2 = *(vbool64_t*)in;
*(vbool1_t*)(out + 100) = v1;
*(vbool64_t*)(out + 200) = v2;
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 6 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 18 } } */

View file

@ -0,0 +1,68 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
#include "riscv_vector.h"
/* Load a vbool2_t value and a vbool1_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool2_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
vbool2_t v1 = *(vbool2_t*)in;
vbool1_t v2 = *(vbool1_t*)in;
*(vbool2_t*)(out + 100) = v1;
*(vbool1_t*)(out + 200) = v2;
}
/* Load a vbool2_t value and a vbool4_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool2_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
vbool2_t v1 = *(vbool2_t*)in;
vbool4_t v2 = *(vbool4_t*)in;
*(vbool2_t*)(out + 100) = v1;
*(vbool4_t*)(out + 200) = v2;
}
/* Load a vbool2_t value and a vbool8_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool2_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
vbool2_t v1 = *(vbool2_t*)in;
vbool8_t v2 = *(vbool8_t*)in;
*(vbool2_t*)(out + 100) = v1;
*(vbool8_t*)(out + 200) = v2;
}
/* Load a vbool2_t value and a vbool16_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool2_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
vbool2_t v1 = *(vbool2_t*)in;
vbool16_t v2 = *(vbool16_t*)in;
*(vbool2_t*)(out + 100) = v1;
*(vbool16_t*)(out + 200) = v2;
}
/* Load a vbool2_t value and a vbool32_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool2_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
vbool2_t v1 = *(vbool2_t*)in;
vbool32_t v2 = *(vbool32_t*)in;
*(vbool2_t*)(out + 100) = v1;
*(vbool32_t*)(out + 200) = v2;
}
/* Load a vbool2_t value and a vbool64_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool2_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
vbool2_t v1 = *(vbool2_t*)in;
vbool64_t v2 = *(vbool64_t*)in;
*(vbool2_t*)(out + 100) = v1;
*(vbool64_t*)(out + 200) = v2;
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 6 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 17 } } */

View file

@ -0,0 +1,68 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
#include "riscv_vector.h"
/* Load a vbool4_t value and a vbool1_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool4_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
vbool4_t v1 = *(vbool4_t*)in;
vbool1_t v2 = *(vbool1_t*)in;
*(vbool4_t*)(out + 100) = v1;
*(vbool1_t*)(out + 200) = v2;
}
/* Load a vbool4_t value and a vbool2_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool4_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
vbool4_t v1 = *(vbool4_t*)in;
vbool2_t v2 = *(vbool2_t*)in;
*(vbool4_t*)(out + 100) = v1;
*(vbool2_t*)(out + 200) = v2;
}
/* Load a vbool4_t value and a vbool8_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool4_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
vbool4_t v1 = *(vbool4_t*)in;
vbool8_t v2 = *(vbool8_t*)in;
*(vbool4_t*)(out + 100) = v1;
*(vbool8_t*)(out + 200) = v2;
}
/* Load a vbool4_t value and a vbool16_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool4_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
vbool4_t v1 = *(vbool4_t*)in;
vbool16_t v2 = *(vbool16_t*)in;
*(vbool4_t*)(out + 100) = v1;
*(vbool16_t*)(out + 200) = v2;
}
/* Load a vbool4_t value and a vbool32_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool4_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
vbool4_t v1 = *(vbool4_t*)in;
vbool32_t v2 = *(vbool32_t*)in;
*(vbool4_t*)(out + 100) = v1;
*(vbool32_t*)(out + 200) = v2;
}
/* Load a vbool4_t value and a vbool64_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool4_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
vbool4_t v1 = *(vbool4_t*)in;
vbool64_t v2 = *(vbool64_t*)in;
*(vbool4_t*)(out + 100) = v1;
*(vbool64_t*)(out + 200) = v2;
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 6 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 16 } } */

View file

@ -0,0 +1,68 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
#include "riscv_vector.h"
/* Load a vbool8_t value and a vbool1_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool8_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
vbool8_t v1 = *(vbool8_t*)in;
vbool1_t v2 = *(vbool1_t*)in;
*(vbool8_t*)(out + 100) = v1;
*(vbool1_t*)(out + 200) = v2;
}
/* Load a vbool8_t value and a vbool2_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool8_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
vbool8_t v1 = *(vbool8_t*)in;
vbool2_t v2 = *(vbool2_t*)in;
*(vbool8_t*)(out + 100) = v1;
*(vbool2_t*)(out + 200) = v2;
}
/* Load a vbool8_t value and a vbool4_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool8_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
vbool8_t v1 = *(vbool8_t*)in;
vbool4_t v2 = *(vbool4_t*)in;
*(vbool8_t*)(out + 100) = v1;
*(vbool4_t*)(out + 200) = v2;
}
/* Load a vbool8_t value and a vbool16_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool8_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
vbool8_t v1 = *(vbool8_t*)in;
vbool16_t v2 = *(vbool16_t*)in;
*(vbool8_t*)(out + 100) = v1;
*(vbool16_t*)(out + 200) = v2;
}
/* Load a vbool8_t value and a vbool32_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool8_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
vbool8_t v1 = *(vbool8_t*)in;
vbool32_t v2 = *(vbool32_t*)in;
*(vbool8_t*)(out + 100) = v1;
*(vbool32_t*)(out + 200) = v2;
}
/* Load a vbool8_t value and a vbool64_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool8_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
vbool8_t v1 = *(vbool8_t*)in;
vbool64_t v2 = *(vbool64_t*)in;
*(vbool8_t*)(out + 100) = v1;
*(vbool64_t*)(out + 200) = v2;
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 6 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */

View file

@ -0,0 +1,68 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
#include "riscv_vector.h"
/* Load a vbool16_t value and a vbool1_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool16_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
vbool16_t v1 = *(vbool16_t*)in;
vbool1_t v2 = *(vbool1_t*)in;
*(vbool16_t*)(out + 100) = v1;
*(vbool1_t*)(out + 200) = v2;
}
/* Load a vbool16_t value and a vbool2_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool16_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
vbool16_t v1 = *(vbool16_t*)in;
vbool2_t v2 = *(vbool2_t*)in;
*(vbool16_t*)(out + 100) = v1;
*(vbool2_t*)(out + 200) = v2;
}
/* Load a vbool16_t value and a vbool4_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool16_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
vbool16_t v1 = *(vbool16_t*)in;
vbool4_t v2 = *(vbool4_t*)in;
*(vbool16_t*)(out + 100) = v1;
*(vbool4_t*)(out + 200) = v2;
}
/* Load a vbool16_t value and a vbool8_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool16_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
vbool16_t v1 = *(vbool16_t*)in;
vbool8_t v2 = *(vbool8_t*)in;
*(vbool16_t*)(out + 100) = v1;
*(vbool8_t*)(out + 200) = v2;
}
/* Load a vbool16_t value and a vbool32_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool16_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
vbool16_t v1 = *(vbool16_t*)in;
vbool32_t v2 = *(vbool32_t*)in;
*(vbool16_t*)(out + 100) = v1;
*(vbool32_t*)(out + 200) = v2;
}
/* Load a vbool16_t value and a vbool64_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool16_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
vbool16_t v1 = *(vbool16_t*)in;
vbool64_t v2 = *(vbool64_t*)in;
*(vbool16_t*)(out + 100) = v1;
*(vbool64_t*)(out + 200) = v2;
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 6 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */

View file

@ -0,0 +1,68 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
#include "riscv_vector.h"
/* Load a vbool32_t value and a vbool1_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool32_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
vbool32_t v1 = *(vbool32_t*)in;
vbool1_t v2 = *(vbool1_t*)in;
*(vbool32_t*)(out + 100) = v1;
*(vbool1_t*)(out + 200) = v2;
}
/* Load a vbool32_t value and a vbool2_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool32_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
vbool32_t v1 = *(vbool32_t*)in;
vbool2_t v2 = *(vbool2_t*)in;
*(vbool32_t*)(out + 100) = v1;
*(vbool2_t*)(out + 200) = v2;
}
/* Load a vbool32_t value and a vbool4_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool32_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
vbool32_t v1 = *(vbool32_t*)in;
vbool4_t v2 = *(vbool4_t*)in;
*(vbool32_t*)(out + 100) = v1;
*(vbool4_t*)(out + 200) = v2;
}
/* Load a vbool32_t value and a vbool8_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool32_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
vbool32_t v1 = *(vbool32_t*)in;
vbool8_t v2 = *(vbool8_t*)in;
*(vbool32_t*)(out + 100) = v1;
*(vbool8_t*)(out + 200) = v2;
}
/* Load a vbool32_t value and a vbool16_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool32_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
vbool32_t v1 = *(vbool32_t*)in;
vbool16_t v2 = *(vbool16_t*)in;
*(vbool32_t*)(out + 100) = v1;
*(vbool16_t*)(out + 200) = v2;
}
/* Load a vbool32_t value and a vbool64_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool32_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
vbool32_t v1 = *(vbool32_t*)in;
vbool64_t v2 = *(vbool64_t*)in;
*(vbool32_t*)(out + 100) = v1;
*(vbool64_t*)(out + 200) = v2;
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 6 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 13 } } */

View file

@ -0,0 +1,68 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
#include "riscv_vector.h"
/* Load a vbool64_t value and a vbool1_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool64_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
vbool64_t v1 = *(vbool64_t*)in;
vbool1_t v2 = *(vbool1_t*)in;
*(vbool64_t*)(out + 100) = v1;
*(vbool1_t*)(out + 200) = v2;
}
/* Load a vbool64_t value and a vbool2_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool64_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
vbool64_t v1 = *(vbool64_t*)in;
vbool2_t v2 = *(vbool2_t*)in;
*(vbool64_t*)(out + 100) = v1;
*(vbool2_t*)(out + 200) = v2;
}
/* Load a vbool64_t value and a vbool4_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool64_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
vbool64_t v1 = *(vbool64_t*)in;
vbool4_t v2 = *(vbool4_t*)in;
*(vbool64_t*)(out + 100) = v1;
*(vbool4_t*)(out + 200) = v2;
}
/* Load a vbool64_t value and a vbool8_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool64_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
vbool64_t v1 = *(vbool64_t*)in;
vbool8_t v2 = *(vbool8_t*)in;
*(vbool64_t*)(out + 100) = v1;
*(vbool8_t*)(out + 200) = v2;
}
/* Load a vbool64_t value and a vbool16_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool64_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
vbool64_t v1 = *(vbool64_t*)in;
vbool16_t v2 = *(vbool16_t*)in;
*(vbool64_t*)(out + 100) = v1;
*(vbool16_t*)(out + 200) = v2;
}
/* Load a vbool64_t value and a vbool32_t value from the same address IN,
   then store them to OUT + 100 and OUT + 200 respectively.  */
void
test_vbool64_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
vbool64_t v1 = *(vbool64_t*)in;
vbool32_t v2 = *(vbool32_t*)in;
*(vbool64_t*)(out + 100) = v1;
*(vbool32_t*)(out + 200) = v2;
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 6 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */

View file

@ -0,0 +1,77 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
#include "riscv_vector.h"
/* Load two vbool1_t values from the same address IN, then store the
   first to OUT + 100 and the second to OUT + 200.  */
void
test_vbool1_then_vbool1(int8_t * restrict in, int8_t * restrict out) {
vbool1_t v1 = *(vbool1_t*)in;
vbool1_t v2 = *(vbool1_t*)in;
*(vbool1_t*)(out + 100) = v1;
*(vbool1_t*)(out + 200) = v2;
}
/* Load two vbool2_t values from the same address IN, then store the
   first to OUT + 100 and the second to OUT + 200.  */
void
test_vbool2_then_vbool2(int8_t * restrict in, int8_t * restrict out) {
vbool2_t v1 = *(vbool2_t*)in;
vbool2_t v2 = *(vbool2_t*)in;
*(vbool2_t*)(out + 100) = v1;
*(vbool2_t*)(out + 200) = v2;
}
/* Load two vbool4_t values from the same address IN, then store the
   first to OUT + 100 and the second to OUT + 200.  */
void
test_vbool4_then_vbool4(int8_t * restrict in, int8_t * restrict out) {
vbool4_t v1 = *(vbool4_t*)in;
vbool4_t v2 = *(vbool4_t*)in;
*(vbool4_t*)(out + 100) = v1;
*(vbool4_t*)(out + 200) = v2;
}
/* Load two vbool8_t values from the same address IN, then store the
   first to OUT + 100 and the second to OUT + 200.  */
void
test_vbool8_then_vbool8(int8_t * restrict in, int8_t * restrict out) {
vbool8_t v1 = *(vbool8_t*)in;
vbool8_t v2 = *(vbool8_t*)in;
*(vbool8_t*)(out + 100) = v1;
*(vbool8_t*)(out + 200) = v2;
}
/* Load two vbool16_t values from the same address IN, then store the
   first to OUT + 100 and the second to OUT + 200.  */
void
test_vbool16_then_vbool16(int8_t * restrict in, int8_t * restrict out) {
vbool16_t v1 = *(vbool16_t*)in;
vbool16_t v2 = *(vbool16_t*)in;
*(vbool16_t*)(out + 100) = v1;
*(vbool16_t*)(out + 200) = v2;
}
/* Load two vbool32_t values from the same address IN, then store the
   first to OUT + 100 and the second to OUT + 200.  */
void
test_vbool32_then_vbool32(int8_t * restrict in, int8_t * restrict out) {
vbool32_t v1 = *(vbool32_t*)in;
vbool32_t v2 = *(vbool32_t*)in;
*(vbool32_t*)(out + 100) = v1;
*(vbool32_t*)(out + 200) = v2;
}
/* Load two vbool64_t values from the same address IN, then store the
   first to OUT + 100 and the second to OUT + 200.  */
void
test_vbool64_then_vbool64(int8_t * restrict in, int8_t * restrict out) {
vbool64_t v1 = *(vbool64_t*)in;
vbool64_t v2 = *(vbool64_t*)in;
*(vbool64_t*)(out + 100) = v1;
*(vbool64_t*)(out + 200) = v2;
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */
/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 14 } } */