RISC-V: Introduce rounding mode operand into fixed-point intrinsics
According to the upcoming fixed-point API: https://github.com/riscv-non-isa/rvv-intrinsic-doc/pull/222 Introduce the vxrm argument: - vint32m1_t __riscv_vsadd_vv_i32m1 (vint32m1_t op1, vint32m1_t op2, size_t vl); + vint32m1_t __riscv_vsadd_vv_i32m1 (vint32m1_t op1, vint32m1_t op2, size_t vxrm, size_t vl); This patch doesn't insert the csrw vxrm configuration instruction yet. Automatic insertion of the csrw vxrm instruction will be supported in the next patch. This patch does the following: 1. Only extend the intrinsics with the vxrm argument. 2. Check that the vxrm argument is a valid immediate and report an error message if it is invalid. gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc: Introduce rounding mode. * config/riscv/riscv-vector-builtins-shapes.cc (struct alu_def): Ditto. (struct narrow_alu_def): Ditto. * config/riscv/riscv-vector-builtins.cc (function_builder::apply_predication): Ditto. (function_expander::use_exact_insn): Ditto. * config/riscv/riscv-vector-builtins.h (function_checker::arg_num): New function. (function_base::has_rounding_mode_operand_p): New function. gcc/testsuite/ChangeLog: * g++.target/riscv/rvv/base/bug-11.C: Adapt testcase. * g++.target/riscv/rvv/base/bug-12.C: Ditto. * g++.target/riscv/rvv/base/bug-14.C: Ditto. * g++.target/riscv/rvv/base/bug-15.C: Ditto. * g++.target/riscv/rvv/base/bug-16.C: Ditto. * g++.target/riscv/rvv/base/bug-17.C: Ditto. * g++.target/riscv/rvv/base/bug-18.C: Ditto. * g++.target/riscv/rvv/base/bug-19.C: Ditto. * g++.target/riscv/rvv/base/bug-20.C: Ditto. * g++.target/riscv/rvv/base/bug-21.C: Ditto. * g++.target/riscv/rvv/base/bug-22.C: Ditto. * g++.target/riscv/rvv/base/bug-23.C: Ditto. * g++.target/riscv/rvv/base/bug-3.C: Ditto. * g++.target/riscv/rvv/base/bug-5.C: Ditto. * g++.target/riscv/rvv/base/bug-6.C: Ditto. * g++.target/riscv/rvv/base/bug-8.C: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-100.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-101.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-102.c: Ditto. 
* gcc.target/riscv/rvv/base/binop_vx_constraint-103.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-104.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-105.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-106.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-107.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-108.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-109.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-110.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-111.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-112.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-113.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-114.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-115.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-116.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-117.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-118.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-119.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-122.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-97.c: Ditto. * gcc.target/riscv/rvv/base/binop_vx_constraint-98.c: Ditto. * gcc.target/riscv/rvv/base/merge_constraint-1.c: Ditto. * gcc.target/riscv/rvv/base/narrow_constraint-6.c: Ditto. * gcc.target/riscv/rvv/base/narrow_constraint-7.c: Ditto. * gcc.target/riscv/rvv/base/narrow_constraint-8.c: Ditto. * gcc.target/riscv/rvv/base/narrow_constraint-9.c: Ditto. * gcc.target/riscv/rvv/base/vxrm-2.c: New test. * gcc.target/riscv/rvv/base/vxrm-3.c: New test. * gcc.target/riscv/rvv/base/vxrm-4.c: New test. * gcc.target/riscv/rvv/base/vxrm-5.c: New test.
This commit is contained in:
parent
f65af1eeef
commit
24bd716811
52 changed files with 543 additions and 426 deletions
|
@ -260,6 +260,12 @@ template<rtx_code CODE>
|
|||
class binop : public function_base
|
||||
{
|
||||
public:
|
||||
/* Return true if this binop instantiation has a rounding mode operand.
   Only the saturating add/subtract codes (SS_PLUS, SS_MINUS, US_PLUS and
   US_MINUS) report one; every other CODE yields false.  */
bool has_rounding_mode_operand_p () const override
|
||||
{
|
||||
return CODE == SS_PLUS || CODE == SS_MINUS || CODE == US_PLUS
|
||||
|| CODE == US_MINUS;
|
||||
}
|
||||
|
||||
rtx expand (function_expander &e) const override
|
||||
{
|
||||
switch (e.op_info->op)
|
||||
|
@ -596,6 +602,8 @@ template<int UNSPEC>
|
|||
class sat_op : public function_base
|
||||
{
|
||||
public:
|
||||
/* Every sat_op instantiation reports a rounding mode operand.  */
bool has_rounding_mode_operand_p () const override { return true; }
|
||||
|
||||
rtx expand (function_expander &e) const override
|
||||
{
|
||||
switch (e.op_info->op)
|
||||
|
@ -616,6 +624,8 @@ template<int UNSPEC>
|
|||
class vnclip : public function_base
|
||||
{
|
||||
public:
|
||||
/* Every vnclip instantiation reports a rounding mode operand.  */
bool has_rounding_mode_operand_p () const override { return true; }
|
||||
|
||||
rtx expand (function_expander &e) const override
|
||||
{
|
||||
switch (e.op_info->op)
|
||||
|
|
|
@ -211,6 +211,19 @@ struct alu_def : public build_base
|
|||
b.append_name (predication_suffixes[instance.pred]);
|
||||
return b.finish_name ();
|
||||
}
|
||||
|
||||
/* Validate the arguments of a call to this shape.  If the intrinsic has a
   rounding mode operand and none of its types is floating-point, return the
   result of requiring that operand to be an immediate in the range
   [VXRM_RNU, VXRM_ROD].  In every other case return true.  */
bool check (function_checker &c) const override
|
||||
{
|
||||
/* Check whether rounding mode argument is a valid immediate. */
|
||||
if (c.base->has_rounding_mode_operand_p ())
|
||||
{
|
||||
if (!c.any_type_float_p ())
|
||||
/* The rounding mode is the second-to-last argument
   (index arg_num () - 2).  */
return c.require_immediate (c.arg_num () - 2, VXRM_RNU, VXRM_ROD);
|
||||
/* TODO: We will support floating-point intrinsic modeling
|
||||
rounding mode in the future. */
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/* widen_alu_def class. Handle vwadd/vwsub. Unlike
|
||||
|
@ -313,6 +326,19 @@ struct narrow_alu_def : public build_base
|
|||
b.append_name (predication_suffixes[instance.pred]);
|
||||
return b.finish_name ();
|
||||
}
|
||||
|
||||
/* Validate the arguments of a call to this shape.  If the intrinsic has a
   rounding mode operand and none of its types is floating-point, return the
   result of requiring that operand to be an immediate in the range
   [VXRM_RNU, VXRM_ROD].  In every other case return true.  */
bool check (function_checker &c) const override
|
||||
{
|
||||
/* Check whether rounding mode argument is a valid immediate. */
|
||||
if (c.base->has_rounding_mode_operand_p ())
|
||||
{
|
||||
if (!c.any_type_float_p ())
|
||||
/* The rounding mode is the second-to-last argument
   (index arg_num () - 2).  */
return c.require_immediate (c.arg_num () - 2, VXRM_RNU, VXRM_ROD);
|
||||
/* TODO: We will support floating-point intrinsic modeling
|
||||
rounding mode in the future. */
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/* move_def class. Handle vmv.v.v/vmv.v.x. */
|
||||
|
|
|
@ -2998,6 +2998,10 @@ function_builder::apply_predication (const function_instance &instance,
|
|||
|| instance.pred == PRED_TYPE_tumu || instance.pred == PRED_TYPE_mu)
|
||||
argument_types.quick_insert (0, mask_type);
|
||||
|
||||
/* Check whether a rounding mode parameter is needed.  */
|
||||
if (instance.base->has_rounding_mode_operand_p ())
|
||||
argument_types.quick_push (unsigned_type_node);
|
||||
|
||||
/* Check whether a vl parameter is needed.  */
|
||||
if (instance.base->apply_vl_p ())
|
||||
argument_types.quick_push (size_type_node);
|
||||
|
@ -3297,7 +3301,17 @@ function_expander::use_exact_insn (insn_code icode)
|
|||
}
|
||||
|
||||
for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
|
||||
add_input_operand (argno);
|
||||
{
|
||||
if (base->has_rounding_mode_operand_p ()
|
||||
&& argno == call_expr_nargs (exp) - 2)
|
||||
{
|
||||
/* Since the rounding mode argument position is not consistent with
|
||||
the instruction pattern, we need to skip rounding mode argument
|
||||
here. */
|
||||
continue;
|
||||
}
|
||||
add_input_operand (argno);
|
||||
}
|
||||
|
||||
if (base->apply_tail_policy_p ())
|
||||
add_input_operand (Pmode, get_tail_policy_for_pred (pred));
|
||||
|
@ -3307,6 +3321,9 @@ function_expander::use_exact_insn (insn_code icode)
|
|||
if (base->apply_vl_p ())
|
||||
add_input_operand (Pmode, get_avl_type_rtx (avl_type::NONVLMAX));
|
||||
|
||||
if (base->has_rounding_mode_operand_p ())
|
||||
add_input_operand (call_expr_nargs (exp) - 2);
|
||||
|
||||
/* TODO: Currently, we don't support intrinsic that is modeling rounding mode.
|
||||
We add default rounding mode for the intrinsics that didn't model rounding
|
||||
mode yet. */
|
||||
|
|
|
@ -413,6 +413,9 @@ public:
|
|||
/* Return true if intrinsics has merge operand. */
|
||||
virtual bool has_merge_operand_p () const;
|
||||
|
||||
/* Return true if intrinsics has rounding mode operand. */
|
||||
virtual bool has_rounding_mode_operand_p () const;
|
||||
|
||||
/* Try to fold the given gimple call. Return the new gimple statement
|
||||
on success, otherwise return null. */
|
||||
virtual gimple *fold (gimple_folder &) const { return NULL; }
|
||||
|
@ -434,6 +437,7 @@ public:
|
|||
|
||||
machine_mode arg_mode (unsigned int) const;
|
||||
machine_mode ret_mode (void) const;
|
||||
unsigned int arg_num (void) const;
|
||||
bool check (void);
|
||||
|
||||
bool require_immediate (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT) const;
|
||||
|
@ -600,6 +604,12 @@ function_checker::ret_mode () const
|
|||
return TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
|
||||
}
|
||||
|
||||
/* Return the stored argument count (m_nargs).  Presumably this is the number
   of arguments of the call being checked -- confirm against where m_nargs
   is set.  */
inline unsigned int
|
||||
function_checker::arg_num () const
|
||||
{
|
||||
return m_nargs;
|
||||
}
|
||||
|
||||
/* Default implementation of function_base::call_properties, with conservatively
|
||||
correct behavior for floating-point instructions. */
|
||||
inline unsigned int
|
||||
|
@ -651,6 +661,14 @@ function_base::has_merge_operand_p () const
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Default implementation: return false, since most of the intrinsics do
|
||||
not have a rounding mode operand.  Classes whose intrinsics take one override this. */
|
||||
inline bool
|
||||
function_base::has_rounding_mode_operand_p () const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Since most of intrinsics can be overloaded, we set it true by default. */
|
||||
inline bool
|
||||
function_base::can_be_overloaded_p (enum predication_type_index) const
|
||||
|
|
|
@ -410,7 +410,7 @@ vint8mf8_t var_10 = __riscv_vsra_vv_i8mf8_mu(var_53, var_11, var_13, var_54, 1);
|
|||
vint8mf8_t var_1 = __riscv_vmax_vx_i8mf8_mu(var_72, var_10, var_10, var_9, 1);
|
||||
// 5, 1
|
||||
|
||||
vint8mf8_t var_0 = __riscv_vssra_vx_i8mf8(var_1, var_85, 1);
|
||||
vint8mf8_t var_0 = __riscv_vssra_vx_i8mf8(var_1, var_85, 0, 1);
|
||||
// 5
|
||||
|
||||
vbool64_t var_2 = __riscv_vmsbc_vx_i8mf8_b64(var_0, var_3, 1);
|
||||
|
|
|
@ -335,7 +335,7 @@ vbool32_t var_14 = __riscv_vmseq_vv_u32m1_b32_mu(var_39, var_40, var_41, var_42,
|
|||
// 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
__riscv_vsetvl_e32m8(27);
|
||||
vint32m8_t var_0 = __riscv_vssub_vx_i32m8(var_59, var_1, 27);
|
||||
vint32m8_t var_0 = __riscv_vssub_vx_i32m8(var_59, var_1, 0, 27);
|
||||
// -1061068412, -1776580354, -100935733, 1111812123, 840849367, 1454689778, -1416961586, 286847306, 2118070565, 1965230406, -1040658036, 587048909, 1667471177, -1452995359, 1549864288, 1955648606, -1153689461, -105253108, 1792194502, -341148625, 630712685, -1367196047, 1561028022, -599776667, 1447136930, -480839967, -1960624419
|
||||
|
||||
__riscv_vsetvl_e32m8(19);
|
||||
|
@ -359,7 +359,7 @@ __riscv_vse32_v_i32m8(var_70, var_4, 10);
|
|||
__riscv_vsetvl_e32m8(27);
|
||||
__riscv_vse32_v_i32m8(var_74, var_10, 27);
|
||||
__riscv_vsetvl_e32m8(19);
|
||||
vint32m8_t var_2 = __riscv_vaadd_vx_i32m8_mu(var_8, var_0, var_57, var_11, 19);
|
||||
vint32m8_t var_2 = __riscv_vaadd_vx_i32m8_mu(var_8, var_0, var_57, var_11, 0, 19);
|
||||
// 359557953, 197431454, 20431512, -1122683440, 434907211, -719883824, 37657602, -782537125, -106566459, -1084448745, -945878036, -626712270, 778335544, -755412905, -574020956, -1028523912, 458052219, -1166885074, 732449389, -341148625, 630712685, -1367196047, 1561028022, -599776667, 1447136930, -480839967, -1960624419
|
||||
|
||||
if(!check(var_70, var_114, var_115)) {cerr << "check 113 fails" << endl; return_value = 1;}
|
||||
|
@ -372,7 +372,7 @@ vint32m1_t var_6 = __riscv_vredmin_vs_i32m8_i32m1_tu(var_18, var_2, var_18, 3);
|
|||
// 20431512
|
||||
|
||||
__riscv_vsetvl_e32m8(10);
|
||||
vint32m8_t var_9 = __riscv_vasub_vv_i32m8(var_2, var_53, 10);
|
||||
vint32m8_t var_9 = __riscv_vasub_vv_i32m8(var_2, var_53, 0, 10);
|
||||
// 679936144, 129579879, -377657770, -304070536, 173758693, 371969755, -994446215, -471795897, 314947602, 489622156
|
||||
|
||||
__riscv_vsetvl_e32m8(19);
|
||||
|
@ -394,7 +394,7 @@ vint32m1_t var_12 = __riscv_vredxor_vs_i32m2_i32m1_tum(var_46, var_7, var_47, va
|
|||
// 611390260
|
||||
|
||||
__riscv_vsetvl_e32m8(10);
|
||||
vint32m8_t var_19 = __riscv_vssra_vv_i32m8_mu(var_13, var_20, var_21, var_22, 10);
|
||||
vint32m8_t var_19 = __riscv_vssra_vv_i32m8_mu(var_13, var_20, var_21, var_22, 0, 10);
|
||||
// -816540887, 1074541498, -1467236483, -23091331, -38787, 1943479342, 1158929439, 360172, -218, 2034278775
|
||||
|
||||
if(!check(var_85, var_105, var_106)) {cerr << "check 104 fails" << endl; return_value = 1;}
|
||||
|
|
|
@ -382,7 +382,7 @@ vuint16m2_t var_18 = __riscv_vwmulu_vv_u16m2_mu(var_24, var_25, var_26, var_27,
|
|||
if(!check(var_91, var_142, var_143)) {cerr << "check 141 fails" << endl; return_value = 1;}
|
||||
if(!check(var_90, var_139, var_140)) {cerr << "check 138 fails" << endl; return_value = 1;}
|
||||
__riscv_vsetvl_e64m1(2);
|
||||
vuint64m1_t var_7 = __riscv_vasubu_vx_u64m1(var_42, var_15, 2);
|
||||
vuint64m1_t var_7 = __riscv_vasubu_vx_u64m1(var_42, var_15, 0, 2);
|
||||
// 13578039560782071336, 1484621602351210644
|
||||
|
||||
if(!check(var_94, var_136, var_137)) {cerr << "check 135 fails" << endl; return_value = 1;}
|
||||
|
@ -415,7 +415,7 @@ int32_t var_9 = __riscv_vmv_x_s_i32m4_i32(var_10);
|
|||
// 0
|
||||
|
||||
__riscv_vsetvl_e32mf2(2);
|
||||
vint32mf2_t var_2 = __riscv_vsadd_vx_i32mf2_mu(var_47, var_48, var_49, var_9, 2);
|
||||
vint32mf2_t var_2 = __riscv_vsadd_vx_i32mf2_mu(var_47, var_48, var_49, var_9, 0, 2);
|
||||
// 470559939, 1961139923
|
||||
|
||||
__riscv_vsuxei64_v_i32mf2(var_115, var_112, var_2, 2);
|
||||
|
|
|
@ -341,7 +341,7 @@ vuint32m4_t var_6 = __riscv_vmv_s_x_u32m4_tu(var_0, var_58, 10);
|
|||
// 1207313030, 3014603841, 234827873, 3591973177, 774620885, 1394372191, 643827065, 4045083863, 1674932769, 2206939407, 1193735501, 1704965662, 3397690693, 3455432162, 2782347083
|
||||
|
||||
__riscv_vsetvl_e8m1(15);
|
||||
vuint8m1_t var_16 = __riscv_vnclipu_wx_u8m1_mu(var_25, var_26, var_1, var_56, 15);
|
||||
vuint8m1_t var_16 = __riscv_vnclipu_wx_u8m1_mu(var_25, var_26, var_1, var_56, 0, 15);
|
||||
// 143, 148, 202, 255, 188, 255, 0, 255, 6, 180, 211, 220, 74, 255, 255
|
||||
|
||||
__riscv_vsetvl_e16m2(3);
|
||||
|
|
|
@ -351,7 +351,7 @@ vbool1_t var_66 = __riscv_vmseq_vx_i8m8_b1(var_68, var_69, 98);
|
|||
// 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
__riscv_vsetvl_e8mf2(8);
|
||||
vuint8mf2_t var_19 = __riscv_vasubu_vx_u8mf2_tumu(var_20, var_21, var_22, var_75, 8);
|
||||
vuint8mf2_t var_19 = __riscv_vasubu_vx_u8mf2_tumu(var_20, var_21, var_22, var_75, 0, 8);
|
||||
// 197, 206, 42, 228, 104, 250, 255, 186
|
||||
|
||||
vbool16_t var_18 = __riscv_vmfle_vv_f64m4_b16_mu(var_24, var_25, var_26, var_27, 8);
|
||||
|
@ -395,11 +395,11 @@ __riscv_vsetvl_e8m8(120);
|
|||
vint8m8_t var_1 = __riscv_vxor_vv_i8m8_tumu(var_11, var_0, var_2, var_2, 120);
|
||||
// 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
vuint8m8_t var_8 = __riscv_vasubu_vx_u8m8(var_59, var_13, 120);
|
||||
vuint8m8_t var_8 = __riscv_vasubu_vx_u8m8(var_59, var_13, 0, 120);
|
||||
// 218, 246, 239, 246, 238, 166, 19, 14, 242, 1, 241, 218, 249, 179, 214, 204, 166, 219, 245, 179, 179, 4, 224, 178, 202, 253, 206, 163, 230, 251, 213, 25, 19, 195, 239, 168, 239, 17, 239, 205, 173, 251, 241, 202, 219, 223, 17, 162, 3, 6, 13, 17, 170, 229, 178, 246, 180, 249, 195, 250, 241, 229, 20, 249, 19, 174, 198, 221, 200, 11, 177, 160, 180, 216, 11, 19, 163, 17, 209, 174, 1, 9, 208, 241, 169, 190, 176, 19, 187, 198, 213, 208, 22, 4, 237, 180, 0, 188, 204, 203, 173, 188, 28, 180, 162, 218, 227, 160, 230, 214, 177, 172, 255, 15, 207, 199, 20, 165, 180, 206
|
||||
|
||||
__riscv_vsetvl_e8m8(31);
|
||||
vint8m8_t var_17 = __riscv_vssra_vx_i8m8_tumu(var_31, var_12, var_32, var_76, 31);
|
||||
vint8m8_t var_17 = __riscv_vssra_vx_i8m8_tumu(var_31, var_12, var_32, var_76, 0, 31);
|
||||
// 41, 69, -57, 102, 86, 103, -128, 4, -118, -1, 109, 40, 7, 27, 79, -63, 35, 73, 1, 42, -85, 126, 107, 53, -114, 39, 53, 10, -94, -20, 125, -46, -52, 94, 14, -74, -97, 25, -59, 3, 68, -15, -60, 83, 80, -113, -90, -118, 7, -38, -57, -114, 88, -76, 8, 44, 45, 12, -27, 83, 43, 77, -93, 79, 6, -17, 93, 33, 22, 6, 113, -5, -13, 20, -106, -36, -57, -21, -127, -89, 102, -62, -92, -124, 73, 118, 41, -120, 94, -100, 13, -34, -86, -103, 26, -57, -16, 22, -48, -71, 15, 40, 27, -125, -94, -93, -93, -3, -33, 60, 15, -29, -16, 70, -15, 30, 108, -105, 30, -65
|
||||
|
||||
__riscv_vsetvl_e8m8(98);
|
||||
|
@ -407,7 +407,7 @@ vbool1_t var_6 = __riscv_vmseq_vv_i8m8_b1(var_5, var_10, 98);
|
|||
// 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
__riscv_vsetvl_e8m8(120);
|
||||
vint8m8_t var_4 = __riscv_vssra_vv_i8m8_tumu(var_63, var_1, var_10, var_8, 120);
|
||||
vint8m8_t var_4 = __riscv_vssra_vv_i8m8_tumu(var_63, var_1, var_10, var_8, 0, 120);
|
||||
// 15, -2, 0, 0, -1, 0, 0, -2, 18, 0, 1, 0, 2, 10, 2, 0, 0, -9, 0, 0, 0, 0, 1, 0, 31, 2, 0, -5, 0, 3, -2, -17, -12, 0, 0, -1, 1, 47, -1, 0, 0, -1, 33, 0, 0, 1, 0, 0, -10, 0, 0, 61, 18, -3, 0, -1, 0, 0, 0, 14, 0, 3, 0, 0, 0, 0, 0, 0, -59, 0, 0, 55, 0, 11, 14, 0, 0, 0, 1, 0, 0, 0, -1, 17, 0, 2, 0, 0, -5, -1, 1, 0, 1, 6, 0, 0, 107, 0, 7, 0, 0, 0, 5, 7, 0, 0, 0, -1, 0, 0, -8, 0, 0, 0, -1, 0, -8, 1, 0, 0
|
||||
|
||||
__riscv_vsetvl_e8m8(31);
|
||||
|
|
|
@ -375,7 +375,7 @@ vuint16m1_t var_8 = __riscv_vor_vv_u16m1_tumu(var_1, var_43, var_44, var_45, 1);
|
|||
vfloat32m2_t var_12 = __riscv_vfdiv_vv_f32m2_tumu(var_1, var_16, var_16, var_18, 1);
|
||||
// 4.841275101341818e-29, 4.841275101341818e-29, 4.841275101341818e-29, 4.841275101341818e-29, 4.841275101341818e-29
|
||||
|
||||
vint8mf2_t var_19 = __riscv_vaadd_vv_i8mf2_tumu(var_1, var_20, var_21, var_22, 1);
|
||||
vint8mf2_t var_19 = __riscv_vaadd_vv_i8mf2_tumu(var_1, var_20, var_21, var_22, 0, 1);
|
||||
// -108, 37, -34
|
||||
|
||||
__riscv_vse8_v_i8mf2(var_66, var_6, 1);
|
||||
|
|
|
@ -121,7 +121,7 @@ asm volatile ("ttt":::"memory");
|
|||
if (check(k, ab, aa))
|
||||
cerr << "check 8 fails" << endl;
|
||||
vbool64_t var_2 = __riscv_vmsne_vx_u32mf2_b64_mu(var_55, var_56, var_3, au, 2);
|
||||
vint16mf4_t var_1 = __riscv_vssub_vv_i16mf4_mu(var_2, var_0, var_4, cg, 2);
|
||||
vint16mf4_t var_1 = __riscv_vssub_vv_i16mf4_mu(var_2, var_0, var_4, cg, 0, 2);
|
||||
vint16mf4_t var_5 = __riscv_vxor_vv_i16mf4_mu(var_46, var_1, bw, bx, 2);
|
||||
vint32mf2_t var_18 = __riscv_vwmaccsu_vv_i32mf2(bf, var_1, bg, 2);
|
||||
vint8mf8_t var_6 = __riscv_vncvt_x_x_w_i8mf8_mu(var_8, var_7, var_5, 1);
|
||||
|
|
|
@ -131,7 +131,7 @@
|
|||
vuint64m1_t var_8 = __riscv_vredand_vs_u64m8_u64m1_tum(var_13, var_58, var_0, var_59, 1);
|
||||
__riscv_vse64_v_i64m8(var_74, var_3, 2);
|
||||
vuint64m8_t var_10 = __riscv_vmadd_vv_u64m8_mu(var_13, var_6, var_51, var_52, 13);
|
||||
vuint64m8_t var_15 = __riscv_vssubu_vv_u64m8_mu(var_33, var_6, var_34, var_35, 13);
|
||||
vuint64m8_t var_15 = __riscv_vssubu_vv_u64m8_mu(var_33, var_6, var_34, var_35, 0, 13);
|
||||
vuint64m1_t var_9 = __riscv_vadd_vv_u64m1_mu(var_54, var_8, var_55, var_56, 1);
|
||||
vuint64m1_t var_11 = __riscv_vredxor_vs_u64m4_u64m1_tum(var_46, var_8, var_47, var_48, 1);
|
||||
if(!check(var_74, var_129, var_130)) {cerr << "check 128 fails" << endl; return_value = 1;}
|
||||
|
|
|
@ -131,7 +131,7 @@
|
|||
vuint64m1_t var_8 = __riscv_vredand_vs_u64m8_u64m1_tum(var_13, var_58, var_0, var_59, 1);
|
||||
__riscv_vse64_v_i64m8(var_74, var_3, 2);
|
||||
vuint64m8_t var_10 = __riscv_vmacc_vv_u64m8_mu(var_13, var_6, var_51, var_52, 13);
|
||||
vuint64m8_t var_15 = __riscv_vssubu_vv_u64m8_mu(var_33, var_6, var_34, var_35, 13);
|
||||
vuint64m8_t var_15 = __riscv_vssubu_vv_u64m8_mu(var_33, var_6, var_34, var_35, 0, 13);
|
||||
vuint64m1_t var_9 = __riscv_vadd_vv_u64m1_mu(var_54, var_8, var_55, var_56, 1);
|
||||
vuint64m1_t var_11 = __riscv_vredxor_vs_u64m4_u64m1_tum(var_46, var_8, var_47, var_48, 1);
|
||||
if(!check(var_74, var_129, var_130)) {cerr << "check 128 fails" << endl; return_value = 1;}
|
||||
|
|
|
@ -131,7 +131,7 @@
|
|||
vuint64m1_t var_8 = __riscv_vredand_vs_u64m8_u64m1_tum(var_13, var_58, var_0, var_59, 1);
|
||||
__riscv_vse64_v_i64m8(var_74, var_3, 2);
|
||||
vuint64m8_t var_10 = __riscv_vnmsub_vv_u64m8_mu(var_13, var_6, var_51, var_52, 13);
|
||||
vuint64m8_t var_15 = __riscv_vssubu_vv_u64m8_mu(var_33, var_6, var_34, var_35, 13);
|
||||
vuint64m8_t var_15 = __riscv_vssubu_vv_u64m8_mu(var_33, var_6, var_34, var_35, 0, 13);
|
||||
vuint64m1_t var_9 = __riscv_vadd_vv_u64m1_mu(var_54, var_8, var_55, var_56, 1);
|
||||
vuint64m1_t var_11 = __riscv_vredxor_vs_u64m4_u64m1_tum(var_46, var_8, var_47, var_48, 1);
|
||||
if(!check(var_74, var_129, var_130)) {cerr << "check 128 fails" << endl; return_value = 1;}
|
||||
|
|
|
@ -131,7 +131,7 @@
|
|||
vuint64m1_t var_8 = __riscv_vredand_vs_u64m8_u64m1_tum(var_13, var_58, var_0, var_59, 1);
|
||||
__riscv_vse64_v_i64m8(var_74, var_3, 2);
|
||||
vuint64m8_t var_10 = __riscv_vnmsac_vv_u64m8_mu(var_13, var_6, var_51, var_52, 13);
|
||||
vuint64m8_t var_15 = __riscv_vssubu_vv_u64m8_mu(var_33, var_6, var_34, var_35, 13);
|
||||
vuint64m8_t var_15 = __riscv_vssubu_vv_u64m8_mu(var_33, var_6, var_34, var_35, 0, 13);
|
||||
vuint64m1_t var_9 = __riscv_vadd_vv_u64m1_mu(var_54, var_8, var_55, var_56, 1);
|
||||
vuint64m1_t var_11 = __riscv_vredxor_vs_u64m4_u64m1_tum(var_46, var_8, var_47, var_48, 1);
|
||||
if(!check(var_74, var_129, var_130)) {cerr << "check 128 fails" << endl; return_value = 1;}
|
||||
|
|
|
@ -354,7 +354,7 @@ vbool64_t var_63 = __riscv_vmseq_vx_u8mf8_b64(var_69, var_70, 2);
|
|||
vuint8mf8_t var_19 = __riscv_vsub_vx_u8mf8_tumu(var_20, var_21, var_22, var_73, 2);
|
||||
// 225, 96
|
||||
|
||||
vuint32mf2_t var_16 = __riscv_vssubu_vx_u32mf2_tumu(var_33, var_34, var_35, var_74, 2);
|
||||
vuint32mf2_t var_16 = __riscv_vssubu_vx_u32mf2_tumu(var_33, var_34, var_35, var_74, 0, 2);
|
||||
// 3077557042, 4186139873
|
||||
|
||||
__riscv_vsetvl_e64m4(2);
|
||||
|
|
|
@ -309,7 +309,7 @@ __riscv_vsetvl_e32m2(8);
|
|||
vbool16_t var_49 = __riscv_vmseq_vv_i32m2_b16(var_50, var_51, 8);
|
||||
// 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
vint16m1_t var_13 = __riscv_vsadd_vx_i16m1(var_0, var_60, 8);
|
||||
vint16m1_t var_13 = __riscv_vsadd_vx_i16m1(var_0, var_60, 0, 8);
|
||||
// -9364, 32767, 11538, -10536, 32767, 30906, 30906, 4977
|
||||
|
||||
__riscv_vsetvl_e16m8(7);
|
||||
|
@ -328,7 +328,7 @@ vuint32m2_t var_12 = __riscv_vfcvt_rtz_xu_f_v_u32m2_mu(var_35, var_36, var_37, 8
|
|||
|
||||
__riscv_vse16_v_i16m1(var_79, var_13, 8);
|
||||
__riscv_vsetvl_e16m8(7);
|
||||
vint16m8_t var_9 = __riscv_vaadd_vx_i16m8_mu(var_15, var_43, var_44, var_63, 7);
|
||||
vint16m8_t var_9 = __riscv_vaadd_vx_i16m8_mu(var_15, var_43, var_44, var_63, 0, 7);
|
||||
// -6442, 2757, 1437, -18340, -12668, -27551, 29648
|
||||
|
||||
__riscv_vsetvl_e32m2(8);
|
||||
|
@ -347,7 +347,7 @@ vint16m8_t var_4 = __riscv_vmerge_vxm_i16m8(var_48, var_8, var_11, 7);
|
|||
// -6442, -6442, -6442, -6442, -6442, -6442, -6442
|
||||
|
||||
__riscv_vsetvl_e16m1(1);
|
||||
vint16m1_t var_6 = __riscv_vaadd_vx_i16m1(var_14, var_8, 1);
|
||||
vint16m1_t var_6 = __riscv_vaadd_vx_i16m1(var_14, var_8, 0, 1);
|
||||
// -6554
|
||||
|
||||
if(!check(var_96, var_107, var_108)) {cerr << "check 106 fails" << endl; return_value = 1;}
|
||||
|
|
|
@ -304,7 +304,7 @@ vuint8mf8_t var_59 = __riscv_vle8_v_u8mf8(var_66, 1);
|
|||
// 54
|
||||
|
||||
__riscv_vsetvl_e8mf8(2);
|
||||
vint8mf8_t var_19 = __riscv_vsmul_vx_i8mf8(var_20, var_63, 2);
|
||||
vint8mf8_t var_19 = __riscv_vsmul_vx_i8mf8(var_20, var_63, 0,2);
|
||||
// 79, 28
|
||||
|
||||
__riscv_vsetvl_e16mf2(2);
|
||||
|
@ -358,7 +358,7 @@ vbool64_t var_0 = __riscv_vmsne_vx_i16mf4_b64(var_9, var_16, 2);
|
|||
// 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
__riscv_vsetvl_e8mf8(1);
|
||||
vuint8mf8_t var_1 = __riscv_vnclipu_wx_u8mf8_mu(var_0, var_59, var_2, var_65, 1);
|
||||
vuint8mf8_t var_1 = __riscv_vnclipu_wx_u8mf8_mu(var_0, var_59, var_2, var_65, 0, 1);
|
||||
// 255
|
||||
|
||||
__riscv_vsetvl_e8mf8(2);
|
||||
|
|
|
@ -292,7 +292,7 @@ vint8m8_t var_10 = __riscv_vmulh_vv_i8m8(var_11, var_38, 45);
|
|||
// -9, -3, 9, -3, 3, -35, 5, 3, 0, 17, -1, -10, 6, -10, 21, -18, 37, 24, 15, -8, -29, 18, 0, -7, -6, -2, -46, 44, 3, -5, -6, -9, 21, -3, -42, -9, 9, -12, -2, -18, 7, 4, -1, -1, 39
|
||||
|
||||
if(!check(var_62, var_80, var_81)) {cerr << "check 79 fails" << endl; return_value = 1;}
|
||||
vint8m8_t var_8 = __riscv_vasub_vx_i8m8(var_10, var_12, 45);
|
||||
vint8m8_t var_8 = __riscv_vasub_vx_i8m8(var_10, var_12, 0, 45);
|
||||
// -32, -29, -23, -29, -26, -45, -25, -26, -28, -19, -28, -33, -25, -33, -17, -37, -9, -16, -20, -32, -42, -19, -28, -31, -31, -29, -51, -6, -26, -30, -31, -32, -17, -29, -49, -32, -23, -34, -29, -37, -24, -26, -28, -28, -8
|
||||
|
||||
__riscv_vse8_v_i8m8_m(var_15, var_63, var_8, 45);
|
||||
|
|
|
@ -379,7 +379,7 @@ if(!check(var_87, var_122, var_123)) {cerr << "check 121 fails" << endl; return_
|
|||
vint8m4_t var_5 = __riscv_vnsra_wv_i8m4(var_12, var_48, 43);
|
||||
// 0, -2, -5, -7, 0, -3, -1, -1, 0, 0, -5, -90, -1, 0, -15, -1, 0, 0, 0, 0, 0, 0, -3, -1, -3, 0, 0, -13, 0, -1, -1, -1, 0, -1, 39, 0, 0, -2, 0, 0, -24, -45, 1
|
||||
|
||||
vint16m8_t var_4 = __riscv_vssub_vx_i16m8_mu(var_6, var_12, var_49, var_10, 43);
|
||||
vint16m8_t var_4 = __riscv_vssub_vx_i16m8_mu(var_6, var_12, var_49, var_10, 0, 43);
|
||||
// -27921, -25052, -17, -20337, 15054, 1382, -12, -16, 16159, -32768, 17832, -12646, 16746, 20, -15, -16, 4, 7798, 14967, 3, -29916, 11, -6168, -32768, 14361, -14023, -32768, -12646, 10, -12646, 18748, -12646, 8473, -32768, -32768, 16, -32768, -14720, -11479, 6985, -24591, -28243, 11
|
||||
|
||||
__riscv_vsetvl_e16m8(16);
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, x, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1_tu (v3, v2, x, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, x, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1_tu (v3, v2, x, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -7,8 +7,8 @@ void f (void * in, void *out, int32_t x, int n)
|
|||
for (int i = 0; i < n; i++) {
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + i + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + i + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, x, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1_tu (v3, v2, x, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, x, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1_tu (v3, v2, x, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + i + 2, v4, 4);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,8 +15,8 @@ void f0 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, -16, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, -16, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, -16, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, -16, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -32,8 +32,8 @@ void f1 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 15, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 15, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 15, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 15, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,8 @@ void f2 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 16, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 16, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 16, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 16, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -66,8 +66,8 @@ void f3 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAAA, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 0xAAAAAAAA, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAAA, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 0xAAAAAAAA, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -83,8 +83,8 @@ void f4 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAAAAAAAAAAA, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 0xAAAAAAAAAAAAAAAA, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -100,8 +100,8 @@ void f5 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAAAAAAAAAAA, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 0xAAAAAAAAAAAAAAAA, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -117,7 +117,7 @@ void f6 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, x, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, x, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, x, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, x, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
|
|
@ -15,8 +15,8 @@ void f0 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, -16, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, -16, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, -16, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, -16, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -32,8 +32,8 @@ void f1 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 15, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 15, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 15, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 15, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,8 @@ void f2 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 16, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 16, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 16, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 16, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -66,7 +66,7 @@ void f3 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAA, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 0xAAAAAAA, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAA, 0,4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1 (v3, 0xAAAAAAA, 0,4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAAA, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1_tu (v3, v2, 0xAAAAAAAA, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAAA, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1_tu (v3, v2, 0xAAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, 0xAAAAAAAAAAAAAAAA, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, x, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1_tu (v3, v2, x, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, x, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1_tu (v3, v2, x, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -7,8 +7,8 @@ void f (void * in, void *out, int32_t x, int n)
|
|||
for (int i = 0; i < n; i++) {
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + i + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + i + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, x, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1_tu (v3, v2, x, 4);
|
||||
vuint64m1_t v3 = __riscv_vsaddu_vx_u64m1 (v2, x, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vsaddu_vx_u64m1_tu (v3, v2, x, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + i + 2, v4, 4);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,8 +15,8 @@ void f0 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, -15, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, -15, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, -15, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, -15, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -32,8 +32,8 @@ void f1 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 16, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 16, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 16, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 16, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,8 @@ void f2 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 17, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 17, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 17, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 17, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -66,8 +66,8 @@ void f3 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 0xAAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAAA, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 0xAAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -83,8 +83,8 @@ void f4 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -100,8 +100,8 @@ void f5 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -117,7 +117,7 @@ void f6 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, x, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, x, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, x, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, x, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
|
|
@ -15,8 +15,8 @@ void f0 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, -15, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, -15, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, -15, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, -15, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -32,8 +32,8 @@ void f1 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 16, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 16, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 16, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 16, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,8 @@ void f2 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 17, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 17, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 17, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 17, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -66,7 +66,7 @@ void f3 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 0xAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAA, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1 (v3, 0xAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1_tu (v3, v2, 0xAAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAAA, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1_tu (v3, v2, 0xAAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, x, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1_tu (v3, v2, x, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, x, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1_tu (v3, v2, x, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -7,8 +7,8 @@ void f (void * in, void *out, int32_t x, int n)
|
|||
for (int i = 0; i < n; i++) {
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + i + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + i + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, x, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1_tu (v3, v2, x, 4);
|
||||
vint64m1_t v3 = __riscv_vssub_vx_i64m1 (v2, x, 0, 4);
|
||||
vint64m1_t v4 = __riscv_vssub_vx_i64m1_tu (v3, v2, x, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out + i + 2, v4, 4);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,8 +15,8 @@ void f0 (void * in, void *out, uint64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, -16, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1 (v3, -16, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, -16, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1 (v3, -16, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -32,8 +32,8 @@ void f1 (void * in, void *out, uint64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, 15, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1 (v3, 15, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, 15, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1 (v3, 15, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,8 @@ void f2 (void * in, void *out, uint64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, 16, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1 (v3, 16, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, 16, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1 (v3, 16, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -66,7 +66,7 @@ void f3 (void * in, void *out, uint64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, 0xAAAAAAA, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1 (v3, 0xAAAAAAA, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, 0xAAAAAAA, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1 (v3, 0xAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, uint64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, 0xAAAAAAAA, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1_tu (v3, v2, 0xAAAAAAAA, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, 0xAAAAAAAA, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1_tu (v3, v2, 0xAAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, uint64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, 0xAAAAAAAAAAAAAAAA, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1_tu (v3, v2, 0xAAAAAAAAAAAAAAAA, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@ void f (void * in, void *out, uint64_t x, int n)
|
|||
{
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, x, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1_tu (v3, v2, x, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, x, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1_tu (v3, v2, x, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -7,8 +7,8 @@ void f (void * in, void *out, uint64_t x, int n)
|
|||
for (int i = 0; i < n; i++) {
|
||||
vuint64m1_t v = __riscv_vle64_v_u64m1 (in + i + 1, 4);
|
||||
vuint64m1_t v2 = __riscv_vle64_v_u64m1_tu (v, in + i + 2, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, x, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1_tu (v3, v2, x, 4);
|
||||
vuint64m1_t v3 = __riscv_vssubu_vx_u64m1 (v2, x, 0, 4);
|
||||
vuint64m1_t v4 = __riscv_vssubu_vx_u64m1_tu (v3, v2, x, 0, 4);
|
||||
__riscv_vse64_v_u64m1 (out + i + 2, v4, 4);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ void f1 (void * in, void *out, int32_t x)
|
|||
{
|
||||
vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
|
||||
vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
|
||||
vint32m1_t v3 = __riscv_vaadd_vx_i32m1 (v2, 0, 4);
|
||||
vint32m1_t v3 = __riscv_vaadd_vx_i32m1 (v2, 0, 0, 4);
|
||||
__riscv_vse32_v_i32m1 (out, v3, 4);
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,7 @@ void f2 (void * in, void *out, int32_t x)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in, 4);
|
||||
vint64m1_t v3 = __riscv_vaadd_vx_i64m1 (v2, 0, 4);
|
||||
vint64m1_t v3 = __riscv_vaadd_vx_i64m1 (v2, 0, 0, 4);
|
||||
__riscv_vse64_v_i64m1 (out, v3, 4);
|
||||
}
|
||||
|
||||
|
|
|
@ -15,8 +15,8 @@ void f0 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, -16, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, -16, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, -16, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, -16, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -32,8 +32,8 @@ void f1 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 15, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 15, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 15, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 15, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,8 @@ void f2 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 16, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 16, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 16, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 16, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -66,8 +66,8 @@ void f3 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 0xAAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 0xAAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 0xAAAAAAAA, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 0xAAAAAAAA, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -83,8 +83,8 @@ void f4 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -100,8 +100,8 @@ void f5 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 0xAAAAAAAAAAAAAAAA, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 0xAAAAAAAAAAAAAAAA, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -117,7 +117,7 @@ void f6 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, x, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, x, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, x, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, x, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
|
|
@ -15,8 +15,8 @@ void f0 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, -16, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, -16, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, -16, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, -16, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -32,8 +32,8 @@ void f1 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 15, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 15, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 15, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 15, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,8 @@ void f2 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 16, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 16, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 16, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 16, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
||||
|
@ -66,7 +66,7 @@ void f3 (void * in, void *out, int64_t x, int n)
|
|||
{
|
||||
vint64m1_t v = __riscv_vle64_v_i64m1 (in + 1, 4);
|
||||
vint64m1_t v2 = __riscv_vle64_v_i64m1_tu (v, in + 2, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 0xAAAAAAA, 4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 0xAAAAAAA, 4);
|
||||
vint64m1_t v3 = __riscv_vsadd_vx_i64m1 (v2, 0xAAAAAAA, 0,4);
|
||||
vint64m1_t v4 = __riscv_vsadd_vx_i64m1 (v3, 0xAAAAAAA, 0,4);
|
||||
__riscv_vse64_v_i64m1 (out + 2, v4, 4);
|
||||
}
|
||||
|
|
|
@ -165,7 +165,7 @@ void f16 (int8_t* base1,int8_t* base2,int8_t* out,int n)
|
|||
{
|
||||
vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
|
||||
for (int i = 0; i < n; i++){
|
||||
v = __riscv_vsadd_vv_i8mf4 (v, v, 32);
|
||||
v = __riscv_vsadd_vv_i8mf4 (v, v, 0, 32);
|
||||
v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
|
||||
}
|
||||
__riscv_vse8_v_i8mf4 (out, v, 32);
|
||||
|
@ -175,7 +175,7 @@ void f17 (int8_t* base1,int8_t* base2,int8_t* out,int n)
|
|||
{
|
||||
vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
|
||||
for (int i = 0; i < n; i++){
|
||||
v = __riscv_vsadd_vx_i8mf4 (v, 100, 32);
|
||||
v = __riscv_vsadd_vx_i8mf4 (v, 100, 0, 32);
|
||||
v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
|
||||
}
|
||||
__riscv_vse8_v_i8mf4 (out, v, 32);
|
||||
|
@ -185,7 +185,7 @@ void f18 (int8_t* base1,int8_t* base2,int8_t* out,int n)
|
|||
{
|
||||
vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
|
||||
for (int i = 0; i < n; i++){
|
||||
v = __riscv_vaadd_vv_i8mf4 (v, v, 32);
|
||||
v = __riscv_vaadd_vv_i8mf4 (v, v, 0, 32);
|
||||
v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
|
||||
}
|
||||
__riscv_vse8_v_i8mf4 (out, v, 32);
|
||||
|
@ -195,7 +195,7 @@ void f19 (int8_t* base1,int8_t* base2,int8_t* out,int n)
|
|||
{
|
||||
vint8mf4_t v = __riscv_vle8_v_i8mf4 (base1, 32);
|
||||
for (int i = 0; i < n; i++){
|
||||
v = __riscv_vaadd_vx_i8mf4 (v, 100, 32);
|
||||
v = __riscv_vaadd_vx_i8mf4 (v, 100, 0, 32);
|
||||
v = __riscv_vle8_v_i8mf4_tu (v, base2, 32);
|
||||
}
|
||||
__riscv_vse8_v_i8mf4 (out, v, 32);
|
||||
|
|
|
@ -6,24 +6,24 @@
|
|||
void f0 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
}
|
||||
|
||||
void f1 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
}
|
||||
|
||||
void f2 (void *base,void *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
__riscv_vse16_v_u16mf4 (out+100,src,vl);
|
||||
}
|
||||
|
@ -31,8 +31,8 @@ void f2 (void *base,void *out,size_t vl, size_t shift)
|
|||
void f3 (void *base,void *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
__riscv_vse16_v_u16mf4 (out+100,src,vl);
|
||||
}
|
||||
|
@ -41,8 +41,8 @@ void f4 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vbool64_t m = __riscv_vlm_v_b64 (base + 500, vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tumu(m,v,src,v,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tumu(m,v,src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
__riscv_vse16_v_u16mf4 (out+100,src,vl);
|
||||
}
|
||||
|
@ -51,8 +51,8 @@ void f5 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vbool64_t m = __riscv_vlm_v_b64 (base + 500, vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_m(m,src,v,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_m(m,src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
__riscv_vse16_v_u16mf4 (out+100,src,vl);
|
||||
}
|
||||
|
@ -62,7 +62,7 @@ void f6 (void *base,void *out,size_t vl, size_t shift)
|
|||
vbool64_t m = __riscv_vlm_v_b64 (base + 500, vl);
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8_m(m,src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8_m(m,src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v2,vl);
|
||||
__riscv_vse8_v_u8mf8 (out+100,v,vl);
|
||||
}
|
||||
|
@ -71,8 +71,8 @@ void f7 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8 (src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8 (src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v2,vl);
|
||||
__riscv_vse8_v_u8mf8 (out+100,v,vl);
|
||||
}
|
||||
|
@ -81,8 +81,8 @@ void f8 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8 (src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8 (src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v2,vl);
|
||||
__riscv_vse8_v_u8mf8 (out+100,v,vl);
|
||||
__riscv_vse16_v_u16mf4 (out+200,src,vl);
|
||||
|
@ -92,8 +92,8 @@ void f9 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v2,vl);
|
||||
__riscv_vse8_v_u8mf8 (out+100,v,vl);
|
||||
__riscv_vse16_v_u16mf4 (out+200,src,vl);
|
||||
|
@ -102,11 +102,11 @@ void f9 (void *base,void *out,size_t vl, size_t shift)
|
|||
void f10 (void *base,void *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
__riscv_vse16_v_u16mf4 (out+100,src,vl);
|
||||
}
|
||||
|
@ -115,12 +115,12 @@ void f11 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8_tu (v2,src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v2,vl);
|
||||
__riscv_vse8_v_u8mf8 (out+100,v,vl);
|
||||
__riscv_vse16_v_u16mf4 (out+200,src,vl);
|
||||
|
@ -130,11 +130,11 @@ void f12 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8 (src,v2,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8 (src,v2,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v2,vl);
|
||||
__riscv_vse8_v_u8mf8 (out+100,v,vl);
|
||||
}
|
||||
|
@ -144,8 +144,8 @@ void f13 (void *base,void *base2,void *out,size_t vl, int n)
|
|||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_m(m,src,vl,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_m(m,src,vl,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
|
@ -157,7 +157,7 @@ void f14 (void *base,void *base2,void *out,size_t vl, int n)
|
|||
for (int i = 0; i < n; i++){
|
||||
vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v2,vl);
|
||||
|
@ -170,11 +170,11 @@ void f15 (void *base,void *base2,void *out,size_t vl, int n)
|
|||
for (int i = 0; i < n; i++){
|
||||
vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 600, vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v2,vl);
|
||||
|
@ -185,7 +185,7 @@ void f16 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vncvt_x_x_w_u8mf8(src,vl);
|
||||
vuint8mf8_t v3 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
vuint8mf8_t v3 = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100,v3,vl);
|
||||
}
|
||||
|
@ -195,7 +195,7 @@ void f17 (void *base,void *out,size_t vl, int n)
|
|||
for (int i = 0; i < n; i++){
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
vuint8mf8_t src2 = __riscv_vle8_v_u8mf8 (base + 200*i, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wv_u8mf8(src,src2,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wv_u8mf8(src,src2,0,vl);
|
||||
vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
|
||||
asm volatile ("":::"memory");
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
|
@ -209,12 +209,12 @@ void f18 (void *base,void *out,size_t vl, int n)
|
|||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000), vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -224,12 +224,12 @@ void f19 (void *base,void *out,size_t vl, int n)
|
|||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000), vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 200*i,v2,vl);
|
||||
}
|
||||
|
@ -240,9 +240,9 @@ void f20 (void *base,void *out,size_t vl, int n)
|
|||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000), vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8mf8(src,v2,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 200*i,v2,vl);
|
||||
}
|
||||
|
@ -253,12 +253,12 @@ void f21 (void *base,void *out,size_t vl, int n)
|
|||
for (int i = 0; i < n; i++){
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((base + 1000 * i), vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
__riscv_vse16_v_u16mf4 (out + 200*i,src,vl);
|
||||
}
|
||||
|
@ -274,12 +274,12 @@ void f22 (uint16_t *base,uint8_t *out,size_t vl, int n)
|
|||
vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
|
||||
vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
|
||||
vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src1,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src2,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src3,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src4,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src5,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src6,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src1,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src2,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src3,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src4,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src5,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src6,v,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -295,12 +295,12 @@ void f23 (uint16_t *base,uint8_t *out,size_t vl, int n)
|
|||
vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
|
||||
vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
|
||||
vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src1,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src2,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src3,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src4,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src5,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src6,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src1,v2,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src2,v2,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src3,v2,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src4,v2,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src5,v2,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8mf8_tu(v,src6,v2,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -312,9 +312,9 @@ void f24 (void *base,void *base2,void *out,size_t vl, int n)
|
|||
vuint8mf8_t src3 = __riscv_vle8_v_u8mf8 (base + 300, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
|
||||
vuint16mf4_t v = __riscv_vnclipu_wv_u16mf4_m(m,src,src2,vl);
|
||||
vuint16mf4_t v = __riscv_vnclipu_wv_u16mf4_m(m,src,src2,0,vl);
|
||||
vuint16mf4_t v2 = __riscv_vle16_v_u16mf4_tu (v, base2 + i, vl);
|
||||
vuint8mf8_t v3 = __riscv_vnclipu_wv_u8mf8_m(m,v2,src3,vl);
|
||||
vuint8mf8_t v3 = __riscv_vnclipu_wv_u8mf8_m(m,v2,src3,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v3,vl);
|
||||
}
|
||||
}
|
||||
|
@ -328,7 +328,7 @@ void f25 (void *base,void *out,size_t vl, size_t shift)
|
|||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28", "v29", "v30");
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 (base + 100, vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
|
@ -351,7 +351,7 @@ void f26 (void *base,void *out,size_t vl, size_t shift)
|
|||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28", "v29", "v30");
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,vl);
|
||||
vuint8mf8_t v2 = __riscv_vnclipu_wv_u8mf8(src,v,0,vl);
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
|
|
|
@ -6,24 +6,24 @@
|
|||
void f0 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
}
|
||||
|
||||
void f1 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
}
|
||||
|
||||
void f2 (void *base,void *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
__riscv_vse16_v_u16m2 (out+100,src,vl);
|
||||
}
|
||||
|
@ -31,8 +31,8 @@ void f2 (void *base,void *out,size_t vl, size_t shift)
|
|||
void f3 (void *base,void *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
__riscv_vse16_v_u16m2 (out+100,src,vl);
|
||||
}
|
||||
|
@ -41,8 +41,8 @@ void f4 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vbool8_t m = __riscv_vlm_v_b8 (base + 500, vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tumu(m,v,src,v,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tumu(m,v,src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
__riscv_vse16_v_u16m2 (out+100,src,vl);
|
||||
}
|
||||
|
@ -51,8 +51,8 @@ void f5 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vbool8_t m = __riscv_vlm_v_b8 (base + 500, vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_m(m,src,v,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_m(m,src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
__riscv_vse16_v_u16m2 (out+100,src,vl);
|
||||
}
|
||||
|
@ -62,7 +62,7 @@ void f6 (void *base,void *out,size_t vl, size_t shift)
|
|||
vbool8_t m = __riscv_vlm_v_b8 (base + 500, vl);
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1_m(m,src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1_m(m,src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v2,vl);
|
||||
__riscv_vse8_v_u8m1 (out+100,v,vl);
|
||||
}
|
||||
|
@ -71,8 +71,8 @@ void f7 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1 (src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1 (src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v2,vl);
|
||||
__riscv_vse8_v_u8m1 (out+100,v,vl);
|
||||
}
|
||||
|
@ -81,8 +81,8 @@ void f8 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1 (src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1 (src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v2,vl);
|
||||
__riscv_vse8_v_u8m1 (out+100,v,vl);
|
||||
__riscv_vse16_v_u16m2 (out+200,src,vl);
|
||||
|
@ -92,8 +92,8 @@ void f9 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v2,vl);
|
||||
__riscv_vse8_v_u8m1 (out+100,v,vl);
|
||||
__riscv_vse16_v_u16m2 (out+200,src,vl);
|
||||
|
@ -102,11 +102,11 @@ void f9 (void *base,void *out,size_t vl, size_t shift)
|
|||
void f10 (void *base,void *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
__riscv_vse16_v_u16m2 (out+100,src,vl);
|
||||
}
|
||||
|
@ -115,12 +115,12 @@ void f11 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1_tu (v2,src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v2,vl);
|
||||
__riscv_vse8_v_u8m1 (out+100,v,vl);
|
||||
__riscv_vse16_v_u16m2 (out+200,src,vl);
|
||||
|
@ -130,11 +130,11 @@ void f12 (void *base,void *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1 (src,v2,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1 (src,v2,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v2,vl);
|
||||
__riscv_vse8_v_u8m1 (out+100,v,vl);
|
||||
}
|
||||
|
@ -144,8 +144,8 @@ void f13 (void *base,void *base2,void *out,size_t vl, int n)
|
|||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1_m(m,src,vl,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1_m(m,src,vl,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
|
@ -157,7 +157,7 @@ void f14 (void *base,void *base2,void *out,size_t vl, int n)
|
|||
for (int i = 0; i < n; i++){
|
||||
vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v2,vl);
|
||||
|
@ -170,11 +170,11 @@ void f15 (void *base,void *base2,void *out,size_t vl, int n)
|
|||
for (int i = 0; i < n; i++){
|
||||
vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 600, vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v2,vl);
|
||||
|
@ -185,7 +185,7 @@ void f16 (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vncvt_x_x_w_u8m1(src,vl);
|
||||
vuint8m1_t v3 = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v3 = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100,v3,vl);
|
||||
}
|
||||
|
@ -195,7 +195,7 @@ void f17 (void *base,void *out,size_t vl, int n)
|
|||
for (int i = 0; i < n; i++){
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
vuint8m1_t src2 = __riscv_vle8_v_u8m1 (base + 200*i, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wv_u8m1(src,src2,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wv_u8m1(src,src2,0,vl);
|
||||
vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
|
||||
asm volatile ("":::"memory");
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
|
@ -209,12 +209,12 @@ void f18 (void *base,void *out,size_t vl, int n)
|
|||
vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000), vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -224,12 +224,12 @@ void f19 (void *base,void *out,size_t vl, int n)
|
|||
vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000), vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 200*i,v2,vl);
|
||||
}
|
||||
|
@ -240,9 +240,9 @@ void f20 (void *base,void *out,size_t vl, int n)
|
|||
vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000), vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
v2 = __riscv_vnclipu_wv_u8m1(src,v2,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 200*i,v2,vl);
|
||||
}
|
||||
|
@ -253,12 +253,12 @@ void f21 (void *base,void *out,size_t vl, int n)
|
|||
for (int i = 0; i < n; i++){
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 ((base + 1000 * i), vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
__riscv_vse16_v_u16m2 (out + 200*i,src,vl);
|
||||
}
|
||||
|
@ -274,12 +274,12 @@ void f22 (uint16_t *base,uint8_t *out,size_t vl, int n)
|
|||
vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
|
||||
vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
|
||||
vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src1,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src2,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src3,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src4,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src5,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src6,v,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src1,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src2,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src3,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src4,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src5,v,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src6,v,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -295,12 +295,12 @@ void f23 (uint16_t *base,uint8_t *out,size_t vl, int n)
|
|||
vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
|
||||
vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
|
||||
vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src1,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src2,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src3,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src4,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src5,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src6,v2,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src1,v2,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src2,v2,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src3,v2,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src4,v2,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src5,v2,0,vl);
|
||||
v = __riscv_vnclipu_wv_u8m1_tu(v,src6,v2,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -312,9 +312,9 @@ void f24 (void *base,void *base2,void *out,size_t vl, int n)
|
|||
vuint8m1_t src3 = __riscv_vle8_v_u8m1 (base + 300, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
|
||||
vuint16m2_t v = __riscv_vnclipu_wv_u16m2_m(m,src,src2,vl);
|
||||
vuint16m2_t v = __riscv_vnclipu_wv_u16m2_m(m,src,src2,0,vl);
|
||||
vuint16m2_t v2 = __riscv_vle16_v_u16m2_tu (v, base2 + i, vl);
|
||||
vuint8m1_t v3 = __riscv_vnclipu_wv_u8m1_m(m,v2,src3,vl);
|
||||
vuint8m1_t v3 = __riscv_vnclipu_wv_u8m1_m(m,v2,src3,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v3,vl);
|
||||
}
|
||||
}
|
||||
|
@ -328,7 +328,7 @@ void f25 (void *base,void *out,size_t vl, size_t shift)
|
|||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28", "v29");
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 (base + 100, vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
|
@ -351,7 +351,7 @@ void f26 (void *base,void *out,size_t vl, size_t shift)
|
|||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28");
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
|
@ -374,7 +374,7 @@ void f27 (void *base,void *out,size_t vl, size_t shift)
|
|||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28");
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,vl);
|
||||
vuint8m1_t v2 = __riscv_vnclipu_wv_u8m1(src,v,0,vl);
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
void f0 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
}
|
||||
|
||||
|
@ -14,14 +14,14 @@ void f1 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t src2 = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 100), vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_tu(src2,src,shift,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_tu(src2,src,shift,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
}
|
||||
|
||||
void f2 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
__riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
|
||||
|
@ -31,7 +31,7 @@ void f3 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
{
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
__riscv_vse16_v_u16mf4 ((int16_t *)(out + 200*i),v2,vl);
|
||||
|
@ -41,9 +41,9 @@ void f3 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
void f4 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
__riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
|
||||
|
@ -54,8 +54,8 @@ void f5 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
|
|||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_m(m,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_m(m,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
|
@ -64,7 +64,7 @@ void f5 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
|
|||
void f6 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
}
|
||||
|
||||
|
@ -72,14 +72,14 @@ void f7 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t src2 = __riscv_vle8_v_u8m1 ((int8_t *)(base + 100), vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1_tu(src2,src,shift,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1_tu(src2,src,shift,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
}
|
||||
|
||||
void f8 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
__riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
|
||||
|
@ -89,7 +89,7 @@ void f9 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
{
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
__riscv_vse16_v_u16m2 ((int16_t *)(out + 200*i),v2,vl);
|
||||
|
@ -99,9 +99,9 @@ void f9 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
void f10 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
__riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
|
||||
|
@ -112,8 +112,8 @@ void f11 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
|
|||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1_m(m,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1_m(m,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
|
@ -124,12 +124,12 @@ void f12 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000), vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -139,12 +139,12 @@ void f13 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000), vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -154,12 +154,12 @@ void f14 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
for (int i = 0; i < n; i++){
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -169,12 +169,12 @@ void f15 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
for (int i = 0; i < n; i++){
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -189,12 +189,12 @@ void f16 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
|
||||
vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
|
||||
vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src1,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src2,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src3,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src4,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src5,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src6,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src1,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src2,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src3,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src4,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src5,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src6,shift,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -209,12 +209,12 @@ void f17 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
|
||||
vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
|
||||
vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src1,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src2,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src3,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src4,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src5,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src6,shift,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src1,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src2,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src3,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src4,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src5,shift,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src6,shift,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -224,9 +224,9 @@ void f18 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
|
|||
vuint32mf2_t src = __riscv_vle32_v_u32mf2 (base + 100, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
|
||||
vuint16mf4_t v = __riscv_vnclipu_wx_u16mf4_m(m,src,shift,vl);
|
||||
vuint16mf4_t v = __riscv_vnclipu_wx_u16mf4_m(m,src,shift,0,vl);
|
||||
vuint16mf4_t v2 = __riscv_vle16_v_u16mf4_tu (v, base2 + i, vl);
|
||||
vuint8mf8_t v3 = __riscv_vnclipu_wx_u8mf8_m(m,v2,shift,vl);
|
||||
vuint8mf8_t v3 = __riscv_vnclipu_wx_u8mf8_m(m,v2,shift,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v3,vl);
|
||||
}
|
||||
}
|
||||
|
@ -236,10 +236,10 @@ void f19 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
|
|||
vuint32m4_t src = __riscv_vle32_v_u32m4 (base + 100, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
|
||||
vuint16m2_t v = __riscv_vnclipu_wx_u16m2_m(m,src,shift,vl);
|
||||
vuint16m2_t v = __riscv_vnclipu_wx_u16m2_m(m,src,shift,0,vl);
|
||||
vuint16m2_t v2 = __riscv_vle16_v_u16m2_tu (v, base2 + i, vl);
|
||||
vuint8m1_t v3 = __riscv_vnclipu_wx_u8m1_m(m,v2,shift,vl);
|
||||
vuint8m1_t v4 = __riscv_vnclipu_wx_u8m1_tumu(m,v3,v2,shift,vl);
|
||||
vuint8m1_t v3 = __riscv_vnclipu_wx_u8m1_m(m,v2,shift,0,vl);
|
||||
vuint8m1_t v4 = __riscv_vnclipu_wx_u8m1_tumu(m,v3,v2,shift,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v3,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 222*i,v4,vl);
|
||||
}
|
||||
|
@ -255,7 +255,7 @@ void f20 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
|||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28", "v29");
|
||||
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
/* Only allow vncvt SRC == DEST v30. */
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
|
@ -276,7 +276,7 @@ void f21 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
|||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28", "v29", "v30");
|
||||
|
||||
vuint8mf2_t v = __riscv_vnclipu_wx_u8mf2(src,shift,vl);
|
||||
vuint8mf2_t v = __riscv_vnclipu_wx_u8mf2(src,shift,0,vl);
|
||||
/* Only allow vncvt SRC == DEST v31. */
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
|
@ -297,7 +297,7 @@ void f22 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
|||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28", "v29");
|
||||
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,0,vl);
|
||||
/* Only allow v29. */
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
void f0 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
}
|
||||
|
||||
|
@ -14,14 +14,14 @@ void f1 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t src2 = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 100), vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_tu(src2,src,31,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_tu(src2,src,31,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
}
|
||||
|
||||
void f2 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,0,vl);
|
||||
vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
__riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
|
||||
|
@ -31,7 +31,7 @@ void f3 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
{
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,0,vl);
|
||||
vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
__riscv_vse16_v_u16mf4 ((int16_t *)(out + 200*i),v2,vl);
|
||||
|
@ -41,9 +41,9 @@ void f3 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
void f4 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
|
||||
__riscv_vse8_v_u8mf8 (out,v,vl);
|
||||
__riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
|
||||
|
@ -54,8 +54,8 @@ void f5 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
|
|||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_m(m,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_m(m,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
|
@ -64,7 +64,7 @@ void f5 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
|
|||
void f6 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
}
|
||||
|
||||
|
@ -72,14 +72,14 @@ void f7 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
|||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t src2 = __riscv_vle8_v_u8m1 ((int8_t *)(base + 100), vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1_tu(src2,src,31,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1_tu(src2,src,31,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
}
|
||||
|
||||
void f8 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,0,vl);
|
||||
vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
__riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
|
||||
|
@ -89,7 +89,7 @@ void f9 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
{
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,0,vl);
|
||||
vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
__riscv_vse16_v_u16m2 ((int16_t *)(out + 200*i),v2,vl);
|
||||
|
@ -99,9 +99,9 @@ void f9 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
void f10 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
__riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
|
||||
|
@ -112,8 +112,8 @@ void f11 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
|
|||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1_m(m,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1_m(m,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
|
@ -124,12 +124,12 @@ void f12 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000), vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -139,12 +139,12 @@ void f13 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000), vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -154,12 +154,12 @@ void f14 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
for (int i = 0; i < n; i++){
|
||||
vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
|
||||
vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src,31,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -169,12 +169,12 @@ void f15 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
for (int i = 0; i < n; i++){
|
||||
vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src,31,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -189,12 +189,12 @@ void f16 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
|
||||
vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
|
||||
vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src1,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src2,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src3,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src4,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src5,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src6,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src1,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src2,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src3,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src4,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src5,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8mf8_tu(v,src6,31,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -209,12 +209,12 @@ void f17 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
|
|||
vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
|
||||
vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
|
||||
vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src1,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src2,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src3,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src4,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src5,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src6,31,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src1,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src2,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src3,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src4,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src5,31,0,vl);
|
||||
v = __riscv_vnclipu_wx_u8m1_tu(v,src6,31,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v,vl);
|
||||
}
|
||||
}
|
||||
|
@ -224,9 +224,9 @@ void f18 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
|
|||
vuint32mf2_t src = __riscv_vle32_v_u32mf2 (base + 100, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
|
||||
vuint16mf4_t v = __riscv_vnclipu_wx_u16mf4_m(m,src,31,vl);
|
||||
vuint16mf4_t v = __riscv_vnclipu_wx_u16mf4_m(m,src,31,0,vl);
|
||||
vuint16mf4_t v2 = __riscv_vle16_v_u16mf4_tu (v, base2 + i, vl);
|
||||
vuint8mf8_t v3 = __riscv_vnclipu_wx_u8mf8_m(m,v2,31,vl);
|
||||
vuint8mf8_t v3 = __riscv_vnclipu_wx_u8mf8_m(m,v2,31,0,vl);
|
||||
__riscv_vse8_v_u8mf8 (out + 100*i,v3,vl);
|
||||
}
|
||||
}
|
||||
|
@ -236,10 +236,10 @@ void f19 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
|
|||
vuint32m4_t src = __riscv_vle32_v_u32m4 (base + 100, vl);
|
||||
for (int i = 0; i < n; i++){
|
||||
vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
|
||||
vuint16m2_t v = __riscv_vnclipu_wx_u16m2_m(m,src,31,vl);
|
||||
vuint16m2_t v = __riscv_vnclipu_wx_u16m2_m(m,src,31,0,vl);
|
||||
vuint16m2_t v2 = __riscv_vle16_v_u16m2_tu (v, base2 + i, vl);
|
||||
vuint8m1_t v3 = __riscv_vnclipu_wx_u8m1_m(m,v2,31,vl);
|
||||
vuint8m1_t v4 = __riscv_vnclipu_wx_u8m1_tumu(m,v3,v2,31,vl);
|
||||
vuint8m1_t v3 = __riscv_vnclipu_wx_u8m1_m(m,v2,31,0,vl);
|
||||
vuint8m1_t v4 = __riscv_vnclipu_wx_u8m1_tumu(m,v3,v2,31,0,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 100*i,v3,vl);
|
||||
__riscv_vse8_v_u8m1 (out + 222*i,v4,vl);
|
||||
}
|
||||
|
@ -255,7 +255,7 @@ void f20 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
|||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28", "v29");
|
||||
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,0,vl);
|
||||
/* Only allow vncvt SRC == DEST v30. */
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
|
@ -276,7 +276,7 @@ void f21 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
|||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28", "v29", "v30");
|
||||
|
||||
vuint8mf2_t v = __riscv_vnclipu_wx_u8mf2(src,31,vl);
|
||||
vuint8mf2_t v = __riscv_vnclipu_wx_u8mf2(src,31,0,vl);
|
||||
/* Only allow vncvt SRC == DEST v31. */
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
|
@ -297,7 +297,7 @@ void f22 (int16_t *base,int8_t *out,size_t vl, size_t shift)
|
|||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
"v26", "v27", "v28", "v29");
|
||||
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,31,0,vl);
|
||||
/* Only allow v29. */
|
||||
asm volatile("#" ::
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
|
|
12
gcc/testsuite/gcc.target/riscv/rvv/base/vxrm-2.c
Normal file
12
gcc/testsuite/gcc.target/riscv/rvv/base/vxrm-2.c
Normal file
|
@ -0,0 +1,12 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
|
||||
|
||||
#include "riscv_vector.h"
|
||||
|
||||
void f (void * in, void *out, int32_t x)
|
||||
{
|
||||
vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
|
||||
vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
|
||||
vint32m1_t v3 = __riscv_vaadd_vx_i32m1 (v2, 0, x, 4); /* { dg-error {argument 3 of '__riscv_vaadd_vx_i32m1' must be an integer constant expression} } */
|
||||
__riscv_vse32_v_i32m1 (out, v3, 4);
|
||||
}
|
12
gcc/testsuite/gcc.target/riscv/rvv/base/vxrm-3.c
Normal file
12
gcc/testsuite/gcc.target/riscv/rvv/base/vxrm-3.c
Normal file
|
@ -0,0 +1,12 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
|
||||
|
||||
#include "riscv_vector.h"
|
||||
|
||||
void f (void * in, void *out, int32_t x)
|
||||
{
|
||||
vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
|
||||
vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
|
||||
vint32m1_t v3 = __riscv_vaadd_vx_i32m1 (v2, 0, 10, 4); /* { dg-error {passing 10 to argument 3 of '__riscv_vaadd_vx_i32m1', which expects a value in the range \[0, 3\]} } */
|
||||
__riscv_vse32_v_i32m1 (out, v3, 4);
|
||||
}
|
11
gcc/testsuite/gcc.target/riscv/rvv/base/vxrm-4.c
Normal file
11
gcc/testsuite/gcc.target/riscv/rvv/base/vxrm-4.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
|
||||
|
||||
#include "riscv_vector.h"
|
||||
|
||||
void f (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,shift,vl); /* { dg-error {argument 3 of '__riscv_vnclipu_wx_u8m1' must be an integer constant expression} } */
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
}
|
11
gcc/testsuite/gcc.target/riscv/rvv/base/vxrm-5.c
Normal file
11
gcc/testsuite/gcc.target/riscv/rvv/base/vxrm-5.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
|
||||
|
||||
#include "riscv_vector.h"
|
||||
|
||||
void f (uint16_t *base,uint8_t *out,size_t vl, size_t shift)
|
||||
{
|
||||
vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
|
||||
vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,10,vl); /* { dg-error {passing 10 to argument 3 of '__riscv_vnclipu_wx_u8m1', which expects a value in the range \[0, 3\]} } */
|
||||
__riscv_vse8_v_u8m1 (out,v,vl);
|
||||
}
|
Loading…
Add table
Reference in a new issue