RISC-V: Fix VSETVL PASS regression

This patch is regression fix patch, not an optimization patch.
Since trunk GCC generates redundant vsetvl than GCC-13.

This is the case:

bb 2:
  def a2 (vsetvl a2, zero)
bb 3:
  use a2
bb 4:
  use a2 (vle)

before this patch:

bb 2:
vsetvl a2 zero
bb 3:
vsetvl zero, zero ----> should be eliminated.
bb 4:
vle.v

The root cause is we didn't set bb 3 as transparent since the incorrect codes.
bb 3 didn't modify "a2" just use it, the VSETVL status from bb 2 can be available to bb 3 and bb 4:

bb 2 -> bb 3 -> bb4.

Another regression fix is anticipation calculation:

bb 4:
use a5 (sub)
use a5 (vle)

The vle VSETVL status should be considered as anticipated as long as both sub and vle a5 def are coming from same def.

Tested on zvl128b no regression.

I am going to test on zvl256/zvl512/zvl1024

	PR target/112713

gcc/ChangeLog:

	* config/riscv/riscv-vsetvl.cc (pre_vsetvl::compute_lcm_local_properties): Fix regression.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/vsetvl/pr112713-1.c: New test.
	* gcc.target/riscv/rvv/vsetvl/pr112713-2.c: New test.
This commit is contained in:
Juzhe-Zhong 2023-11-27 21:24:12 +08:00 committed by Pan Li
parent 5099525bff
commit 9c16ca9364
3 changed files with 91 additions and 9 deletions

View file

@ -1433,9 +1433,23 @@ private:
inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
{
return info.has_vl ()
&& (find_access (i->uses (), REGNO (info.get_vl ()))
|| find_access (i->defs (), REGNO (info.get_vl ())));
if (info.has_vl ())
{
if (find_access (i->defs (), REGNO (info.get_vl ())))
return true;
if (find_access (i->uses (), REGNO (info.get_vl ())))
{
resource_info resource = full_register (REGNO (info.get_vl ()));
def_lookup dl1 = crtl->ssa->find_def (resource, i);
def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
if (dl1.matching_set () || dl2.matching_set ())
return true;
/* If their VLs are coming from same def, we still want to fuse
their VSETVL demand info to gain better performance. */
return dl1.prev_def (i) != dl2.prev_def (i);
}
}
return false;
}
inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
{
@ -1702,7 +1716,7 @@ public:
for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
i = i->prev_nondebug_insn ())
{
// no def amd use of vl
// no def and use of vl
if (!ignore_vl && modify_or_use_vl_p (i, info))
return false;
@ -2635,11 +2649,8 @@ pre_vsetvl::compute_lcm_local_properties ()
for (const insn_info *insn : bb->real_nondebug_insns ())
{
if ((info.has_nonvlmax_reg_avl ()
&& find_access (insn->defs (), REGNO (info.get_avl ())))
|| (info.has_vl ()
&& find_access (insn->uses (),
REGNO (info.get_vl ()))))
if (info.has_nonvlmax_reg_avl ()
&& find_access (insn->defs (), REGNO (info.get_avl ())))
{
bitmap_clear_bit (m_transp[bb_index], i);
break;

View file

@ -0,0 +1,24 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
#include "riscv_vector.h"
size_t
foo (char const *buf, size_t len)
{
size_t sum = 0;
size_t vl = __riscv_vsetvlmax_e8m8();
size_t step = vl * 4;
const char *it = buf, *end = buf + len;
for(; it + step <= end; ) {
it += vl;
vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
sum += __riscv_vcpop_m_b1(m3, vl);
}
return sum;
}
/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
/* { dg-final { scan-assembler-not {vsetivli} } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */

View file

@ -0,0 +1,47 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
#include "riscv_vector.h"
static size_t
utf8_count_rvv(char const *buf, size_t len)
{
size_t sum = 0;
for (size_t vl; len > 0; len -= vl, buf += vl) {
vl = __riscv_vsetvl_e8m8(len);
vint8m8_t v = __riscv_vle8_v_i8m8((void*)buf, vl);
vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl);
sum += __riscv_vcpop_m_b1(mask, vl);
}
return sum;
}
size_t
utf8_count_rvv_4x_tail(char const *buf, size_t len)
{
size_t sum = 0;
size_t vl = __riscv_vsetvlmax_e8m8();
size_t step = vl * 4;
const char *it = buf, *end = buf + len;
for(; it + step <= end; ) {
vint8m8_t v0 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
vint8m8_t v1 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
vint8m8_t v2 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
vbool1_t m0 = __riscv_vmsgt_vx_i8m8_b1(v0, -65, vl);
vbool1_t m1 = __riscv_vmsgt_vx_i8m8_b1(v1, -65, vl);
vbool1_t m2 = __riscv_vmsgt_vx_i8m8_b1(v2, -65, vl);
vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
sum += __riscv_vcpop_m_b1(m0, vl);
sum += __riscv_vcpop_m_b1(m1, vl);
sum += __riscv_vcpop_m_b1(m2, vl);
sum += __riscv_vcpop_m_b1(m3, vl);
}
return sum + utf8_count_rvv(it, end - it);
}
/* { dg-final { scan-assembler-times {vsetvli} 2 } } */
/* { dg-final { scan-assembler-not {vsetivli} } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */