re PR tree-optimization/92645 (Hand written vector code is 450 times slower when compiled with GCC compared to Clang)
2019-12-03 Richard Biener <rguenther@suse.de>

PR tree-optimization/92645
* gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy from or to a properly aligned register variable.
* gcc.target/i386/pr92645-5.c: New testcase.

From-SVN: r278934
This commit is contained in:
parent
8f316505da
commit
5105b576df
4 changed files with 51 additions and 22 deletions
|
@ -1,3 +1,9 @@
|
|||
2019-12-03 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/92645
|
||||
* gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
|
||||
from or to a properly aligned register variable.
|
||||
|
||||
2019-12-03 Matthias Klose <doko@ubuntu.com>
|
||||
|
||||
* Makefile.in (SOURCES): Add doc/lto-dump.1.
|
||||
|
|
|
@ -986,36 +986,33 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi,
|
|||
|
||||
src_align = get_pointer_alignment (src);
|
||||
dest_align = get_pointer_alignment (dest);
|
||||
if (dest_align < TYPE_ALIGN (desttype)
|
||||
|| src_align < TYPE_ALIGN (srctype))
|
||||
return false;
|
||||
|
||||
/* Choose between src and destination type for the access based
|
||||
on alignment, whether the access constitutes a register access
|
||||
and whether it may actually expose a declaration for SSA rewrite
|
||||
or SRA decomposition. */
|
||||
destvar = NULL_TREE;
|
||||
srcvar = NULL_TREE;
|
||||
if (TREE_CODE (dest) == ADDR_EXPR
|
||||
&& var_decl_component_p (TREE_OPERAND (dest, 0))
|
||||
&& tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
|
||||
&& tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len)
|
||||
&& dest_align >= TYPE_ALIGN (desttype)
|
||||
&& (is_gimple_reg_type (desttype)
|
||||
|| src_align >= TYPE_ALIGN (desttype)))
|
||||
destvar = fold_build2 (MEM_REF, desttype, dest, off0);
|
||||
|
||||
srcvar = NULL_TREE;
|
||||
if (TREE_CODE (src) == ADDR_EXPR
|
||||
&& var_decl_component_p (TREE_OPERAND (src, 0))
|
||||
&& tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
|
||||
{
|
||||
if (!destvar
|
||||
|| src_align >= TYPE_ALIGN (desttype))
|
||||
srcvar = fold_build2 (MEM_REF, destvar ? desttype : srctype,
|
||||
src, off0);
|
||||
else if (!STRICT_ALIGNMENT)
|
||||
{
|
||||
srctype = build_aligned_type (TYPE_MAIN_VARIANT (desttype),
|
||||
src_align);
|
||||
srcvar = fold_build2 (MEM_REF, srctype, src, off0);
|
||||
}
|
||||
}
|
||||
|
||||
else if (TREE_CODE (src) == ADDR_EXPR
|
||||
&& var_decl_component_p (TREE_OPERAND (src, 0))
|
||||
&& tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
|
||||
&& src_align >= TYPE_ALIGN (srctype)
|
||||
&& (is_gimple_reg_type (srctype)
|
||||
|| dest_align >= TYPE_ALIGN (srctype)))
|
||||
srcvar = fold_build2 (MEM_REF, srctype, src, off0);
|
||||
if (srcvar == NULL_TREE && destvar == NULL_TREE)
|
||||
return false;
|
||||
|
||||
/* Now that we chose an access type express the other side in
|
||||
terms of it if the target allows that with respect to alignment
|
||||
constraints. */
|
||||
if (srcvar == NULL_TREE)
|
||||
{
|
||||
if (src_align >= TYPE_ALIGN (desttype))
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2019-12-03 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/92645
|
||||
* gcc.target/i386/pr92645-5.c: New testcase.
|
||||
|
||||
2019-12-03 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/92751
|
||||
|
|
21
gcc/testsuite/gcc.target/i386/pr92645-5.c
Normal file
21
gcc/testsuite/gcc.target/i386/pr92645-5.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -fdump-tree-cddce1 -mavx2 -Wno-psabi" } */
|
||||
typedef long long v4di __attribute__((vector_size(32)));
|
||||
/* Plain aggregate holding eight unsigned ints; pun () below receives a
   pointer to it and uses it as the source of the memcpy.  */
struct Vec
|
||||
{
|
||||
unsigned int v[8];
|
||||
};
|
||||
|
||||
/* Copy 32 bytes from the object S points to into a local v4di vector via
   __builtin_memcpy and return that vector by value.  */
v4di pun (struct Vec *s)
|
||||
{
|
||||
v4di tem;
|
||||
__builtin_memcpy (&tem, s, 32);
|
||||
return tem;
|
||||
}
|
||||
|
||||
/* We're expecting exactly two stmts, in particular no BIT_INSERT_EXPR
|
||||
and no memcpy call.
|
||||
_3 = MEM <vector(4) long long int> [(char * {ref-all})s_2(D)];
|
||||
return _3; */
|
||||
/* { dg-final { scan-tree-dump-times " = MEM" 1 "cddce1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "memcpy" "cddce1" } } */
|
Loading…
Add table
Reference in a new issue