re PR tree-optimization/92645 (Hand written vector code is 450 times slower when compiled with GCC compared to Clang)
2019-12-03  Richard Biener  <rguenther@suse.de>

PR tree-optimization/92645
* gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
from or to a properly aligned register variable.

* gcc.target/i386/pr92645-5.c: New testcase.

From-SVN: r278934
This commit is contained in:
parent
8f316505da
commit
5105b576df
4 changed files with 51 additions and 22 deletions
|
@ -1,3 +1,9 @@
|
||||||
|
2019-12-03 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
|
PR tree-optimization/92645
|
||||||
|
* gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
|
||||||
|
from or to a properly aligned register variable.
|
||||||
|
|
||||||
2019-12-03 Matthias Klose <doko@ubuntu.com>
|
2019-12-03 Matthias Klose <doko@ubuntu.com>
|
||||||
|
|
||||||
* Makefile.in (SOURCES): Add doc/lto-dump.1.
|
* Makefile.in (SOURCES): Add doc/lto-dump.1.
|
||||||
|
|
|
@ -986,36 +986,33 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi,
|
||||||
|
|
||||||
src_align = get_pointer_alignment (src);
|
src_align = get_pointer_alignment (src);
|
||||||
dest_align = get_pointer_alignment (dest);
|
dest_align = get_pointer_alignment (dest);
|
||||||
if (dest_align < TYPE_ALIGN (desttype)
|
|
||||||
|| src_align < TYPE_ALIGN (srctype))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
|
/* Choose between src and destination type for the access based
|
||||||
|
on alignment, whether the access constitutes a register access
|
||||||
|
and whether it may actually expose a declaration for SSA rewrite
|
||||||
|
or SRA decomposition. */
|
||||||
destvar = NULL_TREE;
|
destvar = NULL_TREE;
|
||||||
|
srcvar = NULL_TREE;
|
||||||
if (TREE_CODE (dest) == ADDR_EXPR
|
if (TREE_CODE (dest) == ADDR_EXPR
|
||||||
&& var_decl_component_p (TREE_OPERAND (dest, 0))
|
&& var_decl_component_p (TREE_OPERAND (dest, 0))
|
||||||
&& tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
|
&& tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len)
|
||||||
|
&& dest_align >= TYPE_ALIGN (desttype)
|
||||||
|
&& (is_gimple_reg_type (desttype)
|
||||||
|
|| src_align >= TYPE_ALIGN (desttype)))
|
||||||
destvar = fold_build2 (MEM_REF, desttype, dest, off0);
|
destvar = fold_build2 (MEM_REF, desttype, dest, off0);
|
||||||
|
else if (TREE_CODE (src) == ADDR_EXPR
|
||||||
srcvar = NULL_TREE;
|
&& var_decl_component_p (TREE_OPERAND (src, 0))
|
||||||
if (TREE_CODE (src) == ADDR_EXPR
|
&& tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
|
||||||
&& var_decl_component_p (TREE_OPERAND (src, 0))
|
&& src_align >= TYPE_ALIGN (srctype)
|
||||||
&& tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
|
&& (is_gimple_reg_type (srctype)
|
||||||
{
|
|| dest_align >= TYPE_ALIGN (srctype)))
|
||||||
if (!destvar
|
srcvar = fold_build2 (MEM_REF, srctype, src, off0);
|
||||||
|| src_align >= TYPE_ALIGN (desttype))
|
|
||||||
srcvar = fold_build2 (MEM_REF, destvar ? desttype : srctype,
|
|
||||||
src, off0);
|
|
||||||
else if (!STRICT_ALIGNMENT)
|
|
||||||
{
|
|
||||||
srctype = build_aligned_type (TYPE_MAIN_VARIANT (desttype),
|
|
||||||
src_align);
|
|
||||||
srcvar = fold_build2 (MEM_REF, srctype, src, off0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (srcvar == NULL_TREE && destvar == NULL_TREE)
|
if (srcvar == NULL_TREE && destvar == NULL_TREE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/* Now that we chose an access type express the other side in
|
||||||
|
terms of it if the target allows that with respect to alignment
|
||||||
|
constraints. */
|
||||||
if (srcvar == NULL_TREE)
|
if (srcvar == NULL_TREE)
|
||||||
{
|
{
|
||||||
if (src_align >= TYPE_ALIGN (desttype))
|
if (src_align >= TYPE_ALIGN (desttype))
|
||||||
|
|
|
@ -1,3 +1,8 @@
|
||||||
|
2019-12-03 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
|
PR tree-optimization/92645
|
||||||
|
* gcc.target/i386/pr92645-5.c: New testcase.
|
||||||
|
|
||||||
2019-12-03 Richard Biener <rguenther@suse.de>
|
2019-12-03 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
PR tree-optimization/92751
|
PR tree-optimization/92751
|
||||||
|
|
21
gcc/testsuite/gcc.target/i386/pr92645-5.c
Normal file
21
gcc/testsuite/gcc.target/i386/pr92645-5.c
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O -fdump-tree-cddce1 -mavx2 -Wno-psabi" } */
|
||||||
|
typedef long long v4di __attribute__((vector_size(32)));
|
||||||
|
struct Vec
|
||||||
|
{
|
||||||
|
unsigned int v[8];
|
||||||
|
};
|
||||||
|
|
||||||
|
v4di pun (struct Vec *s)
|
||||||
|
{
|
||||||
|
v4di tem;
|
||||||
|
__builtin_memcpy (&tem, s, 32);
|
||||||
|
return tem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We're expecting exactly two stmts, in particular no BIT_INSERT_EXPR
|
||||||
|
and no memcpy call.
|
||||||
|
_3 = MEM <vector(4) long long int> [(char * {ref-all})s_2(D)];
|
||||||
|
return _3; */
|
||||||
|
/* { dg-final { scan-tree-dump-times " = MEM" 1 "cddce1" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-not "memcpy" "cddce1" } } */
|
Loading…
Add table
Reference in a new issue