[AArch64 costs 12/18] Improve costs for sign/zero extracts

gcc/

	* config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for
	SIGN/ZERO_EXTRACT.


Co-Authored-By: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>

From-SVN: r210504
James Greenhalgh 2014-05-16 09:07:34 +00:00 committed by James Greenhalgh
parent ba0cfa1700
commit 7cc2145f54
2 changed files with 67 additions and 2 deletions
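For illustration (not part of the patch itself): the "extended register" forms this series targets fold a sign- or zero-extension, optionally followed by a small left shift, into the ADD/SUB instruction. Below is a minimal C sketch, assuming an LP64 AArch64 target at -O2; the function name is made up and the quoted assembly is the typical, not guaranteed, output.

/* Illustrative only: pointer arithmetic like this is usually a single
   ADD (extended register), e.g. "add x0, x0, w1, sxtw #2", folding the
   sign-extension of IDX and the shift by 2 into the add itself.  */
int *
add_extended (int *base, int idx)
{
  return base + idx;
}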

gcc/ChangeLog

@@ -1,3 +1,9 @@
2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
	    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>

	* config/aarch64/aarch64.c (aarch64_rtx_arith_op_extract_p): New.
	(aarch64_rtx_costs): Improve costs for SIGN/ZERO_EXTRACT.

2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
	    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>

gcc/config/aarch64/aarch64.c

@@ -4833,6 +4833,35 @@ aarch64_address_cost (rtx x,
  return cost;
}

/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
static bool
aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
          && CONST_INT_P (op1)
          && op2 == const0_rtx
          && CONST_INT_P (XEXP (op0, 1))
          && aarch64_is_extend_from_extract (mode,
                                             XEXP (op0, 1),
                                             op1))
        {
          return true;
        }
    }

  return false;
}

/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
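As a reading aid (an illustration, not part of the diff): judging from the checks above, the RTX shape the new predicate accepts looks roughly like the sketch below; the mode, register and constant values are only an example.

/* Illustrative sketch of an RTX accepted by aarch64_rtx_arith_op_extract_p,
   here for a 32-bit value sign-extended and shifted left by two:

     (sign_extract:DI
       (mult:DI (reg:DI x1) (const_int 4))   ; operand 0: MULT by a constant
       (const_int 34)                        ; operand 1: extracted bit-width
       (const_int 0))                        ; operand 2: must be const0_rtx

   i.e. the low bits of a register scaled by a power of two, which is how an
   extend-then-shift operand of an ADD/SUB (extended register) instruction is
   represented (the add_<optab><mode>_multp2 pattern named in the comment
   above).  */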
@@ -5097,6 +5126,18 @@ cost_minus:
        }

      /* Look for SUB (extended register).  */
      if (aarch64_rtx_arith_op_extract_p (op1, mode))
        {
          if (speed)
            *cost += extra_cost->alu.arith_shift;

          *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
                             (enum rtx_code) GET_CODE (op1),
                             0, speed);

          return true;
        }

      rtx new_op1 = aarch64_strip_extend (op1);

      /* Cost this as an FMA-alike operation.  */
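For illustration (not part of the patch): source of the following shape typically exercises the new SUB (extended register) path; the function name is made up and the quoted assembly is the usual, not guaranteed, output.

/* Illustrative only: at -O2 on an LP64 AArch64 target this is typically
   one SUB (extended register), e.g. "sub x0, x0, w1, sxtw #3", which is
   the case the alu.arith_shift cost above is now applied to.  */
long *
sub_extended (long *base, int idx)
{
  return base - idx;
}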
@@ -5153,6 +5194,18 @@ cost_minus:
          return true;
        }

      /* Look for ADD (extended register).  */
      if (aarch64_rtx_arith_op_extract_p (op0, mode))
        {
          if (speed)
            *cost += extra_cost->alu.arith_shift;

          *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
                             (enum rtx_code) GET_CODE (op0),
                             0, speed);
          return true;
        }

      /* Strip any extend, leave shifts behind as we will
         cost them through mult_cost.  */
      new_op0 = aarch64_strip_extend (op0);
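For illustration (not part of the patch): the ADD counterpart with an unsigned index gives the zero-extend flavour, i.e. the ZERO_EXTRACT arm of the predicate; again the name and the expected assembly are assumptions, not guarantees.

/* Illustrative only: with an unsigned index the extension is a zero
   extend, so at -O2 this is usually "add x0, x0, w1, uxtw #2", the
   ZERO_EXTRACT form of the same extended-register ADD.  */
int *
add_extended_unsigned (int *base, unsigned int idx)
{
  return base + idx;
}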
@@ -5406,7 +5459,13 @@ cost_minus:
    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
      /* UBFX/SBFX.  */
      if (speed)
        *cost += extra_cost->alu.bfx;

      /* We can trust that the immediates used will be correct (there
         are no by-register forms), so we need only cost op0.  */
      *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
      return true;

    case MULT:
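For illustration (not part of the patch): a typical source-level bitfield extract that ends up costed by this new code; the function name is made up and the instruction choice is the usual, not guaranteed, output.

/* Illustrative only: a shift-and-mask like this is normally a single
   UBFX (unsigned bitfield extract) on AArch64, with SBFX as the
   sign-extending analogue; that single-instruction case is what the
   new alu.bfx cost models.  */
unsigned int
extract_byte (unsigned long x)
{
  return (x >> 8) & 0xff;
}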
@@ -9112,7 +9171,7 @@ aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate