Add -mveclibabi=mass
From-SVN: r163470
This commit is contained in:
parent
a4a7e2f5f7
commit
8bcc030417
5 changed files with 191 additions and 1 deletions
|
@ -1,3 +1,16 @@
|
|||
2010-08-23 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* config/rs6000/rs6000.opt (-mmass): New option to enable the
|
||||
compiler to autovectorize mathematical functions for power7 using
|
||||
the Mathematical Acceleration Subsystem library.
|
||||
|
||||
* config/rs6000/rs6000.c (rs6000_builtin_vectorized_libmass): New
|
||||
function to handle auto vectorizing math functions that are in the
|
||||
MASS library.
|
||||
(rs6000_builtin_vectorized_function): Call it.
|
||||
|
||||
* doc/invoke.texi (RS/6000 and PowerPC Options): Document -mmass.
|
||||
|
||||
2010-08-22 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
|
||||
|
||||
PR boehm-gc/34544
|
||||
|
|
|
@ -989,6 +989,7 @@ static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool);
|
|||
static rtx rs6000_make_savres_rtx (rs6000_stack_t *, rtx, int,
|
||||
enum machine_mode, bool, bool, bool);
|
||||
static bool rs6000_reg_live_or_pic_offset_p (int);
|
||||
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
|
||||
static tree rs6000_builtin_vectorized_function (tree, tree, tree);
|
||||
static int rs6000_savres_strategy (rs6000_stack_t *, bool, int, int);
|
||||
static void rs6000_restore_saved_cr (rtx, int);
|
||||
|
@ -3602,6 +3603,145 @@ rs6000_parse_fpu_option (const char *option)
|
|||
return FPU_NONE;
|
||||
}
|
||||
|
||||
|
||||
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
|
||||
library with vectorized intrinsics. */
|
||||
|
||||
static tree
|
||||
rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
|
||||
{
|
||||
char name[32];
|
||||
const char *suffix = NULL;
|
||||
tree fntype, new_fndecl, bdecl = NULL_TREE;
|
||||
int n_args = 1;
|
||||
const char *bname;
|
||||
enum machine_mode el_mode, in_mode;
|
||||
int n, in_n;
|
||||
|
||||
/* Libmass is suitable for unsafe math only as it does not correctly support
|
||||
parts of IEEE with the required precision such as denormals. Only support
|
||||
it if we have VSX to use the simd d2 or f4 functions.
|
||||
XXX: Add variable length support. */
|
||||
if (!flag_unsafe_math_optimizations || !TARGET_VSX)
|
||||
return NULL_TREE;
|
||||
|
||||
el_mode = TYPE_MODE (TREE_TYPE (type_out));
|
||||
n = TYPE_VECTOR_SUBPARTS (type_out);
|
||||
in_mode = TYPE_MODE (TREE_TYPE (type_in));
|
||||
in_n = TYPE_VECTOR_SUBPARTS (type_in);
|
||||
if (el_mode != in_mode
|
||||
|| n != in_n)
|
||||
return NULL_TREE;
|
||||
|
||||
if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
|
||||
{
|
||||
enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
|
||||
switch (fn)
|
||||
{
|
||||
case BUILT_IN_ATAN2:
|
||||
case BUILT_IN_HYPOT:
|
||||
case BUILT_IN_POW:
|
||||
n_args = 2;
|
||||
/* fall through */
|
||||
|
||||
case BUILT_IN_ACOS:
|
||||
case BUILT_IN_ACOSH:
|
||||
case BUILT_IN_ASIN:
|
||||
case BUILT_IN_ASINH:
|
||||
case BUILT_IN_ATAN:
|
||||
case BUILT_IN_ATANH:
|
||||
case BUILT_IN_CBRT:
|
||||
case BUILT_IN_COS:
|
||||
case BUILT_IN_COSH:
|
||||
case BUILT_IN_ERF:
|
||||
case BUILT_IN_ERFC:
|
||||
case BUILT_IN_EXP2:
|
||||
case BUILT_IN_EXP:
|
||||
case BUILT_IN_EXPM1:
|
||||
case BUILT_IN_LGAMMA:
|
||||
case BUILT_IN_LOG10:
|
||||
case BUILT_IN_LOG1P:
|
||||
case BUILT_IN_LOG2:
|
||||
case BUILT_IN_LOG:
|
||||
case BUILT_IN_SIN:
|
||||
case BUILT_IN_SINH:
|
||||
case BUILT_IN_SQRT:
|
||||
case BUILT_IN_TAN:
|
||||
case BUILT_IN_TANH:
|
||||
bdecl = implicit_built_in_decls[fn];
|
||||
suffix = "d2"; /* pow -> powd2 */
|
||||
if (el_mode != DFmode
|
||||
|| n != 2)
|
||||
return NULL_TREE;
|
||||
break;
|
||||
|
||||
case BUILT_IN_ATAN2F:
|
||||
case BUILT_IN_HYPOTF:
|
||||
case BUILT_IN_POWF:
|
||||
n_args = 2;
|
||||
/* fall through */
|
||||
|
||||
case BUILT_IN_ACOSF:
|
||||
case BUILT_IN_ACOSHF:
|
||||
case BUILT_IN_ASINF:
|
||||
case BUILT_IN_ASINHF:
|
||||
case BUILT_IN_ATANF:
|
||||
case BUILT_IN_ATANHF:
|
||||
case BUILT_IN_CBRTF:
|
||||
case BUILT_IN_COSF:
|
||||
case BUILT_IN_COSHF:
|
||||
case BUILT_IN_ERFF:
|
||||
case BUILT_IN_ERFCF:
|
||||
case BUILT_IN_EXP2F:
|
||||
case BUILT_IN_EXPF:
|
||||
case BUILT_IN_EXPM1F:
|
||||
case BUILT_IN_LGAMMAF:
|
||||
case BUILT_IN_LOG10F:
|
||||
case BUILT_IN_LOG1PF:
|
||||
case BUILT_IN_LOG2F:
|
||||
case BUILT_IN_LOGF:
|
||||
case BUILT_IN_SINF:
|
||||
case BUILT_IN_SINHF:
|
||||
case BUILT_IN_SQRTF:
|
||||
case BUILT_IN_TANF:
|
||||
case BUILT_IN_TANHF:
|
||||
bdecl = implicit_built_in_decls[fn];
|
||||
suffix = "4"; /* powf -> powf4 */
|
||||
if (el_mode != SFmode
|
||||
|| n != 4)
|
||||
return NULL_TREE;
|
||||
break;
|
||||
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
}
|
||||
else
|
||||
return NULL_TREE;
|
||||
|
||||
gcc_assert (suffix != NULL);
|
||||
bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
|
||||
strcpy (name, bname + sizeof ("__builtin_") - 1);
|
||||
strcat (name, suffix);
|
||||
|
||||
if (n_args == 1)
|
||||
fntype = build_function_type_list (type_out, type_in, NULL);
|
||||
else if (n_args == 2)
|
||||
fntype = build_function_type_list (type_out, type_in, type_in, NULL);
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
/* Build a function declaration for the vectorized function. */
|
||||
new_fndecl = build_decl (BUILTINS_LOCATION,
|
||||
FUNCTION_DECL, get_identifier (name), fntype);
|
||||
TREE_PUBLIC (new_fndecl) = 1;
|
||||
DECL_EXTERNAL (new_fndecl) = 1;
|
||||
DECL_IS_NOVOPS (new_fndecl) = 1;
|
||||
TREE_READONLY (new_fndecl) = 1;
|
||||
|
||||
return new_fndecl;
|
||||
}
|
||||
|
||||
/* Returns a function decl for a vectorized version of the builtin function
|
||||
with builtin function code FN and the result vector type TYPE, or NULL_TREE
|
||||
if it is not available. */
|
||||
|
@ -3768,6 +3908,10 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
|
|||
}
|
||||
}
|
||||
|
||||
/* Generate calls to libmass if appropriate. */
|
||||
if (TARGET_MASS)
|
||||
return rs6000_builtin_vectorized_libmass (fndecl, type_out, type_in);
|
||||
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
|
|
|
@ -115,6 +115,10 @@ mpopcntd
|
|||
Target Report Mask(POPCNTD)
|
||||
Use PowerPC V2.06 popcntd instruction
|
||||
|
||||
mmass
|
||||
Target Report Var(TARGET_MASS) Init(0)
|
||||
Use the Mathematical Acceleration Subsystem library high performance math libraries.
|
||||
|
||||
mvsx
|
||||
Target Report Mask(VSX)
|
||||
Use vector/scalar (VSX) instructions
|
||||
|
|
|
@ -786,7 +786,9 @@ See RS/6000 and PowerPC Options.
|
|||
-mprototype -mno-prototype @gol
|
||||
-msim -mmvme -mads -myellowknife -memb -msdata @gol
|
||||
-msdata=@var{opt} -mvxworks -G @var{num} -pthread @gol
|
||||
-mrecip -mrecip=@var{opt} -mno-recip -mrecip-precision -mno-recip-precision}
|
||||
-mrecip -mrecip=@var{opt} -mno-recip -mrecip-precision
|
||||
-mno-recip-precision @gol
|
||||
-mmass}
|
||||
|
||||
@emph{RX Options}
|
||||
@gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol
|
||||
|
@ -15847,6 +15849,29 @@ automatically selects @option{-mrecip-precision}. The double
|
|||
precision square root estimate instructions are not generated by
|
||||
default on low precision machines, since they do not provide an
|
||||
estimate that converges after three steps.
|
||||
|
||||
@item -mmass
|
||||
@itemx -mno-mass
|
||||
@opindex mmass
|
||||
Specifies to use IBM's Mathematical Acceleration Subsystem (MASS)
|
||||
libraries for vectorizing intrinsics using external libraries. GCC
|
||||
will currently emit calls to @code{acosd2}, @code{acosf4},
|
||||
@code{acoshd2}, @code{acoshf4}, @code{asind2}, @code{asinf4},
|
||||
@code{asinhd2}, @code{asinhf4}, @code{atan2d2}, @code{atan2f4},
|
||||
@code{atand2}, @code{atanf4}, @code{atanhd2}, @code{atanhf4},
|
||||
@code{cbrtd2}, @code{cbrtf4}, @code{cosd2}, @code{cosf4},
|
||||
@code{coshd2}, @code{coshf4}, @code{erfcd2}, @code{erfcf4},
|
||||
@code{erfd2}, @code{erff4}, @code{exp2d2}, @code{exp2f4},
|
||||
@code{expd2}, @code{expf4}, @code{expm1d2}, @code{expm1f4},
|
||||
@code{hypotd2}, @code{hypotf4}, @code{lgammad2}, @code{lgammaf4},
|
||||
@code{log10d2}, @code{log10f4}, @code{log1pd2}, @code{log1pf4},
|
||||
@code{log2d2}, @code{log2f4}, @code{logd2}, @code{logf4},
|
||||
@code{powd2}, @code{powf4}, @code{sind2}, @code{sinf4}, @code{sinhd2},
|
||||
@code{sinhf4}, @code{sqrtd2}, @code{sqrtf4}, @code{tand2},
|
||||
@code{tanf4}, @code{tanhd2}, and @code{tanhf4} when generating code
|
||||
for power7. Both @option{-ftree-vectorize} and
|
||||
@option{-funsafe-math-optimizations} have to be enabled. The MASS
|
||||
libraries will have to be specified at link time.
|
||||
@end table
|
||||
|
||||
@node RX Options
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2010-08-23 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
* gcc.target/powerpc/vsx-mass-1.c: New file, test -mmass.
|
||||
|
||||
2010-08-23 Janus Weil <janus@gcc.gnu.org>
|
||||
|
||||
PR fortran/45366
|
||||
|
|
Loading…
Add table
Reference in a new issue