Add -mveclibabi=mass

From-SVN: r163470
This commit is contained in:
Michael Meissner 2010-08-23 16:41:10 +00:00 committed by Michael Meissner
parent a4a7e2f5f7
commit 8bcc030417
5 changed files with 191 additions and 1 deletions

View file

@ -1,3 +1,16 @@
2010-08-23 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/rs6000.opt (-mmass): New option to enable the
compiler to autovectorize mathematical functions for power7 using
the Mathematical Acceleration Subsystem library.
* config/rs6000/rs6000.c (rs6000_builtin_vectorized_libmass): New
function to handle auto vectorizing math functions that are in the
MASS library.
(rs6000_builtin_vectorized_function): Call it.
* doc/invoke.texi (RS/6000 and PowerPC Options): Document -mmass.
2010-08-22 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
PR boehm-gc/34544

View file

@ -989,6 +989,7 @@ static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool);
static rtx rs6000_make_savres_rtx (rs6000_stack_t *, rtx, int,
enum machine_mode, bool, bool, bool);
static bool rs6000_reg_live_or_pic_offset_p (int);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static tree rs6000_builtin_vectorized_function (tree, tree, tree);
static int rs6000_savres_strategy (rs6000_stack_t *, bool, int, int);
static void rs6000_restore_saved_cr (rtx, int);
@ -3602,6 +3603,145 @@ rs6000_parse_fpu_option (const char *option)
return FPU_NONE;
}
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
   library with vectorized intrinsics.

   FNDECL is the scalar math builtin being vectorized; TYPE_OUT and TYPE_IN
   are the vector result and argument types.  On success, return a new
   external FUNCTION_DECL whose name is the builtin's name with the
   "__builtin_" prefix removed and a "d2" (double) or "4" (float) suffix
   appended, e.g. pow -> powd2 and powf -> powf4.  Return NULL_TREE when
   no such routine can be used.  */

static tree
rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
{
  char name[32];
  const char *suffix = NULL;
  tree fntype, new_fndecl, bdecl = NULL_TREE;
  int n_args = 1;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* Libmass is suitable for unsafe math only as it does not correctly support
     parts of IEEE with the required precision such as denormals.  Only support
     it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
    return NULL_TREE;

  /* The result and argument vectors must have the same element mode and
     the same number of elements.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  /* Only normal (non machine-specific) builtins are handled.  */
  if (DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;
  else
    {
      enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
      switch (fn)
	{
	case BUILT_IN_ATAN2:
	case BUILT_IN_HYPOT:
	case BUILT_IN_POW:
	  n_args = 2;
	  /* fall through */

	case BUILT_IN_ACOS:
	case BUILT_IN_ACOSH:
	case BUILT_IN_ASIN:
	case BUILT_IN_ASINH:
	case BUILT_IN_ATAN:
	case BUILT_IN_ATANH:
	case BUILT_IN_CBRT:
	case BUILT_IN_COS:
	case BUILT_IN_COSH:
	case BUILT_IN_ERF:
	case BUILT_IN_ERFC:
	case BUILT_IN_EXP2:
	case BUILT_IN_EXP:
	case BUILT_IN_EXPM1:
	case BUILT_IN_LGAMMA:
	case BUILT_IN_LOG10:
	case BUILT_IN_LOG1P:
	case BUILT_IN_LOG2:
	case BUILT_IN_LOG:
	case BUILT_IN_SIN:
	case BUILT_IN_SINH:
	case BUILT_IN_SQRT:
	case BUILT_IN_TAN:
	case BUILT_IN_TANH:
	  bdecl = implicit_built_in_decls[fn];
	  suffix = "d2";				/* pow -> powd2 */
	  /* Double routines operate on two DFmode elements.  */
	  if (el_mode != DFmode
	      || n != 2)
	    return NULL_TREE;
	  break;

	case BUILT_IN_ATAN2F:
	case BUILT_IN_HYPOTF:
	case BUILT_IN_POWF:
	  n_args = 2;
	  /* fall through */

	case BUILT_IN_ACOSF:
	case BUILT_IN_ACOSHF:
	case BUILT_IN_ASINF:
	case BUILT_IN_ASINHF:
	case BUILT_IN_ATANF:
	case BUILT_IN_ATANHF:
	case BUILT_IN_CBRTF:
	case BUILT_IN_COSF:
	case BUILT_IN_COSHF:
	case BUILT_IN_ERFF:
	case BUILT_IN_ERFCF:
	case BUILT_IN_EXP2F:
	case BUILT_IN_EXPF:
	case BUILT_IN_EXPM1F:
	case BUILT_IN_LGAMMAF:
	case BUILT_IN_LOG10F:
	case BUILT_IN_LOG1PF:
	case BUILT_IN_LOG2F:
	case BUILT_IN_LOGF:
	case BUILT_IN_SINF:
	case BUILT_IN_SINHF:
	case BUILT_IN_SQRTF:
	case BUILT_IN_TANF:
	case BUILT_IN_TANHF:
	  bdecl = implicit_built_in_decls[fn];
	  suffix = "4";					/* powf -> powf4 */
	  /* Float routines operate on four SFmode elements.  */
	  if (el_mode != SFmode
	      || n != 4)
	    return NULL_TREE;
	  break;

	default:
	  return NULL_TREE;
	}
    }

  gcc_assert (suffix != NULL);

  /* NOTE(review): implicit_built_in_decls[] is expected to hold NULL_TREE
     for builtins the target does not make implicitly available (e.g. C99
     math functions on a non-C99 runtime) -- confirm against builtins.def.
     Decline to vectorize rather than dereference a null decl below.  */
  if (bdecl == NULL_TREE)
    return NULL_TREE;

  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));

  /* The stripped builtin name plus the suffix must fit in NAME together
     with the terminating NUL; the copies below are unchecked.  */
  gcc_assert (strlen (bname + sizeof ("__builtin_") - 1) + strlen (suffix)
	      < sizeof (name));

  /* Drop the leading "__builtin_" and append the MASS suffix.  */
  strcpy (name, bname + sizeof ("__builtin_") - 1);
  strcat (name, suffix);

  if (n_args == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else if (n_args == 2)
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
  else
    gcc_unreachable ();

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a function decl for a vectorized version of the builtin function
with builtin function code FN and the result vector type TYPE, or NULL_TREE
if it is not available. */
@ -3768,6 +3908,10 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
}
}
/* Generate calls to libmass if appropriate. */
if (TARGET_MASS)
return rs6000_builtin_vectorized_libmass (fndecl, type_out, type_in);
return NULL_TREE;
}

View file

@ -115,6 +115,10 @@ mpopcntd
Target Report Mask(POPCNTD)
Use PowerPC V2.06 popcntd instruction
mmass
Target Report Var(TARGET_MASS) Init(0)
Use the Mathematical Acceleration Subsystem library high performance math libraries.
mvsx
Target Report Mask(VSX)
Use vector/scalar (VSX) instructions

View file

@ -786,7 +786,9 @@ See RS/6000 and PowerPC Options.
-mprototype -mno-prototype @gol
-msim -mmvme -mads -myellowknife -memb -msdata @gol
-msdata=@var{opt} -mvxworks -G @var{num} -pthread @gol
-mrecip -mrecip=@var{opt} -mno-recip -mrecip-precision -mno-recip-precision}
-mrecip -mrecip=@var{opt} -mno-recip -mrecip-precision
-mno-recip-precision @gol
-mmass}
@emph{RX Options}
@gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol
@ -15847,6 +15849,29 @@ automatically selects @option{-mrecip-precision}. The double
precision square root estimate instructions are not generated by
default on low precision machines, since they do not provide an
estimate that converges after three steps.
@item -mmass
@itemx -mno-mass
@opindex mmass
Use IBM's Mathematical Acceleration Subsystem (MASS) libraries to
vectorize math intrinsics through calls to an external library.  GCC
will currently emit calls to @code{acosd2}, @code{acosf4},
@code{acoshd2}, @code{acoshf4}, @code{asind2}, @code{asinf4},
@code{asinhd2}, @code{asinhf4}, @code{atan2d2}, @code{atan2f4},
@code{atand2}, @code{atanf4}, @code{atanhd2}, @code{atanhf4},
@code{cbrtd2}, @code{cbrtf4}, @code{cosd2}, @code{cosf4},
@code{coshd2}, @code{coshf4}, @code{erfcd2}, @code{erfcf4},
@code{erfd2}, @code{erff4}, @code{exp2d2}, @code{exp2f4},
@code{expd2}, @code{expf4}, @code{expm1d2}, @code{expm1f4},
@code{hypotd2}, @code{hypotf4}, @code{lgammad2}, @code{lgammaf4},
@code{log10d2}, @code{log10f4}, @code{log1pd2}, @code{log1pf4},
@code{log2d2}, @code{log2f4}, @code{logd2}, @code{logf4},
@code{powd2}, @code{powf4}, @code{sind2}, @code{sinf4}, @code{sinhd2},
@code{sinhf4}, @code{sqrtd2}, @code{sqrtf4}, @code{tand2},
@code{tanf4}, @code{tanhd2}, and @code{tanhf4} when generating code
for power7. Both @option{-ftree-vectorize} and
@option{-funsafe-math-optimizations} have to be enabled. The MASS
libraries will have to be specified at link time.
@end table
@node RX Options

View file

@ -1,3 +1,7 @@
2010-08-23 Michael Meissner <meissner@linux.vnet.ibm.com>
* gcc.target/powerpc/vsx-mass-1.c: New file, test -mmass.
2010-08-23 Janus Weil <janus@gcc.gnu.org>
PR fortran/45366