rs6000: Add MMA built-in function definitions and test cases.
Add the Matrix-Multiply Assist (MMA) built-ins. The MMA accumulators are INOUT operands for most MMA instructions, but they are also very expensive to move around. For this reason, we have implemented a built-in API where the accumulators are passed using pass-by-reference/pointers, so the user won't use one accumulator as input and another as output, which would entail a lot of copies. However, using pointers gives us poor code generation when we expand the built-ins at normal expand time. We therefore expand the MMA built-ins early into gimple, converting the pass-by-reference calls to an internal built-in that uses pass-by-value calling convention, where we can enforce the input and output accumulators are the same. This gives us much better code generation. 2020-06-20 Peter Bergner <bergner@linux.ibm.com> gcc/ * config/rs6000/predicates.md (mma_assemble_input_operand): New. * config/rs6000/rs6000-builtin.def (BU_MMA_1, BU_MMA_V2, BU_MMA_3, BU_MMA_5, BU_MMA_6, BU_VSX_1): Add support macros for defining MMA built-in functions. (ASSEMBLE_ACC, ASSEMBLE_PAIR, DISASSEMBLE_ACC, DISASSEMBLE_PAIR, PMXVBF16GER2, PMXVBF16GER2NN, PMXVBF16GER2NP, PMXVBF16GER2PN, PMXVBF16GER2PP, PMXVF16GER2, PMXVF16GER2NN, PMXVF16GER2NP, PMXVF16GER2PN, PMXVF16GER2PP, PMXVF32GER, PMXVF32GERNN, PMXVF32GERNP, PMXVF32GERPN, PMXVF32GERPP, PMXVF64GER, PMXVF64GERNN, PMXVF64GERNP, PMXVF64GERPN, PMXVF64GERPP, PMXVI16GER2, PMXVI16GER2PP, PMXVI16GER2S, PMXVI16GER2SPP, PMXVI4GER8, PMXVI4GER8PP, PMXVI8GER4, PMXVI8GER4PP, PMXVI8GER4SPP, XVBF16GER2, XVBF16GER2NN, XVBF16GER2NP, XVBF16GER2PN, XVBF16GER2PP, XVCVBF16SP, XVCVSPBF16, XVF16GER2, XVF16GER2NN, XVF16GER2NP, XVF16GER2PN, XVF16GER2PP, XVF32GER, XVF32GERNN, XVF32GERNP, XVF32GERPN, XVF32GERPP, XVF64GER, XVF64GERNN, XVF64GERNP, XVF64GERPN, XVF64GERPP, XVI16GER2, XVI16GER2PP, XVI16GER2S, XVI16GER2SPP, XVI4GER8, XVI4GER8PP, XVI8GER4, XVI8GER4PP, XVI8GER4SPP, XXMFACC, XXMTACC, XXSETACCZ): Add MMA built-ins. 
* config/rs6000/rs6000.c (rs6000_emit_move): Use CONST_INT_P. Allow zero constants. (print_operand) <case 'A'>: New output modifier. (rs6000_split_multireg_move): Add support for inserting accumulator priming and depriming instructions. Add support for splitting an assemble accumulator pattern. * config/rs6000/rs6000-call.c (mma_init_builtins, mma_expand_builtin, rs6000_gimple_fold_mma_builtin): New functions. (RS6000_BUILTIN_M): New macro. (def_builtin): Handle RS6000_BTC_QUAD and RS6000_BTC_PAIR attributes. (bdesc_mma): Add new MMA built-in support. (htm_expand_builtin): Use RS6000_BTC_OPND_MASK. (rs6000_invalid_builtin): Add handling of RS6000_BTM_FUTURE and RS6000_BTM_MMA. (rs6000_builtin_valid_without_lhs): Handle RS6000_BTC_VOID attribute. (rs6000_gimple_fold_builtin): Call rs6000_builtin_is_supported_p and rs6000_gimple_fold_mma_builtin. (rs6000_expand_builtin): Call mma_expand_builtin. Use RS6000_BTC_OPND_MASK. (rs6000_init_builtins): Adjust comment. Call mma_init_builtins. (htm_init_builtins): Use RS6000_BTC_OPND_MASK. (builtin_function_type): Handle VSX_BUILTIN_XVCVSPBF16 and VSX_BUILTIN_XVCVBF16SP. * config/rs6000/rs6000.h (RS6000_BTC_QUINARY, RS6000_BTC_SENARY, RS6000_BTC_OPND_MASK, RS6000_BTC_QUAD, RS6000_BTC_PAIR, RS6000_BTC_QUADPAIR, RS6000_BTC_GIMPLE): New defines. (RS6000_BTC_PREDICATE, RS6000_BTC_ABS, RS6000_BTC_DST, RS6000_BTC_TYPE_MASK, RS6000_BTC_ATTR_MASK): Adjust values. * config/rs6000/mma.md (MAX_MMA_OPERANDS): New define_constant. 
(UNSPEC_MMA_ASSEMBLE_ACC, UNSPEC_MMA_PMXVBF16GER2, UNSPEC_MMA_PMXVBF16GER2NN, UNSPEC_MMA_PMXVBF16GER2NP, UNSPEC_MMA_PMXVBF16GER2PN, UNSPEC_MMA_PMXVBF16GER2PP, UNSPEC_MMA_PMXVF16GER2, UNSPEC_MMA_PMXVF16GER2NN, UNSPEC_MMA_PMXVF16GER2NP, UNSPEC_MMA_PMXVF16GER2PN, UNSPEC_MMA_PMXVF16GER2PP, UNSPEC_MMA_PMXVF32GER, UNSPEC_MMA_PMXVF32GERNN, UNSPEC_MMA_PMXVF32GERNP, UNSPEC_MMA_PMXVF32GERPN, UNSPEC_MMA_PMXVF32GERPP, UNSPEC_MMA_PMXVF64GER, UNSPEC_MMA_PMXVF64GERNN, UNSPEC_MMA_PMXVF64GERNP, UNSPEC_MMA_PMXVF64GERPN, UNSPEC_MMA_PMXVF64GERPP, UNSPEC_MMA_PMXVI16GER2, UNSPEC_MMA_PMXVI16GER2PP, UNSPEC_MMA_PMXVI16GER2S, UNSPEC_MMA_PMXVI16GER2SPP, UNSPEC_MMA_PMXVI4GER8, UNSPEC_MMA_PMXVI4GER8PP, UNSPEC_MMA_PMXVI8GER4, UNSPEC_MMA_PMXVI8GER4PP, UNSPEC_MMA_PMXVI8GER4SPP, UNSPEC_MMA_XVBF16GER2, UNSPEC_MMA_XVBF16GER2NN, UNSPEC_MMA_XVBF16GER2NP, UNSPEC_MMA_XVBF16GER2PN, UNSPEC_MMA_XVBF16GER2PP, UNSPEC_MMA_XVF16GER2, UNSPEC_MMA_XVF16GER2NN, UNSPEC_MMA_XVF16GER2NP, UNSPEC_MMA_XVF16GER2PN, UNSPEC_MMA_XVF16GER2PP, UNSPEC_MMA_XVF32GER, UNSPEC_MMA_XVF32GERNN, UNSPEC_MMA_XVF32GERNP, UNSPEC_MMA_XVF32GERPN, UNSPEC_MMA_XVF32GERPP, UNSPEC_MMA_XVF64GER, UNSPEC_MMA_XVF64GERNN, UNSPEC_MMA_XVF64GERNP, UNSPEC_MMA_XVF64GERPN, UNSPEC_MMA_XVF64GERPP, UNSPEC_MMA_XVI16GER2, UNSPEC_MMA_XVI16GER2PP, UNSPEC_MMA_XVI16GER2S, UNSPEC_MMA_XVI16GER2SPP, UNSPEC_MMA_XVI4GER8, UNSPEC_MMA_XVI4GER8PP, UNSPEC_MMA_XVI8GER4, UNSPEC_MMA_XVI8GER4PP, UNSPEC_MMA_XVI8GER4SPP, UNSPEC_MMA_XXMFACC, UNSPEC_MMA_XXMTACC): New. (MMA_ACC, MMA_VV, MMA_AVV, MMA_PV, MMA_APV, MMA_VVI4I4I8, MMA_AVVI4I4I8, MMA_VVI4I4I2, MMA_AVVI4I4I2, MMA_VVI4I4, MMA_AVVI4I4, MMA_PVI4I2, MMA_APVI4I2, MMA_VVI4I4I4, MMA_AVVI4I4I4): New define_int_iterator. (acc, vv, avv, pv, apv, vvi4i4i8, avvi4i4i8, vvi4i4i2, avvi4i4i2, vvi4i4, avvi4i4, pvi4i2, apvi4i2, vvi4i4i4, avvi4i4i4): New define_int_attr. (*movpxi): Add zero constant alternative. (mma_assemble_pair, mma_assemble_acc): New define_expand. (*mma_assemble_acc): New define_insn_and_split. 
(mma_<acc>, mma_xxsetaccz, mma_<vv>, mma_<avv>, mma_<pv>, mma_<apv>, mma_<vvi4i4i8>, mma_<avvi4i4i8>, mma_<vvi4i4i2>, mma_<avvi4i4i2>, mma_<vvi4i4>, mma_<avvi4i4>, mma_<pvi4i2>, mma_<apvi4i2>, mma_<vvi4i4i4>, mma_<avvi4i4i4>): New define_insn. * config/rs6000/rs6000.md (define_attr "type"): New type mma. * config/rs6000/vsx.md (UNSPEC_VSX_XVCVBF16SP): New. (UNSPEC_VSX_XVCVSPBF16): Likewise. (XVCVBF16): New define_int_iterator. (xvcvbf16): New define_int_attr. (vsx_<xvcvbf16>): New define_insn. * doc/extend.texi: Document the mma built-ins.
This commit is contained in:
parent
f002c046e3
commit
8ee2640bfd
15 changed files with 1813 additions and 24 deletions
|
@ -31,6 +31,240 @@
|
|||
;; therefore, we define the XImode and OImode move patterns, but we
|
||||
;; disable their use with a "false" condition flag.
|
||||
|
||||
(define_constants [(MAX_MMA_OPERANDS 7)])
|
||||
|
||||
;; Constants for creating unspecs
|
||||
|
||||
(define_c_enum "unspec"
|
||||
[UNSPEC_MMA_ASSEMBLE_ACC
|
||||
UNSPEC_MMA_PMXVBF16GER2
|
||||
UNSPEC_MMA_PMXVBF16GER2NN
|
||||
UNSPEC_MMA_PMXVBF16GER2NP
|
||||
UNSPEC_MMA_PMXVBF16GER2PN
|
||||
UNSPEC_MMA_PMXVBF16GER2PP
|
||||
UNSPEC_MMA_PMXVF16GER2
|
||||
UNSPEC_MMA_PMXVF16GER2NN
|
||||
UNSPEC_MMA_PMXVF16GER2NP
|
||||
UNSPEC_MMA_PMXVF16GER2PN
|
||||
UNSPEC_MMA_PMXVF16GER2PP
|
||||
UNSPEC_MMA_PMXVF32GER
|
||||
UNSPEC_MMA_PMXVF32GERNN
|
||||
UNSPEC_MMA_PMXVF32GERNP
|
||||
UNSPEC_MMA_PMXVF32GERPN
|
||||
UNSPEC_MMA_PMXVF32GERPP
|
||||
UNSPEC_MMA_PMXVF64GER
|
||||
UNSPEC_MMA_PMXVF64GERNN
|
||||
UNSPEC_MMA_PMXVF64GERNP
|
||||
UNSPEC_MMA_PMXVF64GERPN
|
||||
UNSPEC_MMA_PMXVF64GERPP
|
||||
UNSPEC_MMA_PMXVI16GER2
|
||||
UNSPEC_MMA_PMXVI16GER2PP
|
||||
UNSPEC_MMA_PMXVI16GER2S
|
||||
UNSPEC_MMA_PMXVI16GER2SPP
|
||||
UNSPEC_MMA_PMXVI4GER8
|
||||
UNSPEC_MMA_PMXVI4GER8PP
|
||||
UNSPEC_MMA_PMXVI8GER4
|
||||
UNSPEC_MMA_PMXVI8GER4PP
|
||||
UNSPEC_MMA_PMXVI8GER4SPP
|
||||
UNSPEC_MMA_XVBF16GER2
|
||||
UNSPEC_MMA_XVBF16GER2NN
|
||||
UNSPEC_MMA_XVBF16GER2NP
|
||||
UNSPEC_MMA_XVBF16GER2PN
|
||||
UNSPEC_MMA_XVBF16GER2PP
|
||||
UNSPEC_MMA_XVF16GER2
|
||||
UNSPEC_MMA_XVF16GER2NN
|
||||
UNSPEC_MMA_XVF16GER2NP
|
||||
UNSPEC_MMA_XVF16GER2PN
|
||||
UNSPEC_MMA_XVF16GER2PP
|
||||
UNSPEC_MMA_XVF32GER
|
||||
UNSPEC_MMA_XVF32GERNN
|
||||
UNSPEC_MMA_XVF32GERNP
|
||||
UNSPEC_MMA_XVF32GERPN
|
||||
UNSPEC_MMA_XVF32GERPP
|
||||
UNSPEC_MMA_XVF64GER
|
||||
UNSPEC_MMA_XVF64GERNN
|
||||
UNSPEC_MMA_XVF64GERNP
|
||||
UNSPEC_MMA_XVF64GERPN
|
||||
UNSPEC_MMA_XVF64GERPP
|
||||
UNSPEC_MMA_XVI16GER2
|
||||
UNSPEC_MMA_XVI16GER2PP
|
||||
UNSPEC_MMA_XVI16GER2S
|
||||
UNSPEC_MMA_XVI16GER2SPP
|
||||
UNSPEC_MMA_XVI4GER8
|
||||
UNSPEC_MMA_XVI4GER8PP
|
||||
UNSPEC_MMA_XVI8GER4
|
||||
UNSPEC_MMA_XVI8GER4PP
|
||||
UNSPEC_MMA_XVI8GER4SPP
|
||||
UNSPEC_MMA_XXMFACC
|
||||
UNSPEC_MMA_XXMTACC
|
||||
])
|
||||
|
||||
;; MMA instructions with 1 accumulator argument
|
||||
(define_int_iterator MMA_ACC [UNSPEC_MMA_XXMFACC
|
||||
UNSPEC_MMA_XXMTACC])
|
||||
|
||||
;; MMA instructions with 2 vector arguments
|
||||
(define_int_iterator MMA_VV [UNSPEC_MMA_XVI4GER8
|
||||
UNSPEC_MMA_XVI8GER4
|
||||
UNSPEC_MMA_XVI16GER2
|
||||
UNSPEC_MMA_XVI16GER2S
|
||||
UNSPEC_MMA_XVF16GER2
|
||||
UNSPEC_MMA_XVBF16GER2
|
||||
UNSPEC_MMA_XVF32GER])
|
||||
|
||||
;; MMA instructions with 1 accumulator and 2 vector arguments
|
||||
(define_int_iterator MMA_AVV [UNSPEC_MMA_XVI4GER8PP
|
||||
UNSPEC_MMA_XVI8GER4PP
|
||||
UNSPEC_MMA_XVI8GER4SPP
|
||||
UNSPEC_MMA_XVI16GER2PP
|
||||
UNSPEC_MMA_XVI16GER2SPP
|
||||
UNSPEC_MMA_XVF16GER2PP
|
||||
UNSPEC_MMA_XVF16GER2PN
|
||||
UNSPEC_MMA_XVF16GER2NP
|
||||
UNSPEC_MMA_XVF16GER2NN
|
||||
UNSPEC_MMA_XVBF16GER2PP
|
||||
UNSPEC_MMA_XVBF16GER2PN
|
||||
UNSPEC_MMA_XVBF16GER2NP
|
||||
UNSPEC_MMA_XVBF16GER2NN
|
||||
UNSPEC_MMA_XVF32GERPP
|
||||
UNSPEC_MMA_XVF32GERPN
|
||||
UNSPEC_MMA_XVF32GERNP
|
||||
UNSPEC_MMA_XVF32GERNN])
|
||||
|
||||
;; MMA instructions with 1 vector pair and 1 vector arguments
|
||||
(define_int_iterator MMA_PV [UNSPEC_MMA_XVF64GER])
|
||||
|
||||
;; MMA instructions with 1 accumulator, 1 vector pair and 1 vector arguments
|
||||
(define_int_iterator MMA_APV [UNSPEC_MMA_XVF64GERPP
|
||||
UNSPEC_MMA_XVF64GERPN
|
||||
UNSPEC_MMA_XVF64GERNP
|
||||
UNSPEC_MMA_XVF64GERNN])
|
||||
|
||||
;; MMA instructions with 2 vector, 2 4-bit and 1 8-bit arguments
|
||||
(define_int_iterator MMA_VVI4I4I8 [UNSPEC_MMA_PMXVI4GER8])
|
||||
|
||||
;; MMA instructions with 1 accumulator, 2 vector, 2 4-bit and 1 8-bit arguments
|
||||
(define_int_iterator MMA_AVVI4I4I8 [UNSPEC_MMA_PMXVI4GER8PP])
|
||||
|
||||
;; MMA instructions with 2 vector, 2 4-bit and 1 2-bit arguments
|
||||
(define_int_iterator MMA_VVI4I4I2 [UNSPEC_MMA_PMXVI16GER2
|
||||
UNSPEC_MMA_PMXVI16GER2S
|
||||
UNSPEC_MMA_PMXVF16GER2
|
||||
UNSPEC_MMA_PMXVBF16GER2])
|
||||
|
||||
;; MMA instructions with 1 accumulator, 2 vector, 2 4-bit and 1 2-bit arguments
|
||||
(define_int_iterator MMA_AVVI4I4I2 [UNSPEC_MMA_PMXVI16GER2PP
|
||||
UNSPEC_MMA_PMXVI16GER2SPP
|
||||
UNSPEC_MMA_PMXVF16GER2PP
|
||||
UNSPEC_MMA_PMXVF16GER2PN
|
||||
UNSPEC_MMA_PMXVF16GER2NP
|
||||
UNSPEC_MMA_PMXVF16GER2NN
|
||||
UNSPEC_MMA_PMXVBF16GER2PP
|
||||
UNSPEC_MMA_PMXVBF16GER2PN
|
||||
UNSPEC_MMA_PMXVBF16GER2NP
|
||||
UNSPEC_MMA_PMXVBF16GER2NN])
|
||||
|
||||
;; MMA instructions with 2 vector and 2 4-bit arguments
|
||||
(define_int_iterator MMA_VVI4I4 [UNSPEC_MMA_PMXVF32GER])
|
||||
|
||||
;; MMA instructions with 1 accumulator, 2 vector and 2 4-bit arguments
|
||||
(define_int_iterator MMA_AVVI4I4 [UNSPEC_MMA_PMXVF32GERPP
|
||||
UNSPEC_MMA_PMXVF32GERPN
|
||||
UNSPEC_MMA_PMXVF32GERNP
|
||||
UNSPEC_MMA_PMXVF32GERNN])
|
||||
|
||||
;; MMA instructions with 1 vector pair, 1 vector, 1 4-bit and 1 2-bit arguments
|
||||
(define_int_iterator MMA_PVI4I2 [UNSPEC_MMA_PMXVF64GER])
|
||||
|
||||
;; MMA instructions with 1 accumulator, 1 vector pair, 1 vector, 1 4-bit and 1 2-bit arguments
|
||||
(define_int_iterator MMA_APVI4I2 [UNSPEC_MMA_PMXVF64GERPP
|
||||
UNSPEC_MMA_PMXVF64GERPN
|
||||
UNSPEC_MMA_PMXVF64GERNP
|
||||
UNSPEC_MMA_PMXVF64GERNN])
|
||||
|
||||
;; MMA instructions with 2 vector and 3 4-bit arguments
|
||||
(define_int_iterator MMA_VVI4I4I4 [UNSPEC_MMA_PMXVI8GER4])
|
||||
|
||||
;; MMA instructions with 1 accumulator, 2 vector and 3 4-bit arguments
|
||||
(define_int_iterator MMA_AVVI4I4I4 [UNSPEC_MMA_PMXVI8GER4PP
|
||||
UNSPEC_MMA_PMXVI8GER4SPP])
|
||||
|
||||
(define_int_attr acc [(UNSPEC_MMA_XXMFACC "xxmfacc")
|
||||
(UNSPEC_MMA_XXMTACC "xxmtacc")])
|
||||
|
||||
(define_int_attr vv [(UNSPEC_MMA_XVI4GER8 "xvi4ger8")
|
||||
(UNSPEC_MMA_XVI8GER4 "xvi8ger4")
|
||||
(UNSPEC_MMA_XVI16GER2 "xvi16ger2")
|
||||
(UNSPEC_MMA_XVI16GER2S "xvi16ger2s")
|
||||
(UNSPEC_MMA_XVF16GER2 "xvf16ger2")
|
||||
(UNSPEC_MMA_XVBF16GER2 "xvbf16ger2")
|
||||
(UNSPEC_MMA_XVF32GER "xvf32ger")])
|
||||
|
||||
(define_int_attr avv [(UNSPEC_MMA_XVI4GER8PP "xvi4ger8pp")
|
||||
(UNSPEC_MMA_XVI8GER4PP "xvi8ger4pp")
|
||||
(UNSPEC_MMA_XVI8GER4SPP "xvi8ger4spp")
|
||||
(UNSPEC_MMA_XVI16GER2PP "xvi16ger2pp")
|
||||
(UNSPEC_MMA_XVI16GER2SPP "xvi16ger2spp")
|
||||
(UNSPEC_MMA_XVF16GER2PP "xvf16ger2pp")
|
||||
(UNSPEC_MMA_XVF16GER2PN "xvf16ger2pn")
|
||||
(UNSPEC_MMA_XVF16GER2NP "xvf16ger2np")
|
||||
(UNSPEC_MMA_XVF16GER2NN "xvf16ger2nn")
|
||||
(UNSPEC_MMA_XVBF16GER2PP "xvbf16ger2pp")
|
||||
(UNSPEC_MMA_XVBF16GER2PN "xvbf16ger2pn")
|
||||
(UNSPEC_MMA_XVBF16GER2NP "xvbf16ger2np")
|
||||
(UNSPEC_MMA_XVBF16GER2NN "xvbf16ger2nn")
|
||||
(UNSPEC_MMA_XVF32GERPP "xvf32gerpp")
|
||||
(UNSPEC_MMA_XVF32GERPN "xvf32gerpn")
|
||||
(UNSPEC_MMA_XVF32GERNP "xvf32gernp")
|
||||
(UNSPEC_MMA_XVF32GERNN "xvf32gernn")])
|
||||
|
||||
(define_int_attr pv [(UNSPEC_MMA_XVF64GER "xvf64ger")])
|
||||
|
||||
(define_int_attr apv [(UNSPEC_MMA_XVF64GERPP "xvf64gerpp")
|
||||
(UNSPEC_MMA_XVF64GERPN "xvf64gerpn")
|
||||
(UNSPEC_MMA_XVF64GERNP "xvf64gernp")
|
||||
(UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
|
||||
|
||||
(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
|
||||
|
||||
(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "pmxvi4ger8pp")])
|
||||
|
||||
(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2 "pmxvi16ger2")
|
||||
(UNSPEC_MMA_PMXVI16GER2S "pmxvi16ger2s")
|
||||
(UNSPEC_MMA_PMXVF16GER2 "pmxvf16ger2")
|
||||
(UNSPEC_MMA_PMXVBF16GER2 "pmxvbf16ger2")])
|
||||
|
||||
(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "pmxvi16ger2pp")
|
||||
(UNSPEC_MMA_PMXVI16GER2SPP "pmxvi16ger2spp")
|
||||
(UNSPEC_MMA_PMXVF16GER2PP "pmxvf16ger2pp")
|
||||
(UNSPEC_MMA_PMXVF16GER2PN "pmxvf16ger2pn")
|
||||
(UNSPEC_MMA_PMXVF16GER2NP "pmxvf16ger2np")
|
||||
(UNSPEC_MMA_PMXVF16GER2NN "pmxvf16ger2nn")
|
||||
(UNSPEC_MMA_PMXVBF16GER2PP "pmxvbf16ger2pp")
|
||||
(UNSPEC_MMA_PMXVBF16GER2PN "pmxvbf16ger2pn")
|
||||
(UNSPEC_MMA_PMXVBF16GER2NP "pmxvbf16ger2np")
|
||||
(UNSPEC_MMA_PMXVBF16GER2NN "pmxvbf16ger2nn")])
|
||||
|
||||
(define_int_attr vvi4i4 [(UNSPEC_MMA_PMXVF32GER "pmxvf32ger")])
|
||||
|
||||
(define_int_attr avvi4i4 [(UNSPEC_MMA_PMXVF32GERPP "pmxvf32gerpp")
|
||||
(UNSPEC_MMA_PMXVF32GERPN "pmxvf32gerpn")
|
||||
(UNSPEC_MMA_PMXVF32GERNP "pmxvf32gernp")
|
||||
(UNSPEC_MMA_PMXVF32GERNN "pmxvf32gernn")])
|
||||
|
||||
(define_int_attr pvi4i2 [(UNSPEC_MMA_PMXVF64GER "pmxvf64ger")])
|
||||
|
||||
(define_int_attr apvi4i2 [(UNSPEC_MMA_PMXVF64GERPP "pmxvf64gerpp")
|
||||
(UNSPEC_MMA_PMXVF64GERPN "pmxvf64gerpn")
|
||||
(UNSPEC_MMA_PMXVF64GERNP "pmxvf64gernp")
|
||||
(UNSPEC_MMA_PMXVF64GERNN "pmxvf64gernn")])
|
||||
|
||||
(define_int_attr vvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4 "pmxvi8ger4")])
|
||||
|
||||
(define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "pmxvi8ger4pp")
|
||||
(UNSPEC_MMA_PMXVI8GER4SPP "pmxvi8ger4spp")])
|
||||
|
||||
|
||||
;; Define a disabled OImode move pattern, so we can use POImode.
|
||||
(define_expand "movoi"
|
||||
[(set (match_operand:OI 0 "nonimmediate_operand")
|
||||
|
@ -91,10 +325,11 @@
|
|||
})
|
||||
|
||||
(define_insn_and_split "*movpxi"
|
||||
[(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d")
|
||||
(match_operand:PXI 1 "input_operand" "m,d,d"))]
|
||||
[(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
|
||||
(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
|
||||
"TARGET_MMA
|
||||
&& (gpc_reg_operand (operands[0], PXImode)
|
||||
&& ((gpc_reg_operand (operands[0], PXImode)
|
||||
&& !(CONST_INT_P (operands[1]) && INTVAL (operands[1]) == 0))
|
||||
|| gpc_reg_operand (operands[1], PXImode))"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
|
@ -103,6 +338,249 @@
|
|||
rs6000_split_multireg_move (operands[0], operands[1]);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "vecload,vecstore,veclogical")
|
||||
(set_attr "length" "8,8,16")
|
||||
(set_attr "max_prefixed_insns" "2,2,*")])
|
||||
[(set_attr "type" "vecload,vecstore,veclogical,mma")
|
||||
(set_attr "length" "8,8,16,*")
|
||||
(set_attr "max_prefixed_insns" "2,2,*,*")])
|
||||
|
||||
(define_expand "mma_assemble_pair"
|
||||
[(match_operand:POI 0 "vsx_register_operand")
|
||||
(match_operand:V16QI 1 "input_operand")
|
||||
(match_operand:V16QI 2 "input_operand")]
|
||||
"TARGET_MMA"
|
||||
{
/* Build the POImode operand 0 from the two V16QImode inputs: operand 1 is
   moved into the subreg at byte offset 0 and operand 2 into the subreg at
   byte offset 16.  The body ends with DONE, so only the insns emitted
   here make up the expansion.  */
|
||||
rtx dst;
|
||||
|
||||
/* Let the compiler know the code below fully defines our output value. */
|
||||
emit_clobber (operands[0]);
|
||||
|
||||
dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
|
||||
emit_move_insn (dst, operands[1]);
|
||||
dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
|
||||
emit_move_insn (dst, operands[2]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "mma_assemble_acc"
|
||||
[(match_operand:PXI 0 "fpr_reg_operand")
|
||||
(match_operand:V16QI 1 "input_operand")
|
||||
(match_operand:V16QI 2 "input_operand")
|
||||
(match_operand:V16QI 3 "input_operand")
|
||||
(match_operand:V16QI 4 "input_operand")]
|
||||
"TARGET_MMA"
|
||||
{
/* Wrap the four V16QImode inputs (operands 1-4, in that order) in a
   PXImode UNSPEC_MMA_ASSEMBLE_ACC and emit a single move of that unspec
   into operand 0.  NOTE(review): the resulting set looks like the one
   matched by the "*mma_assemble_acc" pattern -- confirm.  */
|
||||
rtx src = gen_rtx_UNSPEC (PXImode,
|
||||
gen_rtvec (4, operands[1], operands[2],
|
||||
operands[3], operands[4]),
|
||||
UNSPEC_MMA_ASSEMBLE_ACC);
|
||||
emit_move_insn (operands[0], src);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*mma_assemble_acc"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
|
||||
(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
|
||||
(match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
|
||||
(match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
|
||||
(match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
|
||||
UNSPEC_MMA_ASSEMBLE_ACC))]
|
||||
"TARGET_MMA
|
||||
&& fpr_reg_operand (operands[0], PXImode)"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(const_int 0)]
|
||||
{
/* Split body, used once reload_completed holds (the output template is
   "#").  Rebuild the UNSPEC_MMA_ASSEMBLE_ACC source from operands 1-4 and
   pass it, together with the destination accumulator, to
   rs6000_split_multireg_move (defined outside this file); DONE then ends
   the split with whatever that call emitted.  */
|
||||
rtx src = gen_rtx_UNSPEC (PXImode,
|
||||
gen_rtvec (4, operands[1], operands[2],
|
||||
operands[3], operands[4]),
|
||||
UNSPEC_MMA_ASSEMBLE_ACC);
|
||||
rs6000_split_multireg_move (operands[0], src);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; MMA instructions that do not use their accumulators as an input, still
|
||||
;; must not allow their vector operands to overlap the registers used by
|
||||
;; the accumulator. We enforce this by marking the output as early clobber.
|
||||
|
||||
(define_insn "mma_<acc>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
|
||||
MMA_ACC))]
|
||||
"TARGET_MMA"
|
||||
"<acc> %A0"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
(define_insn "mma_xxsetaccz"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
|
||||
(const_int 0))]
|
||||
"TARGET_MMA"
|
||||
"xxsetaccz %A0"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
(define_insn "mma_<vv>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")]
|
||||
MMA_VV))]
|
||||
"TARGET_MMA"
|
||||
"<vv> %A0,%x1,%x2"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
(define_insn "mma_<avv>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")]
|
||||
MMA_AVV))]
|
||||
"TARGET_MMA"
|
||||
"<avv> %A0,%x2,%x3"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
(define_insn "mma_<pv>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")]
|
||||
MMA_PV))]
|
||||
"TARGET_MMA"
|
||||
"<pv> %A0,%x1,%x2"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
(define_insn "mma_<apv>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
|
||||
(match_operand:POI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")]
|
||||
MMA_APV))]
|
||||
"TARGET_MMA"
|
||||
"<apv> %A0,%x2,%x3"
|
||||
[(set_attr "type" "mma")])
|
||||
|
||||
(define_insn "mma_<vvi4i4i8>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "u8bit_cint_operand" "n")]
|
||||
MMA_VVI4I4I8))]
|
||||
"TARGET_MMA"
|
||||
"<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
(define_insn "mma_<avvi4i4i8>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 6 "u8bit_cint_operand" "n")]
|
||||
MMA_AVVI4I4I8))]
|
||||
"TARGET_MMA"
|
||||
"<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
(define_insn "mma_<vvi4i4i2>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_3_operand" "n")]
|
||||
MMA_VVI4I4I2))]
|
||||
"TARGET_MMA"
|
||||
"<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
(define_insn "mma_<avvi4i4i2>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 6 "const_0_to_3_operand" "n")]
|
||||
MMA_AVVI4I4I2))]
|
||||
"TARGET_MMA"
|
||||
"<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
(define_insn "mma_<vvi4i4>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")]
|
||||
MMA_VVI4I4))]
|
||||
"TARGET_MMA"
|
||||
"<vvi4i4> %A0,%x1,%x2,%3,%4"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
(define_insn "mma_<avvi4i4>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n")]
|
||||
MMA_AVVI4I4))]
|
||||
"TARGET_MMA"
|
||||
"<avvi4i4> %A0,%x2,%x3,%4,%5"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
(define_insn "mma_<pvi4i2>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 4 "const_0_to_3_operand" "n")]
|
||||
MMA_PVI4I2))]
|
||||
"TARGET_MMA"
|
||||
"<pvi4i2> %A0,%x1,%x2,%3,%4"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
(define_insn "mma_<apvi4i2>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
|
||||
(match_operand:POI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_3_operand" "n")]
|
||||
MMA_APVI4I2))]
|
||||
"TARGET_MMA"
|
||||
"<apvi4i2> %A0,%x2,%x3,%4,%5"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
(define_insn "mma_<vvi4i4i4>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n")]
|
||||
MMA_VVI4I4I4))]
|
||||
"TARGET_MMA"
|
||||
"<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
(define_insn "mma_<avvi4i4i4>"
|
||||
[(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
|
||||
(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
|
||||
(match_operand:V16QI 2 "vsx_register_operand" "wa")
|
||||
(match_operand:V16QI 3 "vsx_register_operand" "wa")
|
||||
(match_operand:SI 4 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 5 "const_0_to_15_operand" "n")
|
||||
(match_operand:SI 6 "const_0_to_15_operand" "n")]
|
||||
MMA_AVVI4I4I4))]
|
||||
"TARGET_MMA"
|
||||
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
|
||||
[(set_attr "type" "mma")
|
||||
(set_attr "length" "8")])
|
||||
|
|
|
@ -1119,6 +1119,11 @@
|
|||
return gpc_reg_operand (op, mode);
|
||||
})
|
||||
|
||||
;; Return 1 if this operand is valid for a MMA assemble accumulator insn.
|
||||
(define_special_predicate "mma_assemble_input_operand"
|
||||
(match_test "(mode == V16QImode
|
||||
&& (vsx_register_operand (op, mode) || MEM_P (op)))"))
|
||||
|
||||
;; Return true if operand is an operator used in rotate-and-mask instructions.
|
||||
(define_predicate "rotate_mask_operator"
|
||||
(match_code "rotate,ashift,lshiftrt"))
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
RS6000_BUILTIN_A -- ABS builtins
|
||||
RS6000_BUILTIN_D -- DST builtins
|
||||
RS6000_BUILTIN_H -- HTM builtins
|
||||
RS6000_BUILTIN_M -- MMA builtins
|
||||
RS6000_BUILTIN_P -- Altivec, VSX, ISA 2.07 vector predicate builtins
|
||||
RS6000_BUILTIN_X -- special builtins
|
||||
|
||||
|
@ -74,6 +75,10 @@
|
|||
#error "RS6000_BUILTIN_H is not defined."
|
||||
#endif
|
||||
|
||||
#ifndef RS6000_BUILTIN_M
|
||||
#error "RS6000_BUILTIN_M is not defined."
|
||||
#endif
|
||||
|
||||
#ifndef RS6000_BUILTIN_P
|
||||
#error "RS6000_BUILTIN_P is not defined."
|
||||
#endif
|
||||
|
@ -329,6 +334,82 @@
|
|||
| RS6000_BTC_SPECIAL), \
|
||||
CODE_FOR_nothing) /* ICODE */
|
||||
|
||||
/* MMA convenience macros. */
|
||||
|
||||
/* Define a pair of RS6000_BUILTIN_M entries for one MMA built-in:
   MMA_BUILTIN_<ENUM>, named "__builtin_mma_" NAME, carrying ATTR plus the
   UNARY, VOID and GIMPLE attribute bits and CODE_FOR_nothing; and
   MMA_BUILTIN_<ENUM>_INTERNAL, named "__builtin_mma_" NAME "_internal",
   carrying ATTR plus UNARY only and the real insn code CODE_FOR_<ICODE>.
   Both entries use the RS6000_BTM_MMA mask.  */
#define BU_MMA_1(ENUM, NAME, ATTR, ICODE) \
|
||||
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \
|
||||
"__builtin_mma_" NAME, /* NAME */ \
|
||||
RS6000_BTM_MMA, /* MASK */ \
|
||||
(RS6000_BTC_ ## ATTR /* ATTR */ \
|
||||
| RS6000_BTC_UNARY \
|
||||
| RS6000_BTC_VOID \
|
||||
| RS6000_BTC_GIMPLE), \
|
||||
CODE_FOR_nothing) /* ICODE */ \
|
||||
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL, /* ENUM */ \
|
||||
"__builtin_mma_" NAME "_internal", /* NAME */ \
|
||||
RS6000_BTM_MMA, /* MASK */ \
|
||||
(RS6000_BTC_ ## ATTR /* ATTR */ \
|
||||
| RS6000_BTC_UNARY), \
|
||||
CODE_FOR_ ## ICODE) /* ICODE */
|
||||
|
||||
/* Define a single RS6000_BUILTIN_M entry, MMA_BUILTIN_<ENUM>, named
   "__builtin_mma_" NAME, carrying ATTR plus the BINARY, VOID and GIMPLE
   attribute bits.  Unlike BU_MMA_1/3/5/6, no _INTERNAL companion entry is
   defined and the ICODE argument is not used: the insn code is always
   CODE_FOR_nothing.  */
#define BU_MMA_V2(ENUM, NAME, ATTR, ICODE) \
|
||||
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \
|
||||
"__builtin_mma_" NAME, /* NAME */ \
|
||||
RS6000_BTM_MMA, /* MASK */ \
|
||||
(RS6000_BTC_ ## ATTR /* ATTR */ \
|
||||
| RS6000_BTC_BINARY \
|
||||
| RS6000_BTC_VOID \
|
||||
| RS6000_BTC_GIMPLE), \
|
||||
CODE_FOR_nothing) /* ICODE */
|
||||
|
||||
#define BU_MMA_3(ENUM, NAME, ATTR, ICODE) \
|
||||
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \
|
||||
"__builtin_mma_" NAME, /* NAME */ \
|
||||
RS6000_BTM_MMA, /* MASK */ \
|
||||
(RS6000_BTC_ ## ATTR /* ATTR */ \
|
||||
| RS6000_BTC_TERNARY \
|
||||
| RS6000_BTC_VOID \
|
||||
| RS6000_BTC_GIMPLE), \
|
||||
CODE_FOR_nothing) /* ICODE */ \
|
||||
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL, /* ENUM */ \
|
||||
"__builtin_mma_" NAME "_internal", /* NAME */ \
|
||||
RS6000_BTM_MMA, /* MASK */ \
|
||||
(RS6000_BTC_ ## ATTR /* ATTR */ \
|
||||
| RS6000_BTC_TERNARY), \
|
||||
CODE_FOR_ ## ICODE) /* ICODE */
|
||||
|
||||
#define BU_MMA_5(ENUM, NAME, ATTR, ICODE) \
|
||||
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \
|
||||
"__builtin_mma_" NAME, /* NAME */ \
|
||||
RS6000_BTM_MMA, /* MASK */ \
|
||||
(RS6000_BTC_ ## ATTR /* ATTR */ \
|
||||
| RS6000_BTC_QUINARY \
|
||||
| RS6000_BTC_VOID \
|
||||
| RS6000_BTC_GIMPLE), \
|
||||
CODE_FOR_nothing) /* ICODE */ \
|
||||
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL, /* ENUM */ \
|
||||
"__builtin_mma_" NAME "_internal", /* NAME */ \
|
||||
RS6000_BTM_MMA, /* MASK */ \
|
||||
(RS6000_BTC_ ## ATTR /* ATTR */ \
|
||||
| RS6000_BTC_QUINARY), \
|
||||
CODE_FOR_ ## ICODE) /* ICODE */
|
||||
|
||||
#define BU_MMA_6(ENUM, NAME, ATTR, ICODE) \
|
||||
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM, /* ENUM */ \
|
||||
"__builtin_mma_" NAME, /* NAME */ \
|
||||
RS6000_BTM_MMA, /* MASK */ \
|
||||
(RS6000_BTC_ ## ATTR /* ATTR */ \
|
||||
| RS6000_BTC_SENARY \
|
||||
| RS6000_BTC_VOID \
|
||||
| RS6000_BTC_GIMPLE), \
|
||||
CODE_FOR_nothing) /* ICODE */ \
|
||||
RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL, /* ENUM */ \
|
||||
"__builtin_mma_" NAME "_internal", /* NAME */ \
|
||||
RS6000_BTM_MMA, /* MASK */ \
|
||||
(RS6000_BTC_ ## ATTR /* ATTR */ \
|
||||
| RS6000_BTC_SENARY), \
|
||||
CODE_FOR_ ## ICODE) /* ICODE */
|
||||
|
||||
/* ISA 2.05 (power6) convenience macros. */
|
||||
/* For functions that depend on the CMPB instruction */
|
||||
#define BU_P6_2(ENUM, NAME, ATTR, ICODE) \
|
||||
|
@ -2785,3 +2866,77 @@ BU_SPECIAL_X (RS6000_BUILTIN_CPU_SUPPORTS, "__builtin_cpu_supports",
|
|||
/* Darwin CfString builtin. */
|
||||
BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS,
|
||||
RS6000_BTC_MISC)
|
||||
|
||||
/* FUTURE MMA builtins. */
|
||||
BU_VSX_1 (XVCVBF16SP, "xvcvbf16sp", MISC, vsx_xvcvbf16sp)
|
||||
BU_VSX_1 (XVCVSPBF16, "xvcvspbf16", MISC, vsx_xvcvspbf16)
|
||||
|
||||
BU_MMA_1 (XXMFACC, "xxmfacc", QUAD, mma_xxmfacc)
|
||||
BU_MMA_1 (XXMTACC, "xxmtacc", QUAD, mma_xxmtacc)
|
||||
BU_MMA_1 (XXSETACCZ, "xxsetaccz", MISC, mma_xxsetaccz)
|
||||
|
||||
BU_MMA_V2 (DISASSEMBLE_ACC, "disassemble_acc", QUAD, nothing)
|
||||
BU_MMA_V2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, nothing)
|
||||
|
||||
BU_MMA_3 (ASSEMBLE_PAIR, "assemble_pair", MISC, mma_assemble_pair)
|
||||
BU_MMA_3 (XVBF16GER2, "xvbf16ger2", MISC, mma_xvbf16ger2)
|
||||
BU_MMA_3 (XVF16GER2, "xvf16ger2", MISC, mma_xvf16ger2)
|
||||
BU_MMA_3 (XVF32GER, "xvf32ger", MISC, mma_xvf32ger)
|
||||
BU_MMA_3 (XVF64GER, "xvf64ger", PAIR, mma_xvf64ger)
|
||||
BU_MMA_3 (XVI4GER8, "xvi4ger8", MISC, mma_xvi4ger8)
|
||||
BU_MMA_3 (XVI8GER4, "xvi8ger4", MISC, mma_xvi8ger4)
|
||||
BU_MMA_3 (XVI16GER2, "xvi16ger2", MISC, mma_xvi16ger2)
|
||||
BU_MMA_3 (XVI16GER2S, "xvi16ger2s", MISC, mma_xvi16ger2s)
|
||||
BU_MMA_3 (XVBF16GER2NN, "xvbf16ger2nn", QUAD, mma_xvbf16ger2nn)
|
||||
BU_MMA_3 (XVBF16GER2NP, "xvbf16ger2np", QUAD, mma_xvbf16ger2np)
|
||||
BU_MMA_3 (XVBF16GER2PN, "xvbf16ger2pn", QUAD, mma_xvbf16ger2pn)
|
||||
BU_MMA_3 (XVBF16GER2PP, "xvbf16ger2pp", QUAD, mma_xvbf16ger2pp)
|
||||
BU_MMA_3 (XVF16GER2NN, "xvf16ger2nn", QUAD, mma_xvf16ger2nn)
|
||||
BU_MMA_3 (XVF16GER2NP, "xvf16ger2np", QUAD, mma_xvf16ger2np)
|
||||
BU_MMA_3 (XVF16GER2PN, "xvf16ger2pn", QUAD, mma_xvf16ger2pn)
|
||||
BU_MMA_3 (XVF16GER2PP, "xvf16ger2pp", QUAD, mma_xvf16ger2pp)
|
||||
BU_MMA_3 (XVF32GERNN, "xvf32gernn", QUAD, mma_xvf32gernn)
|
||||
BU_MMA_3 (XVF32GERNP, "xvf32gernp", QUAD, mma_xvf32gernp)
|
||||
BU_MMA_3 (XVF32GERPN, "xvf32gerpn", QUAD, mma_xvf32gerpn)
|
||||
BU_MMA_3 (XVF32GERPP, "xvf32gerpp", QUAD, mma_xvf32gerpp)
|
||||
BU_MMA_3 (XVF64GERNN, "xvf64gernn", QUADPAIR, mma_xvf64gernn)
|
||||
BU_MMA_3 (XVF64GERNP, "xvf64gernp", QUADPAIR, mma_xvf64gernp)
|
||||
BU_MMA_3 (XVF64GERPN, "xvf64gerpn", QUADPAIR, mma_xvf64gerpn)
|
||||
BU_MMA_3 (XVF64GERPP, "xvf64gerpp", QUADPAIR, mma_xvf64gerpp)
|
||||
BU_MMA_3 (XVI4GER8PP, "xvi4ger8pp", QUAD, mma_xvi4ger8pp)
|
||||
BU_MMA_3 (XVI8GER4PP, "xvi8ger4pp", QUAD, mma_xvi8ger4pp)
|
||||
BU_MMA_3 (XVI8GER4SPP, "xvi8ger4spp", QUAD, mma_xvi8ger4spp)
|
||||
BU_MMA_3 (XVI16GER2PP, "xvi16ger2pp", QUAD, mma_xvi16ger2pp)
|
||||
BU_MMA_3 (XVI16GER2SPP, "xvi16ger2spp", QUAD, mma_xvi16ger2spp)
|
||||
|
||||
BU_MMA_5 (ASSEMBLE_ACC, "assemble_acc", MISC, mma_assemble_acc)
|
||||
BU_MMA_5 (PMXVF32GER, "pmxvf32ger", MISC, mma_pmxvf32ger)
|
||||
BU_MMA_5 (PMXVF64GER, "pmxvf64ger", PAIR, mma_pmxvf64ger)
|
||||
BU_MMA_5 (PMXVF32GERNN, "pmxvf32gernn", QUAD, mma_pmxvf32gernn)
|
||||
BU_MMA_5 (PMXVF32GERNP, "pmxvf32gernp", QUAD, mma_pmxvf32gernp)
|
||||
BU_MMA_5 (PMXVF32GERPN, "pmxvf32gerpn", QUAD, mma_pmxvf32gerpn)
|
||||
BU_MMA_5 (PMXVF32GERPP, "pmxvf32gerpp", QUAD, mma_pmxvf32gerpp)
|
||||
BU_MMA_5 (PMXVF64GERNN, "pmxvf64gernn", QUADPAIR, mma_pmxvf64gernn)
|
||||
BU_MMA_5 (PMXVF64GERNP, "pmxvf64gernp", QUADPAIR, mma_pmxvf64gernp)
|
||||
BU_MMA_5 (PMXVF64GERPN, "pmxvf64gerpn", QUADPAIR, mma_pmxvf64gerpn)
|
||||
BU_MMA_5 (PMXVF64GERPP, "pmxvf64gerpp", QUADPAIR, mma_pmxvf64gerpp)
|
||||
|
||||
BU_MMA_6 (PMXVBF16GER2, "pmxvbf16ger2", MISC, mma_pmxvbf16ger2)
|
||||
BU_MMA_6 (PMXVF16GER2, "pmxvf16ger2", MISC, mma_pmxvf16ger2)
|
||||
BU_MMA_6 (PMXVI4GER8, "pmxvi4ger8", MISC, mma_pmxvi4ger8)
|
||||
BU_MMA_6 (PMXVI8GER4, "pmxvi8ger4", MISC, mma_pmxvi8ger4)
|
||||
BU_MMA_6 (PMXVI16GER2, "pmxvi16ger2", MISC, mma_pmxvi16ger2)
|
||||
BU_MMA_6 (PMXVI16GER2S, "pmxvi16ger2s", MISC, mma_pmxvi16ger2s)
|
||||
BU_MMA_6 (PMXVBF16GER2NN, "pmxvbf16ger2nn", QUAD, mma_pmxvbf16ger2nn)
|
||||
BU_MMA_6 (PMXVBF16GER2NP, "pmxvbf16ger2np", QUAD, mma_pmxvbf16ger2np)
|
||||
BU_MMA_6 (PMXVBF16GER2PN, "pmxvbf16ger2pn", QUAD, mma_pmxvbf16ger2pn)
|
||||
BU_MMA_6 (PMXVBF16GER2PP, "pmxvbf16ger2pp", QUAD, mma_pmxvbf16ger2pp)
|
||||
BU_MMA_6 (PMXVF16GER2NN, "pmxvf16ger2nn", QUAD, mma_pmxvf16ger2nn)
|
||||
BU_MMA_6 (PMXVF16GER2NP, "pmxvf16ger2np", QUAD, mma_pmxvf16ger2np)
|
||||
BU_MMA_6 (PMXVF16GER2PN, "pmxvf16ger2pn", QUAD, mma_pmxvf16ger2pn)
|
||||
BU_MMA_6 (PMXVF16GER2PP, "pmxvf16ger2pp", QUAD, mma_pmxvf16ger2pp)
|
||||
BU_MMA_6 (PMXVI4GER8PP, "pmxvi4ger8pp", QUAD, mma_pmxvi4ger8pp)
|
||||
BU_MMA_6 (PMXVI8GER4PP, "pmxvi8ger4pp", QUAD, mma_pmxvi8ger4pp)
|
||||
BU_MMA_6 (PMXVI8GER4SPP, "pmxvi8ger4spp", QUAD, mma_pmxvi8ger4spp)
|
||||
BU_MMA_6 (PMXVI16GER2PP, "pmxvi16ger2pp", QUAD, mma_pmxvi16ger2pp)
|
||||
BU_MMA_6 (PMXVI16GER2SPP, "pmxvi16ger2spp", QUAD, mma_pmxvi16ger2spp)
|
||||
|
|
|
@ -183,6 +183,7 @@ static tree builtin_function_type (machine_mode, machine_mode,
|
|||
enum rs6000_builtins, const char *name);
|
||||
static void rs6000_common_init_builtins (void);
|
||||
static void htm_init_builtins (void);
|
||||
static void mma_init_builtins (void);
|
||||
|
||||
|
||||
/* Hash table to keep track of the argument types for builtin functions. */
|
||||
|
@ -243,6 +244,7 @@ builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -270,6 +272,9 @@ builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
|
|||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
|
||||
{ NAME, ICODE, MASK, ATTR },
|
||||
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE) \
|
||||
{ NAME, ICODE, MASK, ATTR },
|
||||
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
|
||||
{ NAME, ICODE, MASK, ATTR },
|
||||
|
||||
|
@ -296,6 +301,7 @@ static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -8354,6 +8360,9 @@ def_builtin (const char *name, tree type, enum rs6000_builtins code)
|
|||
attr_string = ", fp, const";
|
||||
}
|
||||
}
|
||||
else if ((classify & (RS6000_BTC_QUAD | RS6000_BTC_PAIR)) != 0)
|
||||
/* The function uses a register quad and/or pair. Nothing to do. */
|
||||
;
|
||||
else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
|
||||
gcc_unreachable ();
|
||||
|
||||
|
@ -8372,6 +8381,7 @@ def_builtin (const char *name, tree type, enum rs6000_builtins code)
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -8385,6 +8395,7 @@ def_builtin (const char *name, tree type, enum rs6000_builtins code)
|
|||
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
|
||||
|
@ -8403,6 +8414,7 @@ static const struct builtin_description bdesc_3arg[] =
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -8416,6 +8428,7 @@ static const struct builtin_description bdesc_3arg[] =
|
|||
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
|
||||
|
@ -8434,6 +8447,7 @@ static const struct builtin_description bdesc_4arg[] =
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -8447,6 +8461,7 @@ static const struct builtin_description bdesc_4arg[] =
|
|||
{ MASK, ICODE, NAME, ENUM },
|
||||
|
||||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
|
||||
|
@ -8465,6 +8480,7 @@ static const struct builtin_description bdesc_dst[] =
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -8478,6 +8494,7 @@ static const struct builtin_description bdesc_dst[] =
|
|||
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
|
||||
|
@ -8494,6 +8511,7 @@ static const struct builtin_description bdesc_2arg[] =
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -8505,6 +8523,7 @@ static const struct builtin_description bdesc_2arg[] =
|
|||
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
|
||||
{ MASK, ICODE, NAME, ENUM },
|
||||
|
||||
|
@ -8527,6 +8546,7 @@ static const struct builtin_description bdesc_altivec_preds[] =
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -8540,6 +8560,7 @@ static const struct builtin_description bdesc_altivec_preds[] =
|
|||
|
||||
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
|
||||
|
@ -8559,6 +8580,7 @@ static const struct builtin_description bdesc_abs[] =
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -8572,6 +8594,7 @@ static const struct builtin_description bdesc_abs[] =
|
|||
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
|
||||
|
@ -8590,6 +8613,7 @@ static const struct builtin_description bdesc_1arg[] =
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -8603,6 +8627,7 @@ static const struct builtin_description bdesc_1arg[] =
|
|||
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
|
||||
|
@ -8620,6 +8645,7 @@ static const struct builtin_description bdesc_0arg[] =
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -8633,6 +8659,7 @@ static const struct builtin_description bdesc_0arg[] =
|
|||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
|
||||
{ MASK, ICODE, NAME, ENUM },
|
||||
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
|
||||
|
@ -8641,6 +8668,38 @@ static const struct builtin_description bdesc_htm[] =
|
|||
#include "rs6000-builtin.def"
|
||||
};
|
||||
|
||||
/* MMA builtins. */
|
||||
#undef RS6000_BUILTIN_0
|
||||
#undef RS6000_BUILTIN_1
|
||||
#undef RS6000_BUILTIN_2
|
||||
#undef RS6000_BUILTIN_3
|
||||
#undef RS6000_BUILTIN_4
|
||||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_4(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE) \
|
||||
{ MASK, ICODE, NAME, ENUM },
|
||||
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
|
||||
|
||||
static const struct builtin_description bdesc_mma[] =
|
||||
{
|
||||
#include "rs6000-builtin.def"
|
||||
};
|
||||
|
||||
#undef RS6000_BUILTIN_0
|
||||
#undef RS6000_BUILTIN_1
|
||||
#undef RS6000_BUILTIN_2
|
||||
|
@ -8649,7 +8708,9 @@ static const struct builtin_description bdesc_htm[] =
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
/* Return true if a builtin function is overloaded. */
|
||||
bool
|
||||
|
@ -9393,6 +9454,133 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
|
|||
return NULL_RTX;
|
||||
}
|
||||
|
||||
/* Expand the MMA built-in in EXP.
|
||||
Store true in *EXPANDEDP if we found a built-in to expand. */
|
||||
|
||||
static rtx
|
||||
mma_expand_builtin (tree exp, rtx target, bool *expandedp)
|
||||
{
|
||||
unsigned i;
|
||||
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
|
||||
enum rs6000_builtins fcode
|
||||
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
|
||||
const struct builtin_description *d = bdesc_mma;
|
||||
|
||||
/* Expand the MMA built-in. */
|
||||
for (i = 0; i < ARRAY_SIZE (bdesc_mma); i++, d++)
|
||||
if (d->code == fcode)
|
||||
break;
|
||||
|
||||
if (i >= ARRAY_SIZE (bdesc_mma))
|
||||
{
|
||||
*expandedp = false;
|
||||
return NULL_RTX;
|
||||
}
|
||||
|
||||
*expandedp = true;
|
||||
|
||||
tree arg;
|
||||
call_expr_arg_iterator iter;
|
||||
enum insn_code icode = d->icode;
|
||||
const struct insn_operand_data *insn_op;
|
||||
rtx op[MAX_MMA_OPERANDS];
|
||||
unsigned nopnds = 0;
|
||||
unsigned attr = rs6000_builtin_info[fcode].attr;
|
||||
bool void_func = (attr & RS6000_BTC_VOID);
|
||||
machine_mode tmode = VOIDmode;
|
||||
|
||||
if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
|
||||
{
|
||||
tmode = insn_data[icode].operand[0].mode;
|
||||
if (!target
|
||||
|| GET_MODE (target) != tmode
|
||||
|| !(*insn_data[icode].operand[0].predicate) (target, tmode))
|
||||
target = gen_reg_rtx (tmode);
|
||||
op[nopnds++] = target;
|
||||
}
|
||||
else
|
||||
target = const0_rtx;
|
||||
|
||||
FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
|
||||
{
|
||||
if (arg == error_mark_node)
|
||||
return const0_rtx;
|
||||
|
||||
rtx opnd;
|
||||
insn_op = &insn_data[icode].operand[nopnds];
|
||||
if (TREE_CODE (arg) == ADDR_EXPR
|
||||
&& MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
|
||||
opnd = DECL_RTL (TREE_OPERAND (arg, 0));
|
||||
else
|
||||
opnd = expand_normal (arg);
|
||||
|
||||
if (!(*insn_op->predicate) (opnd, insn_op->mode))
|
||||
{
|
||||
if (!strcmp (insn_op->constraint, "n"))
|
||||
{
|
||||
if (!CONST_INT_P (opnd))
|
||||
error ("argument %d must be an unsigned literal", nopnds);
|
||||
else
|
||||
error ("argument %d is an unsigned literal that is "
|
||||
"out of range", nopnds);
|
||||
return const0_rtx;
|
||||
}
|
||||
opnd = copy_to_mode_reg (insn_op->mode, opnd);
|
||||
}
|
||||
|
||||
/* Some MMA instructions have INOUT accumulator operands, so force
|
||||
their target register to be the same as their input register. */
|
||||
if (!void_func
|
||||
&& nopnds == 1
|
||||
&& !strcmp (insn_op->constraint, "0")
|
||||
&& insn_op->mode == tmode
|
||||
&& REG_P (opnd)
|
||||
&& (*insn_data[icode].operand[0].predicate) (opnd, tmode))
|
||||
target = op[0] = opnd;
|
||||
|
||||
op[nopnds++] = opnd;
|
||||
}
|
||||
|
||||
unsigned attr_args = attr & RS6000_BTC_OPND_MASK;
|
||||
if (attr & RS6000_BTC_QUAD)
|
||||
attr_args++;
|
||||
|
||||
gcc_assert (nopnds == attr_args);
|
||||
|
||||
rtx pat;
|
||||
switch (nopnds)
|
||||
{
|
||||
case 1:
|
||||
pat = GEN_FCN (icode) (op[0]);
|
||||
break;
|
||||
case 2:
|
||||
pat = GEN_FCN (icode) (op[0], op[1]);
|
||||
break;
|
||||
case 3:
|
||||
pat = GEN_FCN (icode) (op[0], op[1], op[2]);
|
||||
break;
|
||||
case 4:
|
||||
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
|
||||
break;
|
||||
case 5:
|
||||
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
|
||||
break;
|
||||
case 6:
|
||||
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
|
||||
break;
|
||||
case 7:
|
||||
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
if (!pat)
|
||||
return NULL_RTX;
|
||||
emit_insn (pat);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
/* Return the appropriate SPR number associated with the given builtin. */
|
||||
static inline HOST_WIDE_INT
|
||||
htm_spr_num (enum rs6000_builtins code)
|
||||
|
@ -9539,11 +9727,11 @@ htm_expand_builtin (tree exp, rtx target, bool * expandedp)
|
|||
if (flag_checking)
|
||||
{
|
||||
int expected_nopnds = 0;
|
||||
if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
|
||||
if ((attr & RS6000_BTC_OPND_MASK) == RS6000_BTC_UNARY)
|
||||
expected_nopnds = 1;
|
||||
else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
|
||||
else if ((attr & RS6000_BTC_OPND_MASK) == RS6000_BTC_BINARY)
|
||||
expected_nopnds = 2;
|
||||
else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
|
||||
else if ((attr & RS6000_BTC_OPND_MASK) == RS6000_BTC_TERNARY)
|
||||
expected_nopnds = 3;
|
||||
else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_QUATERNARY)
|
||||
expected_nopnds = 4;
|
||||
|
@ -10647,6 +10835,10 @@ rs6000_invalid_builtin (enum rs6000_builtins fncode)
|
|||
"-m64");
|
||||
else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
|
||||
error ("%qs requires the %qs option", name, "-mcpu=power9");
|
||||
else if ((fnmask & RS6000_BTM_FUTURE) != 0)
|
||||
error ("%qs requires the %qs option", name, "-mcpu=future");
|
||||
else if ((fnmask & RS6000_BTM_MMA) != 0)
|
||||
error ("%qs requires the %qs option", name, "-mmma");
|
||||
else if ((fnmask & RS6000_BTM_LDBL128) == RS6000_BTM_LDBL128)
|
||||
{
|
||||
if (!TARGET_HARD_FLOAT)
|
||||
|
@ -10690,6 +10882,10 @@ rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
|
|||
static bool
|
||||
rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
|
||||
{
|
||||
/* Check for built-ins explicitly marked as a void function. */
|
||||
if (rs6000_builtin_info[fn_code].attr & RS6000_BTC_VOID)
|
||||
return true;
|
||||
|
||||
switch (fn_code)
|
||||
{
|
||||
case ALTIVEC_BUILTIN_STVX_V16QI:
|
||||
|
@ -10836,6 +11032,156 @@ fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd)
|
|||
gsi_replace (gsi, g, true);
|
||||
}
|
||||
|
||||
/* Expand the MMA built-ins early, so that we can convert the pass-by-reference
|
||||
__vector_quad arguments into pass-by-value arguments, leading to more
|
||||
efficient code generation. */
|
||||
|
||||
bool
|
||||
rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
|
||||
{
|
||||
gimple *stmt = gsi_stmt (*gsi);
|
||||
tree fndecl = gimple_call_fndecl (stmt);
|
||||
enum rs6000_builtins fncode
|
||||
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
|
||||
unsigned attr = rs6000_builtin_info[fncode].attr;
|
||||
|
||||
if ((attr & RS6000_BTC_GIMPLE) == 0)
|
||||
return false;
|
||||
|
||||
unsigned nopnds = (attr & RS6000_BTC_OPND_MASK);
|
||||
gimple_seq new_seq = NULL;
|
||||
gimple *new_call;
|
||||
tree new_decl;
|
||||
|
||||
if (rs6000_builtin_info[fncode + 1].icode == CODE_FOR_nothing)
|
||||
{
|
||||
/* This is an MMA disassemble built-in function. */
|
||||
gcc_assert (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
|
||||
|| fncode == MMA_BUILTIN_DISASSEMBLE_PAIR);
|
||||
|
||||
push_gimplify_context (true);
|
||||
tree dst_ptr = gimple_call_arg (stmt, 0);
|
||||
tree src_ptr = gimple_call_arg (stmt, 1);
|
||||
tree src_type = TREE_TYPE (src_ptr);
|
||||
tree src = make_ssa_name (TREE_TYPE (src_type));
|
||||
gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
|
||||
|
||||
/* If we are not disassembling an accumulator or our destination is
|
||||
another accumulator, then just copy the entire thing as is. */
|
||||
if (fncode != MMA_BUILTIN_DISASSEMBLE_ACC
|
||||
|| TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
|
||||
{
|
||||
tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR,
|
||||
src_type, dst_ptr));
|
||||
gimplify_assign (dst, src, &new_seq);
|
||||
pop_gimplify_context (NULL);
|
||||
gsi_replace_with_seq (gsi, new_seq, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* We're disassembling an accumulator into a different type, so we need
|
||||
to emit a xxmfacc instruction now, since we cannot do it later. */
|
||||
new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
|
||||
new_call = gimple_build_call (new_decl, 1, src);
|
||||
src = make_ssa_name (vector_quad_type_node);
|
||||
gimple_call_set_lhs (new_call, src);
|
||||
gimple_seq_add_stmt (&new_seq, new_call);
|
||||
|
||||
/* Copy the accumulator vector by vector. */
|
||||
tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
|
||||
ptr_mode, true);
|
||||
tree dst_base = build1 (VIEW_CONVERT_EXPR, dst_type, dst_ptr);
|
||||
tree array_type = build_array_type_nelts (unsigned_V16QI_type_node, 4);
|
||||
tree src_array = build1 (VIEW_CONVERT_EXPR, array_type, src);
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
{
|
||||
tree ref = build4 (ARRAY_REF, unsigned_V16QI_type_node, src_array,
|
||||
build_int_cst (size_type_node, i),
|
||||
NULL_TREE, NULL_TREE);
|
||||
tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
|
||||
build_int_cst (dst_type, i * 16));
|
||||
gimplify_assign (dst, ref, &new_seq);
|
||||
}
|
||||
pop_gimplify_context (NULL);
|
||||
gsi_replace_with_seq (gsi, new_seq, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Convert this built-in into an internal version that uses pass-by-value
|
||||
arguments. The internal built-in follows immediately after this one. */
|
||||
new_decl = rs6000_builtin_decls[fncode + 1];
|
||||
tree lhs, mem, op[MAX_MMA_OPERANDS];
|
||||
tree acc = gimple_call_arg (stmt, 0);
|
||||
if (TREE_CODE (acc) == PARM_DECL)
|
||||
mem = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (acc)), acc);
|
||||
else
|
||||
mem = build_simple_mem_ref (acc);
|
||||
push_gimplify_context (true);
|
||||
|
||||
if ((attr & RS6000_BTC_QUAD) != 0)
|
||||
{
|
||||
/* This built-in has a pass-by-reference accumulator input, so load it
|
||||
into a temporary accumulator for use as a pass-by-value input. */
|
||||
op[0] = make_ssa_name (vector_quad_type_node);
|
||||
for (unsigned i = 1; i < nopnds; i++)
|
||||
op[i] = gimple_call_arg (stmt, i);
|
||||
gimplify_assign (op[0], mem, &new_seq);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This built-in does not use its pass-by-reference accumulator argument
|
||||
as an input argument, so remove it from the input list. */
|
||||
nopnds--;
|
||||
for (unsigned i = 0; i < nopnds; i++)
|
||||
op[i] = gimple_call_arg (stmt, i + 1);
|
||||
}
|
||||
|
||||
switch (nopnds)
|
||||
{
|
||||
case 0:
|
||||
new_call = gimple_build_call (new_decl, 0);
|
||||
break;
|
||||
case 1:
|
||||
new_call = gimple_build_call (new_decl, 1, op[0]);
|
||||
break;
|
||||
case 2:
|
||||
new_call = gimple_build_call (new_decl, 2, op[0], op[1]);
|
||||
break;
|
||||
case 3:
|
||||
new_call = gimple_build_call (new_decl, 3, op[0], op[1], op[2]);
|
||||
break;
|
||||
case 4:
|
||||
new_call = gimple_build_call (new_decl, 4, op[0], op[1], op[2], op[3]);
|
||||
break;
|
||||
case 5:
|
||||
new_call = gimple_build_call (new_decl, 5, op[0], op[1], op[2], op[3],
|
||||
op[4]);
|
||||
break;
|
||||
case 6:
|
||||
new_call = gimple_build_call (new_decl, 6, op[0], op[1], op[2], op[3],
|
||||
op[4], op[5]);
|
||||
break;
|
||||
case 7:
|
||||
new_call = gimple_build_call (new_decl, 7, op[0], op[1], op[2], op[3],
|
||||
op[4], op[5], op[6]);
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
if (fncode == MMA_BUILTIN_ASSEMBLE_PAIR)
|
||||
lhs = make_ssa_name (vector_pair_type_node);
|
||||
else
|
||||
lhs = make_ssa_name (vector_quad_type_node);
|
||||
gimple_call_set_lhs (new_call, lhs);
|
||||
gimple_seq_add_stmt (&new_seq, new_call);
|
||||
gimplify_assign (mem, lhs, &new_seq);
|
||||
pop_gimplify_context (NULL);
|
||||
gsi_replace_with_seq (gsi, new_seq, true);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Fold a machine-dependent built-in in GIMPLE. (For folding into
|
||||
a constant, use rs6000_fold_builtin.) */
|
||||
|
||||
|
@ -10871,11 +11217,12 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
|
|||
return false;
|
||||
|
||||
/* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
|
||||
HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
|
||||
bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
|
||||
if (!func_valid_p)
|
||||
if (!rs6000_builtin_is_supported_p (fn_code))
|
||||
return false;
|
||||
|
||||
if (rs6000_gimple_fold_mma_builtin (gsi))
|
||||
return true;
|
||||
|
||||
switch (fn_code)
|
||||
{
|
||||
/* Flavors of vec_add. We deliberately don't expand
|
||||
|
@ -12010,6 +12357,13 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
|||
break;
|
||||
}
|
||||
|
||||
if (TARGET_MMA)
|
||||
{
|
||||
ret = mma_expand_builtin (exp, target, &success);
|
||||
|
||||
if (success)
|
||||
return ret;
|
||||
}
|
||||
if (TARGET_ALTIVEC)
|
||||
{
|
||||
ret = altivec_expand_builtin (exp, target, &success);
|
||||
|
@ -12025,7 +12379,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
|||
return ret;
|
||||
}
|
||||
|
||||
unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
|
||||
unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_OPND_MASK;
|
||||
/* RS6000_BTC_SPECIAL represents no-operand operators. */
|
||||
gcc_assert (attr == RS6000_BTC_UNARY
|
||||
|| attr == RS6000_BTC_BINARY
|
||||
|
@ -12208,7 +12562,7 @@ rs6000_init_builtins (void)
|
|||
else
|
||||
ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
|
||||
|
||||
/* Vector paired and vector quad support. */
|
||||
/* Vector pair and vector quad support. */
|
||||
if (TARGET_MMA)
|
||||
{
|
||||
tree oi_uns_type = make_unsigned_type (256);
|
||||
|
@ -12290,6 +12644,8 @@ rs6000_init_builtins (void)
|
|||
the target attribute. */
|
||||
if (TARGET_EXTRA_BUILTINS)
|
||||
altivec_init_builtins ();
|
||||
if (TARGET_MMA)
|
||||
mma_init_builtins ();
|
||||
if (TARGET_HTM)
|
||||
htm_init_builtins ();
|
||||
|
||||
|
@ -13015,6 +13371,119 @@ altivec_init_builtins (void)
|
|||
|
||||
}
|
||||
|
||||
static void
|
||||
mma_init_builtins (void)
|
||||
{
|
||||
const struct builtin_description *d = bdesc_mma;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE (bdesc_mma); i++, d++)
|
||||
{
|
||||
tree op[MAX_MMA_OPERANDS], type;
|
||||
HOST_WIDE_INT mask = d->mask;
|
||||
unsigned icode = (unsigned) d->icode;
|
||||
unsigned attr = rs6000_builtin_info[d->code].attr;
|
||||
int attr_args = (attr & RS6000_BTC_OPND_MASK);
|
||||
bool gimple_func = (attr & RS6000_BTC_GIMPLE);
|
||||
unsigned nopnds = 0;
|
||||
|
||||
if ((mask & rs6000_builtin_mask) != mask)
|
||||
{
|
||||
if (TARGET_DEBUG_BUILTIN)
|
||||
fprintf (stderr, "mma_builtin, skip binary %s\n", d->name);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (d->name == 0)
|
||||
{
|
||||
if (TARGET_DEBUG_BUILTIN)
|
||||
fprintf (stderr, "mma_builtin, bdesc_mma[%ld] no name\n",
|
||||
(long unsigned) i);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (gimple_func)
|
||||
{
|
||||
gcc_assert (icode == CODE_FOR_nothing);
|
||||
op[nopnds++] = void_type_node;
|
||||
/* Some MMA built-ins that are expanded into gimple are converted
|
||||
into internal MMA built-ins that are expanded into rtl.
|
||||
The internal built-in follows immediately after this built-in. */
|
||||
icode = d[1].icode;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((attr & RS6000_BTC_QUAD) == 0)
|
||||
attr_args--;
|
||||
|
||||
/* Ensure we have the correct number and type of operands. */
|
||||
gcc_assert (attr_args == insn_data[icode].n_operands - 1);
|
||||
}
|
||||
|
||||
if (icode == CODE_FOR_nothing)
|
||||
{
|
||||
/* This is a disassemble MMA built-in function. */
|
||||
gcc_assert (attr_args == RS6000_BTC_BINARY
|
||||
&& (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
|
||||
|| d->code == MMA_BUILTIN_DISASSEMBLE_PAIR));
|
||||
op[nopnds++] = build_pointer_type (void_type_node);
|
||||
if (attr & RS6000_BTC_QUAD)
|
||||
op[nopnds++] = build_pointer_type (vector_quad_type_node);
|
||||
else
|
||||
op[nopnds++] = build_pointer_type (vector_pair_type_node);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This is a normal MMA built-in function. */
|
||||
unsigned j = (attr & RS6000_BTC_QUAD) ? 1 : 0;
|
||||
for (; j < insn_data[icode].n_operands; j++)
|
||||
{
|
||||
machine_mode mode = insn_data[icode].operand[j].mode;
|
||||
if (gimple_func && mode == PXImode)
|
||||
op[nopnds++] = build_pointer_type (vector_quad_type_node);
|
||||
else if (gimple_func && mode == POImode
|
||||
&& d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
|
||||
op[nopnds++] = build_pointer_type (vector_pair_type_node);
|
||||
else
|
||||
/* MMA uses unsigned types. */
|
||||
op[nopnds++] = builtin_mode_to_type[mode][1];
|
||||
}
|
||||
}
|
||||
|
||||
switch (nopnds)
|
||||
{
|
||||
case 1:
|
||||
type = build_function_type_list (op[0], NULL_TREE);
|
||||
break;
|
||||
case 2:
|
||||
type = build_function_type_list (op[0], op[1], NULL_TREE);
|
||||
break;
|
||||
case 3:
|
||||
type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
|
||||
break;
|
||||
case 4:
|
||||
type = build_function_type_list (op[0], op[1], op[2], op[3],
|
||||
NULL_TREE);
|
||||
break;
|
||||
case 5:
|
||||
type = build_function_type_list (op[0], op[1], op[2], op[3], op[4],
|
||||
NULL_TREE);
|
||||
break;
|
||||
case 6:
|
||||
type = build_function_type_list (op[0], op[1], op[2], op[3], op[4],
|
||||
op[5], NULL_TREE);
|
||||
break;
|
||||
case 7:
|
||||
type = build_function_type_list (op[0], op[1], op[2], op[3], op[4],
|
||||
op[5], op[6], NULL_TREE);
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
def_builtin (d->name, type, d->code);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
htm_init_builtins (void)
|
||||
{
|
||||
|
@ -13029,7 +13498,7 @@ htm_init_builtins (void)
|
|||
HOST_WIDE_INT mask = d->mask;
|
||||
unsigned attr = rs6000_builtin_info[d->code].attr;
|
||||
bool void_func = (attr & RS6000_BTC_VOID);
|
||||
int attr_args = (attr & RS6000_BTC_TYPE_MASK);
|
||||
int attr_args = (attr & RS6000_BTC_OPND_MASK);
|
||||
int nopnds = 0;
|
||||
tree gpr_type_node;
|
||||
tree rettype;
|
||||
|
@ -13195,6 +13664,8 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
|
|||
case P8V_BUILTIN_VGBBD:
|
||||
case MISC_BUILTIN_CDTBCD:
|
||||
case MISC_BUILTIN_CBCDTD:
|
||||
case VSX_BUILTIN_XVCVSPBF16:
|
||||
case VSX_BUILTIN_XVCVBF16SP:
|
||||
h.uns_p[0] = 1;
|
||||
h.uns_p[1] = 1;
|
||||
break;
|
||||
|
|
|
@ -9941,7 +9941,7 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
|
|||
|
||||
case E_POImode:
|
||||
case E_PXImode:
|
||||
if (CONSTANT_P (operands[1]))
|
||||
if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
|
||||
error ("%qs is an opaque type, and you can't set it to other values.",
|
||||
(mode == POImode) ? "__vector_pair" : "__vector_quad");
|
||||
break;
|
||||
|
@ -12853,6 +12853,14 @@ print_operand (FILE *file, rtx x, int code)
|
|||
/* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
|
||||
output_operand. */
|
||||
|
||||
case 'A':
|
||||
/* Write the MMA accumulator number associated with VSX register X. */
|
||||
if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
|
||||
output_operand_lossage ("invalid %%A value");
|
||||
else
|
||||
fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
|
||||
return;
|
||||
|
||||
case 'D':
|
||||
/* Like 'J' but get to the GT bit only. */
|
||||
if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
|
||||
|
@ -15963,6 +15971,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
|||
unsigned offset = 0;
|
||||
unsigned size = GET_MODE_SIZE (reg_mode);
|
||||
|
||||
/* If we are reading an accumulator register, we have to
|
||||
deprime it before we can access it. */
|
||||
if (TARGET_MMA
|
||||
&& GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
|
||||
emit_insn (gen_mma_xxmfacc (src, src));
|
||||
|
||||
for (int i = 0; i < nregs; i++)
|
||||
{
|
||||
unsigned subreg = (WORDS_BIG_ENDIAN)
|
||||
|
@ -15991,6 +16005,32 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
|||
emit_insn (gen_rtx_SET (dst2, src2));
|
||||
}
|
||||
|
||||
/* If we are writing an accumulator register, we have to
|
||||
prime it after we've written it. */
|
||||
if (TARGET_MMA
|
||||
&& GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
|
||||
emit_insn (gen_mma_xxmtacc (dst, dst));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (GET_CODE (src) == UNSPEC)
|
||||
{
|
||||
gcc_assert (REG_P (dst)
|
||||
&& FP_REGNO_P (REGNO (dst))
|
||||
&& XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
|
||||
|
||||
reg_mode = GET_MODE (XVECEXP (src, 0, 0));
|
||||
for (int i = 0; i < XVECLEN (src, 0); i++)
|
||||
{
|
||||
rtx dst_i = gen_rtx_REG (reg_mode, reg + i);
|
||||
emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
|
||||
}
|
||||
|
||||
/* We are writing an accumulator register, so we have to
|
||||
prime it after we've written it. */
|
||||
emit_insn (gen_mma_xxmtacc (dst, dst));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -15999,6 +16039,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
|||
|
||||
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
|
||||
{
|
||||
/* If we are reading an accumulator register, we have to
|
||||
deprime it before we can access it. */
|
||||
if (TARGET_MMA
|
||||
&& GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
|
||||
emit_insn (gen_mma_xxmfacc (src, src));
|
||||
|
||||
/* Move register range backwards, if we might have destructive
|
||||
overlap. */
|
||||
int i;
|
||||
|
@ -16007,6 +16053,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
|||
i * reg_mode_size),
|
||||
simplify_gen_subreg (reg_mode, src, mode,
|
||||
i * reg_mode_size)));
|
||||
|
||||
/* If we are writing an accumulator register, we have to
|
||||
prime it after we've written it. */
|
||||
if (TARGET_MMA
|
||||
&& GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
|
||||
emit_insn (gen_mma_xxmtacc (dst, dst));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -16139,6 +16191,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
|||
gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
|
||||
}
|
||||
|
||||
/* If we are reading an accumulator register, we have to
|
||||
deprime it before we can access it. */
|
||||
if (TARGET_MMA && REG_P (src)
|
||||
&& GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
|
||||
emit_insn (gen_mma_xxmfacc (src, src));
|
||||
|
||||
for (i = 0; i < nregs; i++)
|
||||
{
|
||||
/* Calculate index to next subword. */
|
||||
|
@ -16156,6 +16214,13 @@ rs6000_split_multireg_move (rtx dst, rtx src)
|
|||
simplify_gen_subreg (reg_mode, src, mode,
|
||||
j * reg_mode_size)));
|
||||
}
|
||||
|
||||
/* If we are writing an accumulator register, we have to
|
||||
prime it after we've written it. */
|
||||
if (TARGET_MMA && REG_P (dst)
|
||||
&& GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
|
||||
emit_insn (gen_mma_xxmtacc (dst, dst));
|
||||
|
||||
if (restore_basereg != NULL_RTX)
|
||||
emit_insn (restore_basereg);
|
||||
}
|
||||
|
|
|
@ -2251,20 +2251,24 @@ extern int frame_pointer_needed;
|
|||
flags macros, but we've run out of bits, so we now map the options into new
|
||||
settings used here. */
|
||||
|
||||
/* Builtin attributes. */
|
||||
#define RS6000_BTC_SPECIAL 0x00000000 /* Special function. */
|
||||
/* Builtin operand count. */
|
||||
#define RS6000_BTC_UNARY 0x00000001 /* normal unary function. */
|
||||
#define RS6000_BTC_BINARY 0x00000002 /* normal binary function. */
|
||||
#define RS6000_BTC_TERNARY 0x00000003 /* normal ternary function. */
|
||||
#define RS6000_BTC_QUATERNARY 0x00000004 /* normal quaternary
|
||||
function. */
|
||||
#define RS6000_BTC_QUINARY 0x00000005 /* normal quinary function. */
|
||||
#define RS6000_BTC_SENARY 0x00000006 /* normal senary function. */
|
||||
#define RS6000_BTC_OPND_MASK 0x00000007 /* Mask to isolate operands. */
|
||||
|
||||
#define RS6000_BTC_PREDICATE 0x00000005 /* predicate function. */
|
||||
#define RS6000_BTC_ABS 0x00000006 /* Altivec/VSX ABS
|
||||
/* Builtin attributes. */
|
||||
#define RS6000_BTC_SPECIAL 0x00000000 /* Special function. */
|
||||
#define RS6000_BTC_PREDICATE 0x00000008 /* predicate function. */
|
||||
#define RS6000_BTC_ABS 0x00000010 /* Altivec/VSX ABS
|
||||
function. */
|
||||
#define RS6000_BTC_DST 0x00000007 /* Altivec DST function. */
|
||||
#define RS6000_BTC_DST 0x00000020 /* Altivec DST function. */
|
||||
|
||||
#define RS6000_BTC_TYPE_MASK 0x0000000f /* Mask to isolate types */
|
||||
#define RS6000_BTC_TYPE_MASK 0x0000003f /* Mask to isolate types */
|
||||
|
||||
#define RS6000_BTC_MISC 0x00000000 /* No special attributes. */
|
||||
#define RS6000_BTC_CONST 0x00000100 /* Neither uses, nor
|
||||
|
@ -2273,13 +2277,18 @@ extern int frame_pointer_needed;
|
|||
state/mem and does
|
||||
not modify global state. */
|
||||
#define RS6000_BTC_FP 0x00000400 /* depends on rounding mode. */
|
||||
#define RS6000_BTC_ATTR_MASK 0x00000700 /* Mask of the attributes. */
|
||||
#define RS6000_BTC_QUAD 0x00000800 /* Uses a register quad. */
|
||||
#define RS6000_BTC_PAIR 0x00001000 /* Uses a register pair. */
|
||||
#define RS6000_BTC_QUADPAIR 0x00001800 /* Uses a quad and a pair. */
|
||||
#define RS6000_BTC_ATTR_MASK 0x00001f00 /* Mask of the attributes. */
|
||||
|
||||
/* Miscellaneous information. */
|
||||
#define RS6000_BTC_SPR 0x01000000 /* function references SPRs. */
|
||||
#define RS6000_BTC_VOID 0x02000000 /* function has no return value. */
|
||||
#define RS6000_BTC_CR 0x04000000 /* function references a CR. */
|
||||
#define RS6000_BTC_OVERLOADED 0x08000000 /* function is overloaded. */
|
||||
#define RS6000_BTC_GIMPLE 0x10000000 /* function should be expanded
|
||||
into gimple. */
|
||||
#define RS6000_BTC_MISC_MASK 0x1f000000 /* Mask of the misc info. */
|
||||
|
||||
/* Convenience macros to document the instruction type. */
|
||||
|
@ -2348,6 +2357,7 @@ extern int frame_pointer_needed;
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
@ -2359,6 +2369,7 @@ extern int frame_pointer_needed;
|
|||
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
|
||||
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
|
||||
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
|
||||
#define RS6000_BUILTIN_M(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
|
||||
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
|
||||
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
|
||||
|
||||
|
@ -2377,6 +2388,7 @@ enum rs6000_builtins
|
|||
#undef RS6000_BUILTIN_A
|
||||
#undef RS6000_BUILTIN_D
|
||||
#undef RS6000_BUILTIN_H
|
||||
#undef RS6000_BUILTIN_M
|
||||
#undef RS6000_BUILTIN_P
|
||||
#undef RS6000_BUILTIN_X
|
||||
|
||||
|
|
|
@ -203,7 +203,7 @@
|
|||
vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,
|
||||
vecfloat,vecfdiv,vecdouble,mffgpr,mftgpr,crypto,
|
||||
veclogical,veccmpfx,vecexts,vecmove,
|
||||
htm,htmsimple,dfp"
|
||||
htm,htmsimple,dfp,mma"
|
||||
(const_string "integer"))
|
||||
|
||||
;; What data size does this instruction work on?
|
||||
|
|
|
@ -296,6 +296,8 @@
|
|||
UNSPEC_VSX_DIVUD
|
||||
UNSPEC_VSX_MULSD
|
||||
UNSPEC_VSX_SIGN_EXTEND
|
||||
UNSPEC_VSX_XVCVBF16SP
|
||||
UNSPEC_VSX_XVCVSPBF16
|
||||
UNSPEC_VSX_XVCVSPSXDS
|
||||
UNSPEC_VSX_VSLO
|
||||
UNSPEC_VSX_EXTRACT
|
||||
|
@ -346,6 +348,12 @@
|
|||
UNSPEC_XXGENPCV
|
||||
])
|
||||
|
||||
(define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
|
||||
UNSPEC_VSX_XVCVBF16SP])
|
||||
|
||||
(define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
|
||||
(UNSPEC_VSX_XVCVBF16SP "xvcvbf16sp")])
|
||||
|
||||
;; VSX moves
|
||||
|
||||
;; The patterns for LE permuted loads and stores come before the general
|
||||
|
@ -5676,3 +5684,10 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "vsx_<xvcvbf16>"
|
||||
[(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
|
||||
(unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
|
||||
XVCVBF16))]
|
||||
"TARGET_FUTURE"
|
||||
"<xvcvbf16> %x0,%x1"
|
||||
[(set_attr "type" "vecfloat")])
|
||||
|
|
|
@ -13858,6 +13858,7 @@ instructions, but allow the compiler to schedule those calls.
|
|||
* PowerPC AltiVec/VSX Built-in Functions::
|
||||
* PowerPC Hardware Transactional Memory Built-in Functions::
|
||||
* PowerPC Atomic Memory Operation Functions::
|
||||
* PowerPC Matrix-Multiply Assist Built-in Functions::
|
||||
* RX Built-in Functions::
|
||||
* S/390 System z Built-in Functions::
|
||||
* SH Built-in Functions::
|
||||
|
@ -21359,6 +21360,100 @@ void amo_stdat_smax (int64_t *, int64_t);
|
|||
void amo_stdat_smin (int64_t *, int64_t);
|
||||
@end smallexample
|
||||
|
||||
@node PowerPC Matrix-Multiply Assist Built-in Functions
|
||||
@subsection PowerPC Matrix-Multiply Assist Built-in Functions
|
||||
ISA 3.1 of the PowerPC added new Matrix-Multiply Assist (MMA) instructions.
|
||||
GCC provides support for these instructions through the following built-in
|
||||
functions which are enabled with the @code{-mmma} option.  The @code{vec_t}
type below is defined to be a normal @code{vector unsigned char} type.  The
@code{uint2}, @code{uint4} and @code{uint8} parameters are 2-bit, 4-bit and
8-bit unsigned integer constants respectively.  The compiler will verify that
they are constants and that their values are within range.
|
||||
|
||||
The built-in functions supported are:
|
||||
|
||||
@smallexample
|
||||
void __builtin_mma_xvi4ger8 (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvi8ger4 (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvi16ger2 (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvi16ger2s (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvf16ger2 (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvbf16ger2 (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvf32ger (__vector_quad *, vec_t, vec_t);
|
||||
|
||||
void __builtin_mma_xvi4ger8pp (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvi8ger4pp (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvi8ger4spp (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvi16ger2pp (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvi16ger2spp (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvf16ger2pp (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvf16ger2pn (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvf16ger2np (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvf16ger2nn (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvbf16ger2pp (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvbf16ger2pn (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvbf16ger2np (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvbf16ger2nn (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvf32gerpp (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvf32gerpn (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvf32gernp (__vector_quad *, vec_t, vec_t);
|
||||
void __builtin_mma_xvf32gernn (__vector_quad *, vec_t, vec_t);
|
||||
|
||||
void __builtin_mma_pmxvi4ger8 (__vector_quad *, vec_t, vec_t, uint4, uint4, uint8);
|
||||
void __builtin_mma_pmxvi4ger8pp (__vector_quad *, vec_t, vec_t, uint4, uint4, uint8);
|
||||
|
||||
void __builtin_mma_pmxvi8ger4 (__vector_quad *, vec_t, vec_t, uint4, uint4, uint4);
|
||||
void __builtin_mma_pmxvi8ger4pp (__vector_quad *, vec_t, vec_t, uint4, uint4, uint4);
|
||||
void __builtin_mma_pmxvi8ger4spp (__vector_quad *, vec_t, vec_t, uint4, uint4, uint4);
|
||||
|
||||
void __builtin_mma_pmxvi16ger2 (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvi16ger2s (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvf16ger2 (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvbf16ger2 (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
|
||||
void __builtin_mma_pmxvi16ger2pp (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvi16ger2spp (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvf16ger2pp (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvf16ger2pn (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvf16ger2np (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvf16ger2nn (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvbf16ger2pp (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvbf16ger2pn (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvbf16ger2np (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
void __builtin_mma_pmxvbf16ger2nn (__vector_quad *, vec_t, vec_t, uint4, uint4, uint2);
|
||||
|
||||
void __builtin_mma_pmxvf32ger (__vector_quad *, vec_t, vec_t, uint4, uint4);
|
||||
void __builtin_mma_pmxvf32gerpp (__vector_quad *, vec_t, vec_t, uint4, uint4);
|
||||
void __builtin_mma_pmxvf32gerpn (__vector_quad *, vec_t, vec_t, uint4, uint4);
|
||||
void __builtin_mma_pmxvf32gernp (__vector_quad *, vec_t, vec_t, uint4, uint4);
|
||||
void __builtin_mma_pmxvf32gernn (__vector_quad *, vec_t, vec_t, uint4, uint4);
|
||||
|
||||
void __builtin_mma_xvf64ger (__vector_quad *, __vector_pair, vec_t);
|
||||
void __builtin_mma_xvf64gerpp (__vector_quad *, __vector_pair, vec_t);
|
||||
void __builtin_mma_xvf64gerpn (__vector_quad *, __vector_pair, vec_t);
|
||||
void __builtin_mma_xvf64gernp (__vector_quad *, __vector_pair, vec_t);
|
||||
void __builtin_mma_xvf64gernn (__vector_quad *, __vector_pair, vec_t);
|
||||
|
||||
void __builtin_mma_pmxvf64ger (__vector_quad *, __vector_pair, vec_t, uint4, uint2);
|
||||
void __builtin_mma_pmxvf64gerpp (__vector_quad *, __vector_pair, vec_t, uint4, uint2);
|
||||
void __builtin_mma_pmxvf64gerpn (__vector_quad *, __vector_pair, vec_t, uint4, uint2);
|
||||
void __builtin_mma_pmxvf64gernp (__vector_quad *, __vector_pair, vec_t, uint4, uint2);
|
||||
void __builtin_mma_pmxvf64gernn (__vector_quad *, __vector_pair, vec_t, uint4, uint2);
|
||||
|
||||
void __builtin_mma_xxmtacc (__vector_quad *);
|
||||
void __builtin_mma_xxmfacc (__vector_quad *);
|
||||
void __builtin_mma_xxsetaccz (__vector_quad *);
|
||||
|
||||
void __builtin_mma_assemble_acc (__vector_quad *, vec_t, vec_t, vec_t, vec_t);
|
||||
void __builtin_mma_disassemble_acc (void *, __vector_quad *);
|
||||
|
||||
void __builtin_mma_assemble_pair (__vector_pair *, vec_t, vec_t);
|
||||
void __builtin_mma_disassemble_pair (void *, __vector_pair *);
|
||||
|
||||
vec_t __builtin_vsx_xvcvspbf16 (vec_t);
|
||||
vec_t __builtin_vsx_xvcvbf16sp (vec_t);
|
||||
@end smallexample
|
||||
|
||||
@node RX Built-in Functions
|
||||
@subsection RX Built-in Functions
|
||||
GCC supports some of the RX instructions which cannot be expressed in
|
||||
|
|
313
gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c
Normal file
313
gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c
Normal file
|
@ -0,0 +1,313 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_future_ok } */
|
||||
/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
|
||||
|
||||
typedef unsigned char vec_t __attribute__((vector_size(16)));
|
||||
|
||||
void
|
||||
foo0 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_xvi4ger8 (&acc, vec0, vec1);
|
||||
__builtin_mma_xvi4ger8pp (&acc, vec0, vec1);
|
||||
dst[0] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo1 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_xvi8ger4 (&acc, vec0, vec1);
|
||||
__builtin_mma_xvi8ger4pp (&acc, vec0, vec1);
|
||||
__builtin_mma_xvi8ger4spp(&acc, vec0, vec1);
|
||||
dst[1] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo2 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_xvi16ger2 (&acc, vec0, vec1);
|
||||
__builtin_mma_xvi16ger2pp (&acc, vec0, vec1);
|
||||
dst[2] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo3 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_xvi16ger2s (&acc, vec0, vec1);
|
||||
__builtin_mma_xvi16ger2spp (&acc, vec0, vec1);
|
||||
dst[3] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo4 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_xvf16ger2 (&acc, vec0, vec1);
|
||||
__builtin_mma_xvf16ger2pp (&acc, vec0, vec1);
|
||||
__builtin_mma_xvf16ger2pn (&acc, vec0, vec1);
|
||||
dst[4] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo4b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
acc = src[0];
|
||||
__builtin_mma_xvf16ger2np (&acc, vec0, vec1);
|
||||
__builtin_mma_xvf16ger2nn (&acc, vec0, vec1);
|
||||
dst[4] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo5 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_xvbf16ger2 (&acc, vec0, vec1);
|
||||
__builtin_mma_xvbf16ger2pp (&acc, vec0, vec1);
|
||||
__builtin_mma_xvbf16ger2pn (&acc, vec0, vec1);
|
||||
dst[5] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo5b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
acc = src[0];
|
||||
__builtin_mma_xvbf16ger2np (&acc, vec0, vec1);
|
||||
__builtin_mma_xvbf16ger2nn (&acc, vec0, vec1);
|
||||
dst[5] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo6 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_xvf32ger (&acc, vec0, vec1);
|
||||
__builtin_mma_xvf32gerpp (&acc, vec0, vec1);
|
||||
__builtin_mma_xvf32gerpn (&acc, vec0, vec1);
|
||||
dst[6] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo6b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
acc = src[0];
|
||||
__builtin_mma_xvf32gernp (&acc, vec0, vec1);
|
||||
__builtin_mma_xvf32gernn (&acc, vec0, vec1);
|
||||
dst[6] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo7 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_pmxvi4ger8 (&acc, vec0, vec1, 15, 15, 255);
|
||||
__builtin_mma_pmxvi4ger8pp (&acc, vec0, vec1, 15, 15, 255);
|
||||
dst[7] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo8 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_pmxvi8ger4 (&acc, vec0, vec1, 15, 15, 15);
|
||||
__builtin_mma_pmxvi8ger4pp (&acc, vec0, vec1, 15, 15, 15);
|
||||
__builtin_mma_pmxvi8ger4spp(&acc, vec0, vec1, 15, 15, 15);
|
||||
dst[8] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo9 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_pmxvi16ger2 (&acc, vec0, vec1, 15, 15, 3);
|
||||
__builtin_mma_pmxvi16ger2pp (&acc, vec0, vec1, 15, 15, 3);
|
||||
dst[9] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo10 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_pmxvi16ger2s (&acc, vec0, vec1, 15, 15, 3);
|
||||
__builtin_mma_pmxvi16ger2spp (&acc, vec0, vec1, 15, 15, 3);
|
||||
dst[10] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo11 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_pmxvf16ger2 (&acc, vec0, vec1, 15, 15, 3);
|
||||
__builtin_mma_pmxvf16ger2pp (&acc, vec0, vec1, 15, 15, 3);
|
||||
__builtin_mma_pmxvf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
|
||||
dst[11] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo11b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
acc = src[0];
|
||||
__builtin_mma_pmxvf16ger2np (&acc, vec0, vec1, 15, 15, 3);
|
||||
__builtin_mma_pmxvf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
|
||||
dst[11] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo12 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_pmxvbf16ger2 (&acc, vec0, vec1, 15, 15, 3);
|
||||
__builtin_mma_pmxvbf16ger2pp (&acc, vec0, vec1, 15, 15, 3);
|
||||
__builtin_mma_pmxvbf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
|
||||
dst[12] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo12b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
acc = src[0];
|
||||
__builtin_mma_pmxvbf16ger2np (&acc, vec0, vec1, 15, 15, 3);
|
||||
__builtin_mma_pmxvbf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
|
||||
dst[12] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo13 (__vector_quad *dst, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_pmxvf32ger (&acc, vec0, vec1, 15, 15);
|
||||
__builtin_mma_pmxvf32gerpp (&acc, vec0, vec1, 15, 15);
|
||||
__builtin_mma_pmxvf32gerpn (&acc, vec0, vec1, 15, 15);
|
||||
dst[13] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo13b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
|
||||
{
|
||||
__vector_quad acc;
|
||||
vec_t vec0 = vec[0];
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
acc = src[0];
|
||||
__builtin_mma_pmxvf32gernp (&acc, vec0, vec1, 15, 15);
|
||||
__builtin_mma_pmxvf32gernn (&acc, vec0, vec1, 15, 15);
|
||||
dst[13] = acc;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mlxv\M} 40 } } */
|
||||
/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxvp\M} 40 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 20 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 6 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvbf16ger2\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvbf16ger2nn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvbf16ger2np\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvbf16ger2pn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvbf16ger2pp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf16ger2\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf16ger2nn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf16ger2np\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf16ger2pn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf16ger2pp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf32ger\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf32gernn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf32gernp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf32gerpn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf32gerpp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvi16ger2\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvi16ger2pp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvi16ger2s\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvi16ger2spp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvi4ger8\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvi4ger8pp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvi8ger4\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvi8ger4pp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvi8ger4spp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2nn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2np\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2pn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2pp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf16ger2\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf16ger2nn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf16ger2np\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf16ger2pn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf16ger2pp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf32ger\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf32gernn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf32gernp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf32gerpn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf32gerpp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvi16ger2\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvi16ger2pp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvi16ger2s\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvi16ger2spp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvi4ger8\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvi4ger8pp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvi8ger4\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvi8ger4pp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvi8ger4spp\M} 1 } } */
|
72
gcc/testsuite/gcc.target/powerpc/mma-builtin-2.c
Normal file
72
gcc/testsuite/gcc.target/powerpc/mma-builtin-2.c
Normal file
|
@ -0,0 +1,72 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_future_ok } */
|
||||
/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
|
||||
|
||||
typedef unsigned char vec_t __attribute__((vector_size(16)));
|
||||
|
||||
void
|
||||
foo0 (__vector_quad *dst, vec_t *vec, __vector_pair *pvecp)
|
||||
{
|
||||
__vector_quad acc;
|
||||
__vector_pair vecp0 = *pvecp;
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
__builtin_mma_xvf64ger (&acc, vecp0, vec1);
|
||||
__builtin_mma_xvf64gerpp (&acc, vecp0, vec1);
|
||||
__builtin_mma_xvf64gerpn (&acc, vecp0, vec1);
|
||||
dst[0] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo1 (__vector_quad *dst, __vector_quad *src, vec_t *vec, __vector_pair *pvecp)
|
||||
{
|
||||
__vector_quad acc;
|
||||
__vector_pair vecp0 = *pvecp;
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
acc = src[0];
|
||||
__builtin_mma_xvf64gernp (&acc, vecp0, vec1);
|
||||
__builtin_mma_xvf64gernn (&acc, vecp0, vec1);
|
||||
dst[0] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo2 (__vector_quad *dst, vec_t *vec, __vector_pair *pvecp)
|
||||
{
|
||||
__vector_quad acc;
|
||||
__vector_pair vecp0 = *pvecp;
|
||||
vec_t vec1 = vec[1];
|
||||
__builtin_mma_pmxvf64ger (&acc, vecp0, vec1, 15, 3);
|
||||
__builtin_mma_pmxvf64gerpp (&acc, vecp0, vec1, 15, 3);
|
||||
__builtin_mma_pmxvf64gerpn (&acc, vecp0, vec1, 15, 3);
|
||||
dst[1] = acc;
|
||||
}
|
||||
|
||||
void
|
||||
foo3 (__vector_quad *dst, __vector_quad *src, vec_t *vec, __vector_pair *pvecp)
|
||||
{
|
||||
__vector_quad acc;
|
||||
__vector_pair vecp0 = *pvecp;
|
||||
vec_t vec1 = vec[1];
|
||||
|
||||
acc = src[0];
|
||||
__builtin_mma_pmxvf64gernp (&acc, vecp0, vec1, 15, 3);
|
||||
__builtin_mma_pmxvf64gernn (&acc, vecp0, vec1, 15, 3);
|
||||
dst[1] = acc;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mlxvp\M} 8 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxvp\M} 8 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf64ger\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf64gerpp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf64gerpn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf64gernp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvf64gernn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf64ger\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf64gerpp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf64gerpn\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf64gernp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mpmxvf64gernn\M} 1 } } */
|
31
gcc/testsuite/gcc.target/powerpc/mma-builtin-3.c
Normal file
31
gcc/testsuite/gcc.target/powerpc/mma-builtin-3.c
Normal file
|
@ -0,0 +1,31 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_future_ok } */
|
||||
/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
|
||||
|
||||
/* Codegen test for the xxmtacc and xxmfacc built-ins with no memory
   traffic: the accumulator is produced and consumed by inline asm
   statements rather than loaded from or stored to memory.

   The dg-final directives at the end of this file expect exactly one
   xxmtacc and one xxmfacc, and no lxvp/stxvp.  */
void
|
||||
foo0 (void)
|
||||
{
|
||||
__vector_quad acc;
|
||||
/* The asm body is only an assembler comment; the "=d" output operand
   makes acc look defined to the compiler.
   NOTE(review): confirm which register class "d" selects for a
   __vector_quad operand.  */
asm ("#..." : "=d" (acc));
|
||||
__builtin_mma_xxmtacc (&acc);
|
||||
__builtin_mma_xxmfacc (&acc);
|
||||
/* Consume acc as an asm input so the preceding built-in calls have a
   visible use.  */
asm ("#..." :: "d" (acc));
|
||||
}
|
||||
|
||||
/* 16-byte vector of unsigned char, used as the vector operand type in
   this test.  */
typedef unsigned char vec_t __attribute__((vector_size(16)));
|
||||
|
||||
/* Codegen test for the two bfloat16 conversion built-ins.

   vec - vector array; elements 0 and 2 are read, elements 1 and 3 are
         written with the xvcvspbf16 and xvcvbf16sp results respectively.

   The dg-final directives at the end of this file expect exactly one
   xvcvspbf16, one xvcvbf16sp, two lxv and two stxv.  */
void
|
||||
foo1 (vec_t *vec)
|
||||
{
|
||||
vec[1] = __builtin_vsx_xvcvspbf16 (vec[0]);
|
||||
vec[3] = __builtin_vsx_xvcvbf16sp (vec[2]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mlxv\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxv\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-not {\mlxvp\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mstxvp\M} } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvcvspbf16\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxvcvbf16sp\M} 1 } } */
|
28
gcc/testsuite/gcc.target/powerpc/mma-builtin-4.c
Normal file
28
gcc/testsuite/gcc.target/powerpc/mma-builtin-4.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_future_ok } */
|
||||
/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
|
||||
|
||||
/* 16-byte vector of unsigned char, used as the vector operand type in
   this test.  */
typedef unsigned char vec_t __attribute__((vector_size(16)));
|
||||
|
||||
/* Codegen test: build a vector pair from two non-adjacent vectors and
   store it.

   dst - receives the assembled pair.
   src - vector array; elements 0 and 4 are read.

   The dg-final directives at the end of this file count lxv, lxvp, stxv
   and stxvp across both foo and bar in this file.  */
void
|
||||
foo (__vector_pair *dst, vec_t *src)
|
||||
{
|
||||
__vector_pair pair;
|
||||
__builtin_mma_assemble_pair (&pair, src[0], src[4]);
|
||||
*dst = pair;
|
||||
}
|
||||
|
||||
/* Codegen test: split a vector pair into its two vectors and store them
   to non-adjacent destinations.

   dst - vector array; elements 0 and 4 are written.
   src - pointer to the pair to disassemble.

   The dg-final directives at the end of this file count lxv, lxvp, stxv
   and stxvp across both foo and bar in this file.  */
void
|
||||
bar (vec_t *dst, __vector_pair *src)
|
||||
{
|
||||
/* Local buffer that receives the two halves of the pair.  */
vec_t res[2];
|
||||
__builtin_mma_disassemble_pair (res, src);
|
||||
dst[0] = res[0];
|
||||
dst[4] = res[1];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mlxv\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxv\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxvp\M} 1 } } */
|
||||
|
31
gcc/testsuite/gcc.target/powerpc/mma-builtin-5.c
Normal file
31
gcc/testsuite/gcc.target/powerpc/mma-builtin-5.c
Normal file
|
@ -0,0 +1,31 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_future_ok } */
|
||||
/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
|
||||
|
||||
/* 16-byte vector of unsigned char, used as the vector operand type in
   this test.  */
typedef unsigned char vec_t __attribute__((vector_size(16)));
|
||||
|
||||
/* Codegen test: build an accumulator from four non-adjacent vectors and
   store it.

   dst - receives the assembled accumulator.
   src - vector array; elements 0, 4, 8 and 12 are read.

   The dg-final directives at the end of this file count lxv, lxvp, stxv,
   stxvp, xxmfacc and xxmtacc across both foo and bar in this file.  */
void
|
||||
foo (__vector_quad *dst, vec_t *src)
|
||||
{
|
||||
__vector_quad acc;
|
||||
__builtin_mma_assemble_acc (&acc, src[0], src[4], src[8], src[12]);
|
||||
*dst = acc;
|
||||
}
|
||||
|
||||
/* Codegen test: split an accumulator into its four vectors and store
   them to non-adjacent destinations.

   dst - vector array; elements 0, 4, 8 and 12 are written.
   src - pointer to the accumulator to disassemble.

   The dg-final directives at the end of this file count lxv, lxvp, stxv,
   stxvp, xxmfacc and xxmtacc across both foo and bar in this file.  */
void
|
||||
bar (vec_t *dst, __vector_quad *src)
|
||||
{
|
||||
/* Local buffer that receives the four vectors of the accumulator.  */
vec_t res[4];
|
||||
__builtin_mma_disassemble_acc (res, src);
|
||||
dst[0] = res[0];
|
||||
dst[4] = res[1];
|
||||
dst[8] = res[2];
|
||||
dst[12] = res[3];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxv\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
|
18
gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c
Normal file
18
gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target powerpc_future_ok } */
|
||||
/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
|
||||
|
||||
/* Codegen test for the xxsetaccz built-in: apply it to a local
   accumulator and store the accumulator through dst.

   dst - receives the accumulator.

   The dg-final directives at the end of this file expect one xxsetaccz,
   one xxmfacc and two stxvp, and no lxv, lxvp or xxmtacc.  */
void
|
||||
foo (__vector_quad *dst)
|
||||
{
|
||||
/* No initializer: the only write to acc before the store is the
   built-in call below.  */
__vector_quad acc;
|
||||
__builtin_mma_xxsetaccz (&acc);
|
||||
*dst = acc;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not {\mlxv\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mlxvp\M} } } */
|
||||
/* { dg-final { scan-assembler-not {\mxxmtacc\M} } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
|
Loading…
Add table
Reference in a new issue