x86: also optimize KXOR{D,Q} and KANDN{D,Q}
When both source registers are the same, the result is all zeros regardless of the operand width, so these can be converted to the shorter 2-byte VEX encoding by using KXORW / KANDNW as replacements.
This commit is contained in:
parent
ae2387feae
commit
1424ad8677
9 changed files with 56 additions and 9 deletions
|
@ -1,3 +1,12 @@
|
|||
2018-07-31 Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
* config/tc-i386.c (optimize_encoding): Also handle kandnd,
|
||||
kandnq, kxord, and kxorq.
|
||||
* testsuite/gas/i386/optimize-1.s: Add kandn and kxor tests.
|
||||
* testsuite/gas/i386/optimize-1.d,
|
||||
testsuite/gas/i386/optimize-4.d,
|
||||
testsuite/gas/i386/optimize-5.d: Adjust expectations.
|
||||
|
||||
2018-07-31 Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
* config/tc-i386.c (check_VecOperands): Convert masking handling
|
||||
|
|
|
@ -3942,7 +3942,11 @@ optimize_encoding (void)
|
|||
|| i.tm.base_opcode == 0x66f8
|
||||
|| i.tm.base_opcode == 0x66f9
|
||||
|| i.tm.base_opcode == 0x66fa
|
||||
|| i.tm.base_opcode == 0x66fb)
|
||||
|| i.tm.base_opcode == 0x66fb
|
||||
|| i.tm.base_opcode == 0x42
|
||||
|| i.tm.base_opcode == 0x6642
|
||||
|| i.tm.base_opcode == 0x47
|
||||
|| i.tm.base_opcode == 0x6647)
|
||||
&& i.tm.extension_opcode == None))
|
||||
{
|
||||
/* Optimize: -O2:
|
||||
|
@ -3973,6 +3977,12 @@ optimize_encoding (void)
|
|||
EVEX VOP %ymmM, %ymmM, %ymmN
|
||||
-> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
|
||||
-> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16)
|
||||
VOP, one of kxord and kxorq:
|
||||
VEX VOP %kM, %kM, %kN
|
||||
-> VEX kxorw %kM, %kM, %kN
|
||||
VOP, one of kandnd and kandnq:
|
||||
VEX VOP %kM, %kM, %kN
|
||||
-> VEX kandnw %kM, %kM, %kN
|
||||
*/
|
||||
if (is_evex_encoding (&i.tm))
|
||||
{
|
||||
|
@ -3985,6 +3995,11 @@ optimize_encoding (void)
|
|||
i.tm.opcode_modifier.evex = 0;
|
||||
}
|
||||
}
|
||||
else if (i.tm.operand_types[0].bitfield.regmask)
|
||||
{
|
||||
i.tm.base_opcode &= 0xff;
|
||||
i.tm.opcode_modifier.vexw = VEXW0;
|
||||
}
|
||||
else
|
||||
i.tm.opcode_modifier.vex = VEX128;
|
||||
|
||||
|
|
|
@ -58,4 +58,8 @@ Disassembly of section .text:
|
|||
+[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5
|
||||
+[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5
|
||||
+[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5
|
||||
+[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5
|
||||
#pass
|
||||
|
|
|
@ -66,3 +66,9 @@ _start:
|
|||
vpsubq %ymm1, %ymm1, %ymm5{z}{%k7}
|
||||
vpsubq %zmm1, %zmm1, %zmm5
|
||||
vpsubq %ymm1, %ymm1, %ymm5
|
||||
|
||||
kxord %k1, %k1, %k5
|
||||
kxorq %k1, %k1, %k5
|
||||
|
||||
kandnd %k1, %k1, %k5
|
||||
kandnq %k1, %k1, %k5
|
||||
|
|
|
@ -58,6 +58,10 @@ Disassembly of section .text:
|
|||
+[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5
|
||||
+[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5
|
||||
+[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5
|
||||
+[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5
|
||||
+[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5
|
||||
#pass
|
||||
|
|
|
@ -58,6 +58,10 @@ Disassembly of section .text:
|
|||
+[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: c5 f1 fb e9 vpsubq %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5
|
||||
+[a-f0-9]+: c5 f4 47 e9 kxorw %k1,%k1,%k5
|
||||
+[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5
|
||||
+[a-f0-9]+: c5 f4 42 e9 kandnw %k1,%k1,%k5
|
||||
+[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5
|
||||
+[a-f0-9]+: 62 f1 f5 08 55 e9 vandnpd %xmm1,%xmm1,%xmm5
|
||||
#pass
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2018-07-31 Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
* i386-opc.tbl (kandnd, kandnq, kxord, kxorq): Add Optimize.
|
||||
* i386-init.h, i386-tbl.h: Re-generate.
|
||||
|
||||
2018-07-31 Jan Beulich <jbeulich@suse.com>
|
||||
|
||||
* i386-opc.h (ZEROING_MASKING): Rename to ...
|
||||
|
|
|
@ -4211,7 +4211,7 @@ vpmovzxwq, 2, 0x6634, None, 1, CpuAVX512F|CpuAVX512VL, Modrm|EVex=3|Masking=3|Ve
|
|||
|
||||
kaddd, 3, 0x664A, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kandd, 3, 0x6641, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kandnd, 3, 0x6642, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kandnd, 3, 0x6642, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegMask, RegMask, RegMask }
|
||||
kmovd, 2, 0x6690, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask|Dword|Unspecified|BaseIndex, RegMask }
|
||||
kmovd, 2, 0x6691, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, Dword|Unspecified|BaseIndex }
|
||||
knotd, 2, 0x6644, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask }
|
||||
|
@ -4219,10 +4219,10 @@ kord, 3, 0x6645, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|
|
|||
kortestd, 2, 0x6698, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask }
|
||||
ktestd, 2, 0x6699, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask }
|
||||
kxnord, 3, 0x6646, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kxord, 3, 0x6647, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kxord, 3, 0x6647, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegMask, RegMask, RegMask }
|
||||
|
||||
kaddq, 3, 0x4A, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kandnq, 3, 0x42, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kandnq, 3, 0x42, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegMask, RegMask, RegMask }
|
||||
kandq, 3, 0x41, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kmovq, 2, 0x90, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask|Qword|Unspecified|BaseIndex, RegMask }
|
||||
kmovq, 2, 0x91, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, Qword|Unspecified|BaseIndex }
|
||||
|
@ -4233,7 +4233,7 @@ ktestq, 2, 0x99, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=2|IgnoreSize
|
|||
kunpckdq, 3, 0x4B, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kunpckwd, 3, 0x4B, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kxnorq, 3, 0x46, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kxorq, 3, 0x47, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, RegMask, RegMask }
|
||||
kxorq, 3, 0x47, None, 1, CpuAVX512BW, Modrm|Vex=2|VexOpcode=0|VexVVVV=1|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Optimize, { RegMask, RegMask, RegMask }
|
||||
|
||||
kmovd, 2, 0xF292, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32, RegMask }
|
||||
kmovd, 2, 0xF293, None, 1, CpuAVX512BW, Modrm|Vex=1|VexOpcode=0|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMask, Reg32 }
|
||||
|
|
|
@ -67739,7 +67739,7 @@ const insn_template i386_optab[] =
|
|||
0, 0, 0, 0 } },
|
||||
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1,
|
||||
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0 },
|
||||
{ { { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -67918,7 +67918,7 @@ const insn_template i386_optab[] =
|
|||
0, 0, 0, 0 } },
|
||||
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1,
|
||||
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0 },
|
||||
{ { { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -67958,7 +67958,7 @@ const insn_template i386_optab[] =
|
|||
0, 0, 0, 0 } },
|
||||
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1,
|
||||
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0 },
|
||||
{ { { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
@ -68197,7 +68197,7 @@ const insn_template i386_optab[] =
|
|||
0, 0, 0, 0 } },
|
||||
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1,
|
||||
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0 },
|
||||
{ { { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
|
Loading…
Add table
Reference in a new issue