Skip to content

Commit c30df5f

Browse files
committed
[X86] Allow legacy vcvtps2ph intrinsics to select EVEX-encoded instructions. Rely on the EVEX->VEX compression pass to convert them back to VEX encoding.
Missed store-folding opportunities will be fixed in a subsequent commit.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317661 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 14c6360 commit c30df5f

File tree

3 files changed: 32 additions (+32) and 30 deletions (-30)

lib/Target/X86/X86InstrSSE.td

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7698,11 +7698,11 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> {
76987698
T8PD, VEX, Sched<[WriteCvtF2FLd]>;
76997699
}
77007700

7701-
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
7701+
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> {
77027702
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
77037703
(ins RC:$src1, i32u8imm:$src2),
77047704
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7705-
[(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
7705+
[(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
77067706
TAPD, VEX, Sched<[WriteCvtF2F]>;
77077707
let hasSideEffects = 0, mayStore = 1,
77087708
SchedRW = [WriteCvtF2FLd, WriteRMW] in
@@ -7715,6 +7715,8 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
77157715
let Predicates = [HasF16C, NoVLX] in {
77167716
defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem>;
77177717
defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L;
7718+
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem>;
7719+
defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem>, VEX_L;
77187720

77197721
// Pattern match vcvtph2ps of a scalar i64 load.
77207722
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
@@ -7724,23 +7726,17 @@ let Predicates = [HasF16C, NoVLX] in {
77247726
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
77257727
(v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
77267728
(VCVTPH2PSrm addr:$src)>;
7727-
}
7728-
7729-
let Predicates = [HasF16C] in {
7730-
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
7731-
defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
77327729

7733-
def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
7734-
(int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
7735-
addr:$dst),
7736-
(VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
7737-
def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16
7738-
(int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
7739-
addr:$dst),
7740-
(VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
7741-
def : Pat<(store (v8i16 (int_x86_vcvtps2ph_256 VR256:$src1, i32:$src2)),
7742-
addr:$dst),
7743-
(VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
7730+
def : Pat<(store (f64 (extractelt
7731+
(bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
7732+
(iPTR 0))), addr:$dst),
7733+
(VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
7734+
def : Pat<(store (i64 (extractelt
7735+
(bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
7736+
(iPTR 0))), addr:$dst),
7737+
(VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
7738+
def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst),
7739+
(VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
77447740
}
77457741

77467742
// Patterns for matching conversions from float to half-float and vice versa.

lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1588,6 +1588,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
15881588
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
15891589
X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
15901590
X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
1591+
X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
1592+
X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
15911593
X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
15921594
X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
15931595
X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),

test/CodeGen/X86/f16c-intrinsics.ll

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -121,12 +121,12 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) {
121121
;
122122
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128:
123123
; X32-AVX512VL: # BB#0:
124-
; X32-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
124+
; X32-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
125125
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
126126
;
127127
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128:
128128
; X64-AVX512VL: # BB#0:
129-
; X64-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
129+
; X64-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
130130
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
131131
%res = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
132132
ret <8 x i16> %res
@@ -148,13 +148,13 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
148148
;
149149
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256:
150150
; X32-AVX512VL: # BB#0:
151-
; X32-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
151+
; X32-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
152152
; X32-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
153153
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
154154
;
155155
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256:
156156
; X64-AVX512VL: # BB#0:
157-
; X64-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
157+
; X64-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
158158
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
159159
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
160160
%res = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
@@ -238,13 +238,13 @@ define void @test_x86_vcvtps2ph_256_m(<8 x i16>* nocapture %d, <8 x float> %a) n
238238
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
239239
; X32-AVX512VL: # BB#0: # %entry
240240
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
241-
; X32-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%eax) # encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
241+
; X32-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
242242
; X32-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
243243
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
244244
;
245245
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
246246
; X64-AVX512VL: # BB#0: # %entry
247-
; X64-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%rdi) # encoding: [0xc4,0xe3,0x7d,0x1d,0x07,0x03]
247+
; X64-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x07,0x03]
248248
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
249249
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
250250
entry:
@@ -268,15 +268,15 @@ define void @test_x86_vcvtps2ph_128_m(<4 x i16>* nocapture %d, <4 x float> %a) n
268268
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
269269
; X32-AVX512VL: # BB#0: # %entry
270270
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
271-
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
271+
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
272272
; X32-AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
273273
; X32-AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
274274
; X32-AVX512VL-NEXT: vpmovdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x00]
275275
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
276276
;
277277
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
278278
; X64-AVX512VL: # BB#0: # %entry
279-
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
279+
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
280280
; X64-AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
281281
; X64-AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
282282
; X64-AVX512VL-NEXT: vpmovdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07]
@@ -303,12 +303,14 @@ define void @test_x86_vcvtps2ph_128_m2(double* nocapture %hf4x16, <4 x float> %f
303303
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
304304
; X32-AVX512VL: # BB#0: # %entry
305305
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
306-
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
306+
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
307+
; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
307308
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
308309
;
309310
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
310311
; X64-AVX512VL: # BB#0: # %entry
311-
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
312+
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
313+
; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
312314
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
313315
entry:
314316
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
@@ -333,12 +335,14 @@ define void @test_x86_vcvtps2ph_128_m3(i64* nocapture %hf4x16, <4 x float> %f4x3
333335
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
334336
; X32-AVX512VL: # BB#0: # %entry
335337
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
336-
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
338+
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
339+
; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
337340
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
338341
;
339342
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
340343
; X64-AVX512VL: # BB#0: # %entry
341-
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
344+
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
345+
; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
342346
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
343347
entry:
344348
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)

0 commit comments

Comments
 (0)