Commit 577c662

Revert r359392 and r358887
Reverts "[X86] Remove (V)MOV64toSDrr/m and (V)MOVDI2SSrr/m. Use 128-bit result MOVD/MOVQ and COPY_TO_REGCLASS instead" Reverts "[TargetLowering][AMDGPU][X86] Improve SimplifyDemandedBits bitcast handling" Eric Christopher and Jorge Gorbe Moya reported some issues with these patches to me off list. Removing the CodeGenOnly instructions has changed how fneg is handled during fast-isel with sse/sse2. We're now emitting fsub -0.0, x instead moving to the integer domain(in a GPR), xoring the sign bit, and then moving back to xmm. This is because the fast isel table no longer contains an entry for (f32/f64 bitcast (i32/i64)) so the target independent fneg code fails. The use of fsub changes the behavior of nan with respect to -O2 codegen which will always use a pxor. NOTE: We still have a difference with double with -m32 since the move to GPR doesn't work there. I'll file a separate PR for that and add test cases. Since removing the CodeGenOnly instructions was fixing PR41619, I'm reverting r358887 which exposed that PR. Though I wouldn't be surprised if that bug can still be hit independent of that. This should hopefully get Google back to green. I'll work with Simon and other X86 folks to figure out how to move forward again. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360066 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent ce031c4 commit 577c662

16 files changed, +374 −208 lines
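
For reference, a minimal IR sketch of the fast-isel fneg pattern described in the commit message (a hypothetical example for illustration, not a test from this commit; the function name is invented):

  ; fneg written the classic fsub way, as fast-isel sees it.
  ; With the CodeGenOnly (V)MOV64toSD/(V)MOVDI2SS instructions present,
  ; the target-independent fneg path can select the integer-domain
  ; sequence (move to GPR, xor the sign bit, move back to xmm); without
  ; them it falls back to an actual fsub, which differs for nan inputs.
  define float @fneg_f32(float %x) {
    %neg = fsub float -0.000000e+00, %x
    ret float %neg
  }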

lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 1 addition & 25 deletions
@@ -1567,30 +1567,6 @@ bool TargetLowering::SimplifyDemandedBits(
                                         KnownSrcZero, TLO, Depth + 1))
           return true;
 
-        KnownBits KnownSrcBits;
-        if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
-                                 KnownSrcBits, TLO, Depth + 1))
-          return true;
-      } else if ((NumSrcEltBits % BitWidth) == 0 &&
-                 TLO.DAG.getDataLayout().isLittleEndian()) {
-        unsigned Scale = NumSrcEltBits / BitWidth;
-        unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
-        APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
-        APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
-        for (unsigned i = 0; i != NumElts; ++i)
-          if (DemandedElts[i]) {
-            unsigned Offset = (i % Scale) * BitWidth;
-            DemandedSrcBits.insertBits(DemandedBits, Offset);
-            DemandedSrcElts.setBit(i / Scale);
-          }
-
-        if (SrcVT.isVector()) {
-          APInt KnownSrcUndef, KnownSrcZero;
-          if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
-                                         KnownSrcZero, TLO, Depth + 1))
-            return true;
-        }
-
         KnownBits KnownSrcBits;
         if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                                  KnownSrcBits, TLO, Depth + 1))
@@ -1600,7 +1576,7 @@ bool TargetLowering::SimplifyDemandedBits(
     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
     // recursive call where Known may be useful to the caller.
     if (Depth > 0) {
-      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+      Known = TLO.DAG.computeKnownBits(Op, Depth);
       return false;
     }
     break;
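
For context, the reverted hunk extended SimplifyDemandedBits to the narrowing direction of a bitcast on little-endian targets: with Scale = NumSrcEltBits / BitWidth, demanded result element i maps to source element i / Scale at bit offset (i % Scale) * BitWidth. A small worked example of that mapping (hypothetical IR, little-endian assumed):

  ; Here Scale = 64 / 32 = 2. Demanding only element 3 of %v demands
  ; source element 3 / 2 = 1 at bit offset (3 % 2) * 32, i.e. bits
  ; [32, 64) of that element, so the reverted code could let the rest
  ; of %src be simplified away.
  define i32 @narrowing_bitcast(<2 x i64> %src) {
    %v = bitcast <2 x i64> %src to <4 x i32>
    %e = extractelement <4 x i32> %v, i32 3
    ret i32 %e
  }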

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 11 additions & 25 deletions
@@ -3202,44 +3202,30 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
 
 SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
-  auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  if (!RHS)
+  if (N->getValueType(0) != MVT::i64)
     return SDValue();
 
-  EVT VT = N->getValueType(0);
-  SDValue LHS = N->getOperand(0);
-  unsigned ShiftAmt = RHS->getZExtValue();
-  SelectionDAG &DAG = DCI.DAG;
-  SDLoc SL(N);
-
-  // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1)
-  // this improves the ability to match BFE patterns in isel.
-  if (LHS.getOpcode() == ISD::AND) {
-    if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
-      if (Mask->getAPIntValue().isShiftedMask() &&
-          Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) {
-        return DAG.getNode(
-            ISD::AND, SL, VT,
-            DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
-            DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
-      }
-    }
-  }
-
-  if (VT != MVT::i64)
+  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!RHS)
     return SDValue();
 
+  unsigned ShiftAmt = RHS->getZExtValue();
   if (ShiftAmt < 32)
     return SDValue();
 
   // srl i64:x, C for C >= 32
   // =>
   //   build_pair (srl hi_32(x), C - 32), 0
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+
   SDValue One = DAG.getConstant(1, SL, MVT::i32);
   SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
 
-  SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS);
-  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One);
+  SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0));
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
+                           VecOp, One);
 
   SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
   SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
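
The combine this function keeps, restated: a 64-bit logical shift right by a constant C >= 32 only reads the high 32 bits of the source, so it can become a 32-bit shift of the high half paired with a zero low half. A sketch of an input that triggers it (hypothetical, not a test from this commit):

  ; lshr i64 %x, 40 becomes, conceptually:
  ;   %hi  = extractelement (bitcast i64 %x to <2 x i32>), 1
  ;   %srl = 32-bit srl of %hi by 40 - 32 = 8
  ;   result = build_pair %srl, 0   ; zero high word
  define i64 @srl_by_40(i64 %x) {
    %r = lshr i64 %x, 40
    ret i64 %r
  }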

lib/Target/X86/X86InstrAVX512.td

Lines changed: 22 additions & 7 deletions
@@ -3832,6 +3832,14 @@ def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                       "vmovq\t{$src, $dst|$dst, $src}", []>,
                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
 let isCodeGenOnly = 1 in {
+def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
+                      "vmovq\t{$src, $dst|$dst, $src}",
+                      [(set FR64X:$dst, (bitconvert GR64:$src))]>,
+                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
+def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
+                      "vmovq\t{$src, $dst|$dst, $src}",
+                      [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
+                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (bitconvert FR64X:$src))]>,
@@ -3844,6 +3852,20 @@ def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$
 }
 } // ExeDomain = SSEPackedInt
 
+// Move Int Doubleword to Single Scalar
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
+                      EVEX, Sched<[WriteVecMoveFromGpr]>;
+
+def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
+                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
 // Move doubleword from xmm register to r/m32
 //
 let ExeDomain = SSEPackedInt in {
@@ -3860,13 +3882,6 @@ def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
 } // ExeDomain = SSEPackedInt
 
-let Predicates = [HasAVX512] in {
-  def : Pat<(f64 (bitconvert GR64:$src)),
-            (COPY_TO_REGCLASS (VMOV64toPQIZrr GR64:$src), FR64X)>;
-  def : Pat<(f32 (bitconvert GR32:$src)),
-            (COPY_TO_REGCLASS (VMOVDI2PDIZrr GR32:$src), FR32X)>;
-}
-
 // Move quadword from xmm1 register to r/m64
 //
 let ExeDomain = SSEPackedInt in {

lib/Target/X86/X86InstrFoldTables.cpp

Lines changed: 6 additions & 0 deletions
@@ -531,11 +531,13 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::MOV32rr, X86::MOV32rm, 0 },
   { X86::MOV64rr, X86::MOV64rm, 0 },
   { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+  { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
   { X86::MOV8rr, X86::MOV8rm, 0 },
   { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
   { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
   { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
   { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+  { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
   { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
   { X86::MOVDQUrr, X86::MOVDQUrm, 0 },
   { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
@@ -816,6 +818,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::VGETMANTPSZrri, X86::VGETMANTPSZrmi, 0 },
   { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
   { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
+  { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
+  { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
   { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
   { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
   { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
@@ -833,6 +837,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
   { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
   { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
+  { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
+  { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
   { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
   { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
   { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
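
These entries map each restored register form back to its memory form so foldMemoryOperand can rewrite the instruction when its source has been spilled. A rough sketch of the IR shape where the memory form applies (hypothetical example; with the rm patterns restored in the .td files, isel can also match the load directly):

  ; With { X86::MOV64toSDrr, X86::MOV64toSDrm } in MemoryFoldTable1, an
  ; i64 source living in memory can end up as a single
  ;   movq (%rdi), %xmm0
  ; rather than a GPR load followed by a separate GPR-to-xmm move.
  define double @f64_bitcast_load(i64* %p) {
    %v = load i64, i64* %p
    %f = bitcast i64 %v to double
    ret double %f
  }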

lib/Target/X86/X86InstrSSE.td

Lines changed: 42 additions & 15 deletions
@@ -4109,6 +4109,11 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
 def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                         "movq\t{$src, $dst|$dst, $src}", []>,
                         VEX, Sched<[WriteVecLoad]>;
+let isCodeGenOnly = 1 in
+def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+                        "movq\t{$src, $dst|$dst, $src}",
+                        [(set FR64:$dst, (bitconvert GR64:$src))]>,
+                        VEX, Sched<[WriteVecMoveFromGpr]>;
 
 def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
@@ -4129,8 +4134,37 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
 def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                         "movq\t{$src, $dst|$dst, $src}", []>,
                         Sched<[WriteVecLoad]>;
+let isCodeGenOnly = 1 in
+def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+                       "movq\t{$src, $dst|$dst, $src}",
+                       [(set FR64:$dst, (bitconvert GR64:$src))]>,
+                       Sched<[WriteVecMoveFromGpr]>;
 } // ExeDomain = SSEPackedInt
 
+//===---------------------------------------------------------------------===//
+// Move Int Doubleword to Single Scalar
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set FR32:$dst, (bitconvert GR32:$src))]>,
+                       VEX, Sched<[WriteVecMoveFromGpr]>;
+
+def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+                       VEX, Sched<[WriteVecLoad]>;
+def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+                     "movd\t{$src, $dst|$dst, $src}",
+                     [(set FR32:$dst, (bitconvert GR32:$src))]>,
+                     Sched<[WriteVecMoveFromGpr]>;
+
+def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+                     "movd\t{$src, $dst|$dst, $src}",
+                     [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+                     Sched<[WriteVecLoad]>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
 //===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int to Packed Double Int
 //
@@ -4158,21 +4192,6 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                       Sched<[WriteVecStore]>;
 } // ExeDomain = SSEPackedInt
 
-let Predicates = [UseAVX] in {
-  def : Pat<(f64 (bitconvert GR64:$src)),
-            (COPY_TO_REGCLASS (VMOV64toPQIrr GR64:$src), FR64)>;
-  def : Pat<(f32 (bitconvert GR32:$src)),
-            (COPY_TO_REGCLASS (VMOVDI2PDIrr GR32:$src), FR32)>;
-}
-
-let Predicates = [UseSSE2] in
-def : Pat<(f64 (bitconvert GR64:$src)),
-          (COPY_TO_REGCLASS (MOV64toPQIrr GR64:$src), FR64)>;
-
-let Predicates = [UseSSE1] in
-def : Pat<(f32 (bitconvert GR32:$src)),
-          (COPY_TO_REGCLASS (MOVDI2PDIrr GR32:$src), FR32)>;
-
 //===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int first element to Doubleword Int
 //
@@ -4206,6 +4225,10 @@ def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
 //
 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
 let Predicates = [UseAVX] in
+def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+                         "movq\t{$src, $dst|$dst, $src}",
+                         [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+                         VEX, Sched<[WriteVecLoad]>;
 def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (bitconvert FR64:$src))]>,
@@ -4215,6 +4238,10 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
                          [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
                          VEX, Sched<[WriteVecStore]>;
 
+def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+                       "movq\t{$src, $dst|$dst, $src}",
+                       [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+                       Sched<[WriteVecLoad]>;
 def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set GR64:$dst, (bitconvert FR64:$src))]>,
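
Restoring these CodeGenOnly definitions gives isel a direct pattern for GPR-to-scalar-FP bitcasts again, instead of the 128-bit MOVD/MOVQ plus COPY_TO_REGCLASS patterns deleted above. A minimal sketch of what they match (hypothetical function name):

  ; With MOV64toSDrr back, this plausibly selects to movq %rdi, %xmm0
  ; under SSE2: a single GPR-to-xmm move, and, per the commit message,
  ; a fast-isel table entry for (f64 bitcast i64) that the
  ; target-independent fneg lowering relies on.
  define double @f64_bitcast(i64 %x) {
    %f = bitcast i64 %x to double
    ret double %f
  }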

test/CodeGen/AMDGPU/store-weird-sizes.ll

Lines changed: 2 additions & 2 deletions
@@ -86,8 +86,8 @@ define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    ds_write_b16 v1, v2 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_bfe_u32 v0, v0, 16, 7
-; GFX9-NEXT:    ds_write_b8 v1, v0 offset:6
+; GFX9-NEXT:    v_and_b32_e32 v0, 0x7f0000, v0
+; GFX9-NEXT:    ds_write_b8_d16_hi v1, v0 offset:6
 ; GFX9-NEXT:    ds_write_b32 v1, v3
 ; GFX9-NEXT:    s_endpgm
   store i55 %arg, i55 addrspace(3)* %ptr, align 8

test/CodeGen/X86/bitcast-setcc-256.ll

Lines changed: 16 additions & 0 deletions
@@ -448,6 +448,22 @@ define void @bitcast_8i32_store(i8* %p, <8 x i32> %a0) {
 define void @bitcast_4i64_store(i4* %p, <4 x i64> %a0) {
 ; SSE2-SSSE3-LABEL: bitcast_4i64_store:
 ; SSE2-SSSE3:       # %bb.0:
+; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
+; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
+; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm4
+; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm4
+; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
+; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
+; SSE2-SSSE3-NEXT:    por %xmm4, %xmm1
+; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
+; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm3
+; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
+; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
+; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
 ; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
 ; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
 ; SSE2-SSSE3-NEXT:    movb %al, (%rdi)

test/CodeGen/X86/bitcast-setcc-512.ll

Lines changed: 5 additions & 3 deletions
@@ -609,13 +609,15 @@ define void @bitcast_8i64_store(i8* %p, <8 x i64> %a0) {
 ;
 ; AVX1-LABEL: bitcast_8i64_store:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
