-
Notifications
You must be signed in to change notification settings - Fork 14.5k
release/21.x: [RISCV] Fix issues in ORI to QC.INSBI transformation (#148809) #148933
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@topperc What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-backend-risc-v Author: None (llvmbot) ChangesBackport d67d91a Requested by: @lenary Full diff: https://github.com/llvm/llvm-project/pull/148933.diff 4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 666c76b21e631..ad5ecaea85448 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -689,10 +689,16 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) {
if (!isShiftedMask_32(C1) || isInt<12>(C1))
return false;
+ // INSBI will clobber the input register in N0. Bail out if we need a copy to
+ // preserve this value.
+ SDValue N0 = Node->getOperand(0);
+ if (!N0.hasOneUse())
+ return false;
+
// If C1 is a shifted mask (but can't be formed as an ORI),
// use a bitfield insert of -1.
// Transform (or x, C1)
- // -> (qc.insbi x, width, shift)
+ // -> (qc.insbi x, -1, width, shift)
const unsigned Leading = llvm::countl_zero((uint32_t)C1);
const unsigned Trailing = llvm::countr_zero((uint32_t)C1);
const unsigned Width = 32 - Leading - Trailing;
@@ -705,7 +711,7 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) {
SDLoc DL(Node);
MVT VT = Node->getSimpleValueType(0);
- SDValue Ops[] = {CurDAG->getSignedTargetConstant(-1, DL, VT),
+ SDValue Ops[] = {N0, CurDAG->getSignedTargetConstant(-1, DL, VT),
CurDAG->getTargetConstant(Width, DL, VT),
CurDAG->getTargetConstant(Trailing, DL, VT)};
SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 26bb1e8d17857..c7cb6e237aeac 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -845,10 +845,11 @@ let Predicates = [HasVendorXqcibi, IsRV32] in {
let Predicates = [HasVendorXqcibm, IsRV32] in {
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
def QC_INSBRI : QCIRVInstRI<0b1, simm11, "qc.insbri">;
- def QC_INSBI : RVInstIBase<0b001, OPC_CUSTOM_0, (outs GPRNoX0:$rd),
- (ins simm5:$imm5, uimm5_plus1:$width,
+ def QC_INSBI : RVInstIBase<0b001, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb),
+ (ins GPRNoX0:$rd, simm5:$imm5, uimm5_plus1:$width,
uimm5:$shamt), "qc.insbi",
"$rd, $imm5, $width, $shamt"> {
+ let Constraints = "$rd = $rd_wb";
bits<5> imm5;
bits<5> shamt;
bits<5> width;
diff --git a/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll b/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll
index f227fa9aa423d..2fa06517508ce 100644
--- a/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll
+++ b/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll
@@ -105,6 +105,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
;
; RV32ZBBXQCIBM-LABEL: test_cttz_i16:
; RV32ZBBXQCIBM: # %bb.0:
+; RV32ZBBXQCIBM-NEXT: not a0, a0
; RV32ZBBXQCIBM-NEXT: qc.insbi a0, -1, 1, 16
; RV32ZBBXQCIBM-NEXT: ctz a0, a0
; RV32ZBBXQCIBM-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/xqcibm-insert.ll b/llvm/test/CodeGen/RISCV/xqcibm-insert.ll
index 6b7f9ae856625..88054a691bad1 100644
--- a/llvm/test/CodeGen/RISCV/xqcibm-insert.ll
+++ b/llvm/test/CodeGen/RISCV/xqcibm-insert.ll
@@ -47,6 +47,29 @@ define i32 @test_insbi_mask(i32 %a) nounwind {
ret i32 %or
}
+define i32 @test_insbi_mask_mv(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: test_insbi_mask_mv:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a0, 16
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32IXQCIBM-LABEL: test_insbi_mask_mv:
+; RV32IXQCIBM: # %bb.0:
+; RV32IXQCIBM-NEXT: mv a0, a1
+; RV32IXQCIBM-NEXT: qc.insbi a0, -1, 16, 0
+; RV32IXQCIBM-NEXT: ret
+;
+; RV32IXQCIBMZBS-LABEL: test_insbi_mask_mv:
+; RV32IXQCIBMZBS: # %bb.0:
+; RV32IXQCIBMZBS-NEXT: mv a0, a1
+; RV32IXQCIBMZBS-NEXT: qc.insbi a0, -1, 16, 0
+; RV32IXQCIBMZBS-NEXT: ret
+ %or = or i32 %b, 65535
+ ret i32 %or
+}
+
define i32 @test_insbi_shifted_mask(i32 %a) nounwind {
; RV32I-LABEL: test_insbi_shifted_mask:
; RV32I: # %bb.0:
@@ -67,6 +90,36 @@ define i32 @test_insbi_shifted_mask(i32 %a) nounwind {
ret i32 %or
}
+define i32 @test_insbi_shifted_mask_multiple_uses(i32 %a) nounwind {
+; RV32I-LABEL: test_insbi_shifted_mask_multiple_uses:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a1, 15
+; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: addi a0, a0, 10
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32IXQCIBM-LABEL: test_insbi_shifted_mask_multiple_uses:
+; RV32IXQCIBM: # %bb.0:
+; RV32IXQCIBM-NEXT: lui a1, 15
+; RV32IXQCIBM-NEXT: or a1, a1, a0
+; RV32IXQCIBM-NEXT: addi a0, a0, 10
+; RV32IXQCIBM-NEXT: xor a0, a0, a1
+; RV32IXQCIBM-NEXT: ret
+;
+; RV32IXQCIBMZBS-LABEL: test_insbi_shifted_mask_multiple_uses:
+; RV32IXQCIBMZBS: # %bb.0:
+; RV32IXQCIBMZBS-NEXT: lui a1, 15
+; RV32IXQCIBMZBS-NEXT: or a1, a1, a0
+; RV32IXQCIBMZBS-NEXT: addi a0, a0, 10
+; RV32IXQCIBMZBS-NEXT: xor a0, a0, a1
+; RV32IXQCIBMZBS-NEXT: ret
+ %or = or i32 %a, 61440
+ %add = add i32 %a, 10
+ %xor = xor i32 %or, %add
+ ret i32 %xor
+}
+
define i32 @test_single_bit_set(i32 %a) nounwind {
; RV32I-LABEL: test_single_bit_set:
; RV32I: # %bb.0:
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
The transformation done in llvm#147349 was incorrect since we were not passing the input node of the `OR` instruction to the `QC.INSBI` instruction leading to the generated instruction doing the wrong thing. In order to do this we first needed to add the output register to `QC.INSBI` as being both an input and output. The code produced after the above fix will need a copy (mv) to preserve the register input to the OR instruction if it has more than one use making the transformation net neutral ( `6-byte QC.E.ORI/ORAI` vs `2-byte C.MV + 4-byte QC.INSB`I). Avoid doing the transformation if there is more than one use of the input register to the OR instruction. (cherry picked from commit d67d91a)
@lenary (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport d67d91a
Requested by: @lenary