-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[AMDGPU] Recognise bitmask operations as srcmods #149110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[AMDGPU] Recognise bitmask operations as srcmods #149110
Conversation
Add to the VOP patterns to recognise when or/xor/and are modifying only the sign bit and replace with the appropriate srcmod.
@llvm/pr-subscribers-backend-amdgpu Author: Chris Jackson (chrisjbris) ChangesAdd to the VOP patterns to recognise when or/xor/and are modifying only the sign bit and replace with the appropriate srcmod. Patch is 70.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149110.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 25672a52345cb..3b10f12e984dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3036,6 +3036,36 @@ bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
Src = Src.getOperand(0);
}
+ // Convert various sign-bit masks to src mods. Currently disabled for 16-bit
+ // types as the codegen replaces the operand without adding a srcmod.
+ // Recognise (xor a, 0x80000000) as NEG SrcMod.
+ if (Src->getOpcode() == ISD::XOR &&
+ Src.getValueType().getFixedSizeInBits() != 16)
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Src->getOperand(1)))
+ if (CRHS->getAPIntValue().isSignMask()) {
+ Mods |= SISrcMods::NEG;
+ Src = Src.getOperand(0);
+ }
+
+ // Recognise (and a, 0x7fffffff) as ABS SrcMod.
+ if (Src->getOpcode() == ISD::AND && AllowAbs &&
+ Src.getValueType().getFixedSizeInBits() != 16)
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Src->getOperand(1)))
+ if (CRHS->getAPIntValue().isMaxSignedValue()) {
+ Mods |= SISrcMods::ABS;
+ Src = Src.getOperand(0);
+ }
+
+ // Recognise (or a, 0x80000000) as NEG+ABS SrcModifiers.
+ if (Src->getOpcode() == ISD::OR && AllowAbs &&
+ Src.getValueType().getFixedSizeInBits() != 16)
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Src->getOperand(1)))
+ if (CRHS->getAPIntValue().isSignMask()) {
+ Mods |= SISrcMods::ABS;
+ Mods |= SISrcMods::NEG;
+ Src = Src.getOperand(0);
+ }
+
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
index 1b092b283290a..5674ae328406d 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@@ -349,29 +349,24 @@ define i32 @select_fneg_xor_select_i32(i1 %cond0, i1 %cond1, i32 %arg0, i32 %arg
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
-; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v0
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, -v2, v3, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: select_fneg_xor_select_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, -v2, v3, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fneg0 = xor i32 %arg0, -2147483648
%select0 = select i1 %cond0, i32 %arg1, i32 %fneg0
@@ -550,31 +545,25 @@ define i64 @select_fneg_xor_select_i64(i1 %cond0, i1 %cond1, i64 %arg0, i64 %arg
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
-; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc
-; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, -v3, v5, vcc
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
-; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v1, v2, -v2, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: select_fneg_xor_select_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
-; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v4 :: v_dual_and_b32 v1, 1, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v2, -v3, v5, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, -v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fneg0 = xor i64 %arg0, 9223372036854775808
%select0 = select i1 %cond0, i64 %arg1, i64 %fneg0
diff --git a/llvm/test/CodeGen/AMDGPU/integer-select-src-modifiers.ll b/llvm/test/CodeGen/AMDGPU/integer-select-src-modifiers.ll
new file mode 100644
index 0000000000000..8b8ddc6a99b75
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/integer-select-src-modifiers.ll
@@ -0,0 +1,612 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+
+define i32 @fneg_select_i32_1(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fneg_select_i32_1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, -v1, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_select_i32_1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, -v1, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = xor i32 %a, u0x80000000
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %neg.a, i32 %b
+ ret i32 %select
+}
+
+define i32 @fneg_select_i32_2(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fneg_select_i32_2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, -v1, v2, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_select_i32_2:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, -v1, v2, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = xor i32 %a, u0x80000000
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %b, i32 %neg.a
+ ret i32 %select
+}
+
+define i32 @fneg_select_i32_both(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fneg_select_i32_both:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, -v2, -v1, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_select_i32_both:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, -v2, -v1, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = xor i32 %a, u0x80000000
+ %neg.b = xor i32 %b, u0x80000000
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %neg.a, i32 %neg.b
+ ret i32 %select
+}
+
+define i32 @fneg_1_fabs_2_select_i32(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fneg_1_fabs_2_select_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, |v1|, -v1, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_1_fabs_2_select_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, |v1|, -v1, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = xor i32 %a, u0x80000000
+ %abs.b = and i32 %a, u0x7fffffff
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %neg.a, i32 %abs.b
+ ret i32 %select
+}
+
+define <2 x i32> @fneg_select_v2i32_1(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+; GCN-LABEL: fneg_select_v2i32_1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v4, -v2, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e64 v1, v5, -v3, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_select_v2i32_1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, -v2, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, -v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = xor <2 x i32> %a, splat (i32 u0x80000000)
+ %cmp = icmp eq <2 x i32> %cond, zeroinitializer
+ %select = select <2 x i1> %cmp, <2 x i32> %neg.a, <2 x i32> %b
+ ret <2 x i32> %select
+}
+
+define <2 x i32> @fneg_select_v2i32_2(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+; GCN-LABEL: fneg_select_v2i32_2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, -v2, v4, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e64 v1, -v3, v5, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_select_v2i32_2:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, -v2, v4, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, -v3, v5, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = xor <2 x i32> %a, splat (i32 u0x80000000)
+ %cmp = icmp eq <2 x i32> %cond, zeroinitializer
+ %select = select <2 x i1> %cmp, <2 x i32> %b, <2 x i32> %neg.a
+ ret <2 x i32> %select
+}
+
+define i32 @fabs_select_i32_1(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fabs_select_i32_1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, |v1|, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fabs_select_i32_1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, |v1|, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = and i32 %a, u0x7fffffff
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %neg.a, i32 %b
+ ret i32 %select
+}
+
+define i32 @fabs_select_i32_2(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fabs_select_i32_2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, |v1|, v2, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fabs_select_i32_2:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, |v1|, v2, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = and i32 %a, u0x7fffffff
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %b, i32 %neg.a
+ ret i32 %select
+}
+
+define <2 x i32> @fneg_1_fabs_2_select_v2i32(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+; GCN-LABEL: fneg_1_fabs_2_select_v2i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, -v2, |v2|, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e64 v1, -v3, |v3|, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_1_fabs_2_select_v2i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, -v2, |v2|, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, -v3, |v3|, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = xor <2 x i32> %a, splat (i32 u0x80000000)
+ %abs.b = and <2 x i32> %a, splat (i32 u0x7fffffff)
+ %cmp = icmp eq <2 x i32> %cond, zeroinitializer
+ %select = select <2 x i1> %cmp, <2 x i32> %abs.b, <2 x i32> %neg.a
+ ret <2 x i32> %select
+}
+
+define i32 @fneg_fabs_select_i32_1(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fneg_fabs_select_i32_1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, -|v1|, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_fabs_select_i32_1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, -|v1|, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = or i32 %a, u0x80000000
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %neg.a, i32 %b
+ ret i32 %select
+}
+
+define i32 @fneg_fabs_select_i32_2(i32 %cond, i32 %a, i32 %b) {
+; GCN-LABEL: fneg_fabs_select_i32_2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, -|v1|, v2, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_fabs_select_i32_2:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, -|v1|, v2, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = or i32 %a, u0x80000000
+ %cmp = icmp eq i32 %cond, zeroinitializer
+ %select = select i1 %cmp, i32 %b, i32 %neg.a
+ ret i32 %select
+}
+
+define <2 x i32> @fneg_fabs_select_v2i32_1(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+; GCN-LABEL: fneg_fabs_select_v2i32_1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v4, -|v2|, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e64 v1, v5, -|v3|, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_fabs_select_v2i32_1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, -|v2|, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, -|v3|, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = or <2 x i32> %a, splat (i32 u0x80000000)
+ %cmp = icmp eq <2 x i32> %cond, zeroinitializer
+ %select = select <2 x i1> %cmp, <2 x i32> %neg.a, <2 x i32> %b
+ ret <2 x i32> %select
+}
+
+define <2 x i32> @fneg_fabs_select_v2i32_2(<2 x i32> %cond, <2 x i32> %a, <2 x i32> %b) {
+; GCN-LABEL: fneg_fabs_select_v2i32_2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, -|v2|, v4, vcc
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e64 v1, -|v3|, v5, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_fabs_select_v2i32_2:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, -|v2|, v4, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, -|v3|, v5, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = or <2 x i32> %a, splat (i32 u0x80000000)
+ %cmp = icmp eq <2 x i32> %cond, zeroinitializer
+ %select = select <2 x i1> %cmp, <2 x i32> %b, <2 x i32> %neg.a
+ ret <2 x i32> %select
+}
+
+define i64 @fneg_select_i64_1(i64 %cond, i64 %a, i64 %b) {
+; GCN-LABEL: fneg_select_i64_1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v1, v5, -v3, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_select_i64_1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, -v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = xor i64 %a, u0x8000000000000000
+ %cmp = icmp eq i64 %cond, zeroinitializer
+ %select = select i1 %cmp, i64 %neg.a, i64 %b
+ ret i64 %select
+}
+
+define i64 @fneg_select_i64_2(i64 %cond, i64 %a, i64 %b) {
+; GCN-LABEL: fneg_select_i64_2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v1, -v3, v5, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_select_i64_2:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, -v3, v5, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = xor i64 %a, u0x8000000000000000
+ %cmp = icmp eq i64 %cond, zeroinitializer
+ %select = select i1 %cmp, i64 %b, i64 %neg.a
+ ret i64 %select
+}
+
+define i64 @fneg_1_fabs_2_select_i64(i64 %cond, i64 %a, i64 %b) {
+; GCN-LABEL: fneg_1_fabs_2_select_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v1, |v5|, -v3, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fneg_1_fabs_2_select_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1]
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, |v5|, -v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %neg.a = xor i64 %a, u0x8000000000000000
+ %abs.b = and i64 %b, u0x7fffffffffffffff
+ %cmp = icmp eq i64 %cond, zeroinitializer
+ %select = select i1 %cmp, i64 %neg.a, i64 %abs.b
+ ret i64 %select
+}
+
+define i64 @fabs_select_i64_1(i64 %cond, i64 %a, i64 %b) {
+; GCN-LABEL: fabs_select_i64_1:
+; GCN: ; %b...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only nits, really
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add some scalar test cases? Particularly for the 64-bit cases. The SALU versions do not support source modifiers, so they should not fold if it's a scalar candidate
Yes, I've added some tests to integer-select-src-modifiers.ll. |
Add to the VOP patterns to recognise when or/xor/and are modifying only the sign bit and replace with the appropriate srcmod. This enables the use of src mods on integers too.
I think this method is preferred to #147325, as it is simpler and avoids the workarounds for simplydemandedbits / doesn't have to be awkwardly imposed on the Target Independent Combine.
I was uncertain about the status of srcmods for the 16-bit types, so this type is intentionally left unaffected.