Skip to content

Commit 93a4ded

Browse files
committed
[AArch64][SVE] Add ptest intrinsics
Summary: Implements the following intrinsics:
* @llvm.aarch64.sve.ptest.any
* @llvm.aarch64.sve.ptest.first
* @llvm.aarch64.sve.ptest.last

Reviewers: sdesmalen, efriedma, dancgr, mgudim, cameron.mcinally, rengolin

Reviewed By: efriedma

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72398
1 parent ada9646 commit 93a4ded

File tree

7 files changed

+130
-1
lines changed

7 files changed

+130
-1
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
10141014
llvm_i32_ty],
10151015
[IntrNoMem]>;
10161016

1017+
// Signature shared by the SVE predicate-test intrinsics.
//   Result:    a single i1.
//   Operand 0: an overloaded vector type (llvm_anyvector_ty).
//   Operand 1: constrained to the same type as operand 0 (LLVMMatchType<0>).
// Declared IntrNoMem.
class AdvSIMD_SVE_PTEST_Intrinsic
1018+
: Intrinsic<[llvm_i1_ty],
1019+
[llvm_anyvector_ty,
1020+
LLVMMatchType<0>],
1021+
[IntrNoMem]>;
1022+
10171023
class AdvSIMD_SVE_TBL_Intrinsic
10181024
: Intrinsic<[llvm_anyvector_ty],
10191025
[LLVMMatchType<0>,
@@ -1552,6 +1558,14 @@ def int_aarch64_sve_pnext : AdvSIMD_Pred1VectorArg_Intrinsic;
15521558
def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic;
15531559
def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic;
15541560

1561+
//
1562+
// Testing predicates
1563+
//
1564+
1565+
def int_aarch64_sve_ptest_any : AdvSIMD_SVE_PTEST_Intrinsic;
1566+
def int_aarch64_sve_ptest_first : AdvSIMD_SVE_PTEST_Intrinsic;
1567+
def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic;
1568+
15551569
//
15561570
// Gather loads:
15571571
//

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,6 +1357,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
13571357
case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
13581358
case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
13591359
case AArch64ISD::INSR: return "AArch64ISD::INSR";
1360+
case AArch64ISD::PTEST: return "AArch64ISD::PTEST";
13601361
case AArch64ISD::PTRUE: return "AArch64ISD::PTRUE";
13611362
case AArch64ISD::GLD1: return "AArch64ISD::GLD1";
13621363
case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED";
@@ -10889,6 +10890,30 @@ static SDValue tryConvertSVEWideCompare(SDNode *N, unsigned ReplacementIID,
1088910890
return SDValue();
1089010891
}
1089110892

10893+
// Builds the DAG nodes that run an SVE predicate test and turn one of the
// resulting conditions into an integer value of type VT.
//
//   DAG  - selection DAG in which the new nodes are created.
//   VT   - type of the returned value. The select is built in the type VT
//          legalises to and is then zero-extended or truncated back to VT.
//   Pg   - first operand handed to the AArch64ISD::PTEST node.
//   Op   - second operand handed to the AArch64ISD::PTEST node; asserted to be
//          a legal scalable vector type. Also supplies the debug location.
//   Cond - condition to materialise; CSEL is given the inverse of it.
//
// Returns the CSEL result converted to VT. FVal/TVal are passed to CSEL in
// swapped order to compensate for the inverted condition, so the value is
// intended to be 1 when Cond holds and 0 otherwise.
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
10894+
AArch64CC::CondCode Cond) {
10895+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10896+
10897+
SDLoc DL(Op);
10898+
EVT OpVT = Op.getValueType();
10899+
assert(OpVT.isScalableVector() && TLI.isTypeLegal(OpVT) &&
10900+
"Expected legal scalable vector type!");
10901+
10902+
// Ensure target specific opcodes are using legal type.
10903+
EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
10904+
SDValue TVal = DAG.getConstant(1, DL, OutVT);
10905+
SDValue FVal = DAG.getConstant(0, DL, OutVT);
10906+
10907+
// Set condition code (CC) flags.
// The PTEST node's only result has type MVT::Other; it is consumed below as
// the last operand of CSEL.
10908+
SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
10909+
10910+
// Convert CC to integer based on requested condition.
10911+
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
10912+
SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
10913+
SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
10914+
// Res has the legalised type OutVT; convert it back to the requested VT.
return DAG.getZExtOrTrunc(Res, DL, VT);
10915+
}
10916+
1089210917
static SDValue performIntrinsicCombine(SDNode *N,
1089310918
TargetLowering::DAGCombinerInfo &DCI,
1089410919
const AArch64Subtarget *Subtarget) {
@@ -10989,6 +11014,15 @@ static SDValue performIntrinsicCombine(SDNode *N,
1098911014
case Intrinsic::aarch64_sve_cmpls_wide:
1099011015
return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs, true,
1099111016
DCI, DAG);
11017+
case Intrinsic::aarch64_sve_ptest_any:
11018+
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
11019+
AArch64CC::ANY_ACTIVE);
11020+
case Intrinsic::aarch64_sve_ptest_first:
11021+
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
11022+
AArch64CC::FIRST_ACTIVE);
11023+
case Intrinsic::aarch64_sve_ptest_last:
11024+
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
11025+
AArch64CC::LAST_ACTIVE);
1099211026
}
1099311027
return SDValue();
1099411028
}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ enum NodeType : unsigned {
212212
TBL,
213213

214214
INSR,
215+
PTEST,
215216
PTRUE,
216217

217218
// Unsigned gather loads.

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithIn
7373
def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
7474
def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
7575

76+
// Type profile for the PTEST node: no results and two operands, where
// operand 0 must be a vector and operand 1 must have the same type as
// operand 0.
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
77+
// Pattern-matchable handle for the "AArch64ISD::PTEST" node, using the
// profile above.
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
78+
7679
let Predicates = [HasSVE] in {
7780

7881
def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">;
@@ -1086,6 +1089,15 @@ let Predicates = [HasSVE] in {
10861089
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
10871090
(FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
10881091

1092+
// Select AArch64ptest to the PTEST_PP instruction. One pattern is given per
// predicate type (nxv16i1, nxv8i1, nxv4i1, nxv2i1); all four map to the same
// instruction with the operands passed through unchanged.
def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
1093+
(PTEST_PP PPR:$pg, PPR:$src)>;
1094+
def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),
1095+
(PTEST_PP PPR:$pg, PPR:$src)>;
1096+
def : Pat<(AArch64ptest (nxv4i1 PPR:$pg), (nxv4i1 PPR:$src)),
1097+
(PTEST_PP PPR:$pg, PPR:$src)>;
1098+
def : Pat<(AArch64ptest (nxv2i1 PPR:$pg), (nxv2i1 PPR:$src)),
1099+
(PTEST_PP PPR:$pg, PPR:$src)>;
1100+
10891101
def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
10901102
def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
10911103
def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,13 @@ enum CondCode { // Meaning (integer) Meaning (floating-point)
250250
AL = 0xe, // Always (unconditional) Always (unconditional)
251251
NV = 0xf, // Always (unconditional) Always (unconditional)
252252
// Note the NV exists purely to disassemble 0b1111. Execution is "always".
253-
Invalid
253+
Invalid,
254+
255+
// Common aliases used for SVE.
256+
ANY_ACTIVE = NE, // (!Z)
257+
FIRST_ACTIVE = MI, // ( N)
258+
LAST_ACTIVE = LO, // (!C)
259+
NONE_ACTIVE = EQ // ( Z)
254260
};
255261

256262
inline static const char *getCondCodeName(CondCode Code) {
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
2+
3+
;
4+
; PTEST
5+
;
6+
7+
; Calls llvm.aarch64.sve.ptest.any on two <vscale x 16 x i1> operands and
; returns the i1 result. The expected output is a PTEST immediately followed
; by a CSET on the "ne" condition and a return.
define i1 @ptest_any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
8+
; CHECK-LABEL: ptest_any:
9+
; CHECK: ptest p0, p1.b
10+
; CHECK-NEXT: cset w0, ne
11+
; CHECK-NEXT: ret
12+
%out = call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
13+
ret i1 %out
14+
}
15+
16+
; Calls llvm.aarch64.sve.ptest.first on two <vscale x 16 x i1> operands and
; returns the i1 result. The expected output is a PTEST immediately followed
; by a CSET on the "mi" condition and a return.
define i1 @ptest_first(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
17+
; CHECK-LABEL: ptest_first:
18+
; CHECK: ptest p0, p1.b
19+
; CHECK-NEXT: cset w0, mi
20+
; CHECK-NEXT: ret
21+
%out = call i1 @llvm.aarch64.sve.ptest.first(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
22+
ret i1 %out
23+
}
24+
25+
; Calls llvm.aarch64.sve.ptest.last on two <vscale x 16 x i1> operands and
; returns the i1 result. The expected output is a PTEST immediately followed
; by a CSET on the "lo" condition and a return.
define i1 @ptest_last(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
26+
; CHECK-LABEL: ptest_last:
27+
; CHECK: ptest p0, p1.b
28+
; CHECK-NEXT: cset w0, lo
29+
; CHECK-NEXT: ret
30+
%out = call i1 @llvm.aarch64.sve.ptest.last(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
31+
ret i1 %out
32+
}
33+
34+
declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
35+
declare i1 @llvm.aarch64.sve.ptest.first(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
36+
declare i1 @llvm.aarch64.sve.ptest.last(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
2+
3+
; Ensure we use the inverted CC result of SVE compare instructions when branching.
4+
; Feeds the result of a wide compare-less-than against zero into ptest.any and
; branches on the i1 result. The expected output is the compare, then a PTEST,
; then a conditional branch on "ne", with no instruction in between.
define void @sve_cmplt_setcc_inverted(<vscale x 8 x i16>* %out, <vscale x 8 x i16> %in, <vscale x 8 x i1> %pg) {
5+
; CHECK-LABEL: @sve_cmplt_setcc_inverted
6+
; CHECK: cmplt p1.h, p0/z, z0.h, #0
7+
; CHECK-NEXT: ptest p0, p1.b
8+
; CHECK-NEXT: b.ne
9+
entry:
10+
%0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %in, <vscale x 2 x i64> zeroinitializer)
11+
%1 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %0)
12+
; A true result skips straight to if.end; a false result runs the store.
br i1 %1, label %if.end, label %if.then
13+
14+
if.then:
15+
; Masked store of %in to %out under predicate %pg.
tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %in, <vscale x 8 x i16>* %out, i32 2, <vscale x 8 x i1> %pg)
16+
br label %if.end
17+
18+
if.end:
19+
ret void
20+
}
21+
22+
declare i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
23+
24+
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
25+
26+
declare void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>*, i32, <vscale x 8 x i1>)

0 commit comments

Comments
 (0)