Skip to content

Commit 68567be

Browse files
committed
[SDAG][AArch64] Boolean and/or reduce to umax/min reduce (PR41635)
This addresses one half of https://bugs.llvm.org/show_bug.cgi?id=41635 by combining a VECREDUCE_AND/OR into a VECREDUCE_UMIN/UMAX (if the latter is legal but the former is not) for zero-or-all-ones boolean reductions (which are detected based on sign bits). Differential Revision: https://reviews.llvm.org/D61398 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360054 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a0a0058 commit 68567be

File tree

2 files changed

+32
-118
lines changed

2 files changed

+32
-118
lines changed

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18670,6 +18670,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
1867018670
SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
1867118671
SDValue N0 = N->getOperand(0);
1867218672
EVT VT = N0.getValueType();
18673+
unsigned Opcode = N->getOpcode();
1867318674

1867418675
// VECREDUCE over 1-element vector is just an extract.
1867518676
if (VT.getVectorNumElements() == 1) {
@@ -18682,6 +18683,17 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
1868218683
return Res;
1868318684
}
1868418685

18686+
// On a boolean vector an and/or reduction is the same as a umin/umax
18687+
// reduction. Convert them if the latter is legal while the former isn't.
18688+
if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
18689+
unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
18690+
? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
18691+
if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
18692+
TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
18693+
DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
18694+
return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
18695+
}
18696+
1868518697
return SDValue();
1868618698
}
1868718699

test/CodeGen/AArch64/vecreduce-bool.ll

Lines changed: 20 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,8 @@ define i32 @reduce_and_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
3535
; CHECK-NEXT: shl v0.2s, v0.2s, #24
3636
; CHECK-NEXT: sshr v0.2s, v0.2s, #24
3737
; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
38-
; CHECK-NEXT: mov w8, v0.s[1]
39-
; CHECK-NEXT: fmov w9, s0
40-
; CHECK-NEXT: and w8, w9, w8
38+
; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s
39+
; CHECK-NEXT: fmov w8, s0
4140
; CHECK-NEXT: tst w8, #0x1
4241
; CHECK-NEXT: csel w0, w0, w1, ne
4342
; CHECK-NEXT: ret
@@ -53,13 +52,8 @@ define i32 @reduce_and_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
5352
; CHECK-NEXT: shl v0.4h, v0.4h, #8
5453
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
5554
; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
56-
; CHECK-NEXT: umov w10, v0.h[1]
57-
; CHECK-NEXT: umov w11, v0.h[0]
58-
; CHECK-NEXT: umov w9, v0.h[2]
59-
; CHECK-NEXT: and w10, w11, w10
60-
; CHECK-NEXT: umov w8, v0.h[3]
61-
; CHECK-NEXT: and w9, w10, w9
62-
; CHECK-NEXT: and w8, w9, w8
55+
; CHECK-NEXT: uminv h0, v0.4h
56+
; CHECK-NEXT: fmov w8, s0
6357
; CHECK-NEXT: tst w8, #0x1
6458
; CHECK-NEXT: csel w0, w0, w1, ne
6559
; CHECK-NEXT: ret
@@ -73,21 +67,8 @@ define i32 @reduce_and_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
7367
; CHECK-LABEL: reduce_and_v8:
7468
; CHECK: // %bb.0:
7569
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
76-
; CHECK-NEXT: umov w14, v0.b[1]
77-
; CHECK-NEXT: umov w15, v0.b[0]
78-
; CHECK-NEXT: umov w13, v0.b[2]
79-
; CHECK-NEXT: and w14, w15, w14
80-
; CHECK-NEXT: umov w12, v0.b[3]
81-
; CHECK-NEXT: and w13, w14, w13
82-
; CHECK-NEXT: umov w11, v0.b[4]
83-
; CHECK-NEXT: and w12, w13, w12
84-
; CHECK-NEXT: umov w10, v0.b[5]
85-
; CHECK-NEXT: and w11, w12, w11
86-
; CHECK-NEXT: umov w9, v0.b[6]
87-
; CHECK-NEXT: and w10, w11, w10
88-
; CHECK-NEXT: umov w8, v0.b[7]
89-
; CHECK-NEXT: and w9, w10, w9
90-
; CHECK-NEXT: and w8, w9, w8
70+
; CHECK-NEXT: uminv b0, v0.8b
71+
; CHECK-NEXT: fmov w8, s0
9172
; CHECK-NEXT: tst w8, #0x1
9273
; CHECK-NEXT: csel w0, w0, w1, ne
9374
; CHECK-NEXT: ret
@@ -101,23 +82,8 @@ define i32 @reduce_and_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
10182
; CHECK-LABEL: reduce_and_v16:
10283
; CHECK: // %bb.0:
10384
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
104-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
105-
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
106-
; CHECK-NEXT: umov w8, v0.b[1]
107-
; CHECK-NEXT: umov w9, v0.b[0]
108-
; CHECK-NEXT: and w8, w9, w8
109-
; CHECK-NEXT: umov w9, v0.b[2]
110-
; CHECK-NEXT: and w8, w8, w9
111-
; CHECK-NEXT: umov w9, v0.b[3]
112-
; CHECK-NEXT: and w8, w8, w9
113-
; CHECK-NEXT: umov w9, v0.b[4]
114-
; CHECK-NEXT: and w8, w8, w9
115-
; CHECK-NEXT: umov w9, v0.b[5]
116-
; CHECK-NEXT: and w8, w8, w9
117-
; CHECK-NEXT: umov w9, v0.b[6]
118-
; CHECK-NEXT: and w8, w8, w9
119-
; CHECK-NEXT: umov w9, v0.b[7]
120-
; CHECK-NEXT: and w8, w8, w9
85+
; CHECK-NEXT: uminv b0, v0.16b
86+
; CHECK-NEXT: fmov w8, s0
12187
; CHECK-NEXT: tst w8, #0x1
12288
; CHECK-NEXT: csel w0, w0, w1, ne
12389
; CHECK-NEXT: ret
@@ -133,23 +99,8 @@ define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
13399
; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
134100
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
135101
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
136-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
137-
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
138-
; CHECK-NEXT: umov w8, v0.b[1]
139-
; CHECK-NEXT: umov w9, v0.b[0]
140-
; CHECK-NEXT: and w8, w9, w8
141-
; CHECK-NEXT: umov w9, v0.b[2]
142-
; CHECK-NEXT: and w8, w8, w9
143-
; CHECK-NEXT: umov w9, v0.b[3]
144-
; CHECK-NEXT: and w8, w8, w9
145-
; CHECK-NEXT: umov w9, v0.b[4]
146-
; CHECK-NEXT: and w8, w8, w9
147-
; CHECK-NEXT: umov w9, v0.b[5]
148-
; CHECK-NEXT: and w8, w8, w9
149-
; CHECK-NEXT: umov w9, v0.b[6]
150-
; CHECK-NEXT: and w8, w8, w9
151-
; CHECK-NEXT: umov w9, v0.b[7]
152-
; CHECK-NEXT: and w8, w8, w9
102+
; CHECK-NEXT: uminv b0, v0.16b
103+
; CHECK-NEXT: fmov w8, s0
153104
; CHECK-NEXT: tst w8, #0x1
154105
; CHECK-NEXT: csel w0, w0, w1, ne
155106
; CHECK-NEXT: ret
@@ -179,9 +130,8 @@ define i32 @reduce_or_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
179130
; CHECK-NEXT: shl v0.2s, v0.2s, #24
180131
; CHECK-NEXT: sshr v0.2s, v0.2s, #24
181132
; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
182-
; CHECK-NEXT: mov w8, v0.s[1]
183-
; CHECK-NEXT: fmov w9, s0
184-
; CHECK-NEXT: orr w8, w9, w8
133+
; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s
134+
; CHECK-NEXT: fmov w8, s0
185135
; CHECK-NEXT: tst w8, #0x1
186136
; CHECK-NEXT: csel w0, w0, w1, ne
187137
; CHECK-NEXT: ret
@@ -197,13 +147,8 @@ define i32 @reduce_or_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
197147
; CHECK-NEXT: shl v0.4h, v0.4h, #8
198148
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
199149
; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
200-
; CHECK-NEXT: umov w10, v0.h[1]
201-
; CHECK-NEXT: umov w11, v0.h[0]
202-
; CHECK-NEXT: umov w9, v0.h[2]
203-
; CHECK-NEXT: orr w10, w11, w10
204-
; CHECK-NEXT: umov w8, v0.h[3]
205-
; CHECK-NEXT: orr w9, w10, w9
206-
; CHECK-NEXT: orr w8, w9, w8
150+
; CHECK-NEXT: umaxv h0, v0.4h
151+
; CHECK-NEXT: fmov w8, s0
207152
; CHECK-NEXT: tst w8, #0x1
208153
; CHECK-NEXT: csel w0, w0, w1, ne
209154
; CHECK-NEXT: ret
@@ -217,21 +162,8 @@ define i32 @reduce_or_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
217162
; CHECK-LABEL: reduce_or_v8:
218163
; CHECK: // %bb.0:
219164
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
220-
; CHECK-NEXT: umov w14, v0.b[1]
221-
; CHECK-NEXT: umov w15, v0.b[0]
222-
; CHECK-NEXT: umov w13, v0.b[2]
223-
; CHECK-NEXT: orr w14, w15, w14
224-
; CHECK-NEXT: umov w12, v0.b[3]
225-
; CHECK-NEXT: orr w13, w14, w13
226-
; CHECK-NEXT: umov w11, v0.b[4]
227-
; CHECK-NEXT: orr w12, w13, w12
228-
; CHECK-NEXT: umov w10, v0.b[5]
229-
; CHECK-NEXT: orr w11, w12, w11
230-
; CHECK-NEXT: umov w9, v0.b[6]
231-
; CHECK-NEXT: orr w10, w11, w10
232-
; CHECK-NEXT: umov w8, v0.b[7]
233-
; CHECK-NEXT: orr w9, w10, w9
234-
; CHECK-NEXT: orr w8, w9, w8
165+
; CHECK-NEXT: umaxv b0, v0.8b
166+
; CHECK-NEXT: fmov w8, s0
235167
; CHECK-NEXT: tst w8, #0x1
236168
; CHECK-NEXT: csel w0, w0, w1, ne
237169
; CHECK-NEXT: ret
@@ -245,23 +177,8 @@ define i32 @reduce_or_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
245177
; CHECK-LABEL: reduce_or_v16:
246178
; CHECK: // %bb.0:
247179
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
248-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
249-
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
250-
; CHECK-NEXT: umov w8, v0.b[1]
251-
; CHECK-NEXT: umov w9, v0.b[0]
252-
; CHECK-NEXT: orr w8, w9, w8
253-
; CHECK-NEXT: umov w9, v0.b[2]
254-
; CHECK-NEXT: orr w8, w8, w9
255-
; CHECK-NEXT: umov w9, v0.b[3]
256-
; CHECK-NEXT: orr w8, w8, w9
257-
; CHECK-NEXT: umov w9, v0.b[4]
258-
; CHECK-NEXT: orr w8, w8, w9
259-
; CHECK-NEXT: umov w9, v0.b[5]
260-
; CHECK-NEXT: orr w8, w8, w9
261-
; CHECK-NEXT: umov w9, v0.b[6]
262-
; CHECK-NEXT: orr w8, w8, w9
263-
; CHECK-NEXT: umov w9, v0.b[7]
264-
; CHECK-NEXT: orr w8, w8, w9
180+
; CHECK-NEXT: umaxv b0, v0.16b
181+
; CHECK-NEXT: fmov w8, s0
265182
; CHECK-NEXT: tst w8, #0x1
266183
; CHECK-NEXT: csel w0, w0, w1, ne
267184
; CHECK-NEXT: ret
@@ -277,23 +194,8 @@ define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
277194
; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
278195
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
279196
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
280-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
281-
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
282-
; CHECK-NEXT: umov w8, v0.b[1]
283-
; CHECK-NEXT: umov w9, v0.b[0]
284-
; CHECK-NEXT: orr w8, w9, w8
285-
; CHECK-NEXT: umov w9, v0.b[2]
286-
; CHECK-NEXT: orr w8, w8, w9
287-
; CHECK-NEXT: umov w9, v0.b[3]
288-
; CHECK-NEXT: orr w8, w8, w9
289-
; CHECK-NEXT: umov w9, v0.b[4]
290-
; CHECK-NEXT: orr w8, w8, w9
291-
; CHECK-NEXT: umov w9, v0.b[5]
292-
; CHECK-NEXT: orr w8, w8, w9
293-
; CHECK-NEXT: umov w9, v0.b[6]
294-
; CHECK-NEXT: orr w8, w8, w9
295-
; CHECK-NEXT: umov w9, v0.b[7]
296-
; CHECK-NEXT: orr w8, w8, w9
197+
; CHECK-NEXT: umaxv b0, v0.16b
198+
; CHECK-NEXT: fmov w8, s0
297199
; CHECK-NEXT: tst w8, #0x1
298200
; CHECK-NEXT: csel w0, w0, w1, ne
299201
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)