[SelectionDAG] Fix copyExtraInfo where new node has entry as operand #149307
Conversation
Add special case handling where a new replacement node has the entry node as an operand, i.e., it does not depend on any other nodes. This can be observed with the existing X86/pcsections-atomics.ll test case when targeting Haswell, where certain 128-bit atomics are transformed into arch-specific instructions, with some operands having no other dependencies.
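The following is a minimal, self-contained sketch of the depth-first walk that this change adjusts. The types and names here (Node, copyExtraInfoDFS, ExtraInfo) are hypothetical stand-ins, not the actual LLVM SDNode/NodeExtraInfo API: the walk starts at the replacement node To, copies From's extra info to every new node it reaches, and normally reports failure if it runs into the entry node; the special case lets a To whose only dependency is the entry node succeed by copying the extra info to To alone.

#include <functional>
#include <map>
#include <set>
#include <string>
#include <vector>

// Simplified stand-in for an SDNode: a name plus operand pointers.
struct Node {
  std::string Name;
  std::vector<Node *> Operands;
};

// Walk To's operands towards From, copying From's extra info to each new node.
// Returns false if the walk escapes the replacement subgraph (reaches Entry),
// except in the special case handled below.
bool copyExtraInfoDFS(Node *From, Node *To, Node *Entry,
                      std::map<Node *, std::string> &ExtraInfo) {
  std::set<Node *> Visited;
  std::function<bool(Node *)> Self = [&](Node *N) {
    if (N == From)
      return true; // Reached the node being replaced; nothing more to do.
    if (!Visited.insert(N).second)
      return true; // Already visited.
    if (N == Entry)
      return false; // Escaped the replacement subgraph.
    for (Node *Op : N->Operands) {
      if (N == To && Op == Entry)
        break; // Special case: To depends only on the entry node.
      if (!Self(Op))
        return false;
    }
    ExtraInfo[N] = ExtraInfo[From]; // Propagate the extra info to the new node.
    return true;
  };
  return Self(To);
}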
@llvm/pr-subscribers-backend-x86

Author: Marco Elver (melver)

Changes

Add special case handling where a new replacement node has the entry node as an operand, i.e., it does not depend on any other nodes. This can be observed with the existing X86/pcsections-atomics.ll test case when targeting Haswell, where certain 128-bit atomics are transformed into arch-specific instructions, with some operands having no other dependencies.

Patch is 191.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149307.diff

2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 70a39eab1e720..682d93d0abf3f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13872,6 +13872,8 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
return;
}
+ const SDNode *EntrySDN = getEntryNode().getNode();
+
// We need to copy NodeExtraInfo to all _new_ nodes that are being introduced
// through the replacement of From with To. Otherwise, replacements of a node
// (From) with more complex nodes (To and its operands) may result in lost
@@ -13903,9 +13905,14 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
return true;
if (!Visited.insert(N).second)
return true;
- if (getEntryNode().getNode() == N)
+ if (EntrySDN == N)
return false;
for (const SDValue &Op : N->op_values()) {
+ if (N == To && Op.getNode() == EntrySDN) {
+ // Special case: New node's operand is the entry node; just need to
+ // copy extra info to new node.
+ break;
+ }
if (!Self(Self, Op.getNode()))
return false;
}
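To see why the early break matters, here is a small hypothetical driver for the sketch above (same stand-in types, not LLVM code): the replacement node's only operand is the entry node, so the walk copies the extra info straight to it and succeeds, whereas without the special case the traversal would reach the entry node through the operand list and report failure even though nothing else needs to be visited.

#include <cassert>

int main() {
  // Tiny graph: both From and the replacement To depend only on the entry node.
  Node Entry{"entry", {}};
  Node From{"from", {&Entry}};
  Node To{"to", {&Entry}};

  std::map<Node *, std::string> ExtraInfo;
  ExtraInfo[&From] = "pcsections"; // Extra info attached to the node being replaced.

  bool Ok = copyExtraInfoDFS(&From, &To, &Entry, ExtraInfo);
  assert(Ok && ExtraInfo[&To] == "pcsections"); // To now carries the copied info.
  return 0;
}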
diff --git a/llvm/test/CodeGen/X86/pcsections-atomics.ll b/llvm/test/CodeGen/X86/pcsections-atomics.ll
index 672ebc1ec7275..69ae1f19f3200 100644
--- a/llvm/test/CodeGen/X86/pcsections-atomics.ll
+++ b/llvm/test/CodeGen/X86/pcsections-atomics.ll
@@ -9,6 +9,7 @@
; RUN: llc -O1 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O1
; RUN: llc -O2 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O2
; RUN: llc -O3 -mattr=cx16 < %s | FileCheck %s --check-prefixes=O3
+; RUN: llc -O3 -mcpu=haswell -mattr=cx16 < %s | FileCheck %s --check-prefixes=HASWELL-O3
target triple = "x86_64-unknown-linux-gnu"
@@ -50,6 +51,14 @@ define void @mixed_atomic_non_atomic(ptr %a) {
; O3-NEXT: movl $1, (%rdi)
; O3-NEXT: decl (%rdi)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: mixed_atomic_non_atomic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: incl (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection0:
+; HASWELL-O3-NEXT: movl $1, (%rdi)
+; HASWELL-O3-NEXT: decl (%rdi)
+; HASWELL-O3-NEXT: retq
entry:
; Accesses the same location atomically and non-atomically.
%0 = load volatile i32, ptr %a, align 4
@@ -107,6 +116,17 @@ define i64 @mixed_complex_atomic_non_atomic(ptr %a, ptr %b) {
; O3-NEXT: movq %rdx, (%rsi)
; O3-NEXT: addq %rcx, %rax
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: mixed_complex_atomic_non_atomic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movl $1, %eax
+; HASWELL-O3-NEXT: .Lpcsection1:
+; HASWELL-O3-NEXT: lock xaddq %rax, (%rdi)
+; HASWELL-O3-NEXT: movq (%rsi), %rcx
+; HASWELL-O3-NEXT: leaq 1(%rcx), %rdx
+; HASWELL-O3-NEXT: movq %rdx, (%rsi)
+; HASWELL-O3-NEXT: addq %rcx, %rax
+; HASWELL-O3-NEXT: retq
entry:
%0 = atomicrmw add ptr %a, i64 1 monotonic, align 8, !pcsections !0
%1 = load i64, ptr %b, align 8
@@ -148,6 +168,14 @@ define i8 @atomic8_load_unordered(ptr %a) {
; O3-NEXT: movzbl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_load_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection2:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i8, ptr %a unordered, align 1, !pcsections !0
@@ -187,6 +215,14 @@ define i8 @atomic8_load_monotonic(ptr %a) {
; O3-NEXT: movzbl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_load_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection3:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i8, ptr %a monotonic, align 1, !pcsections !0
@@ -226,6 +262,14 @@ define i8 @atomic8_load_acquire(ptr %a) {
; O3-NEXT: movzbl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_load_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection4:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i8, ptr %a acquire, align 1, !pcsections !0
@@ -265,6 +309,14 @@ define i8 @atomic8_load_seq_cst(ptr %a) {
; O3-NEXT: movzbl (%rdi), %eax
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_load_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection5:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = load atomic i8, ptr %a seq_cst, align 1, !pcsections !0
@@ -304,6 +356,14 @@ define void @atomic8_store_unordered(ptr %a) {
; O3-NEXT: movb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_store_unordered:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection6:
+; HASWELL-O3-NEXT: movb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i8 42, ptr %a unordered, align 1, !pcsections !0
@@ -343,6 +403,14 @@ define void @atomic8_store_monotonic(ptr %a) {
; O3-NEXT: movb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_store_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection7:
+; HASWELL-O3-NEXT: movb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i8 42, ptr %a monotonic, align 1, !pcsections !0
@@ -382,6 +450,14 @@ define void @atomic8_store_release(ptr %a) {
; O3-NEXT: movb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_store_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection8:
+; HASWELL-O3-NEXT: movb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i8 42, ptr %a release, align 1, !pcsections !0
@@ -425,6 +501,15 @@ define void @atomic8_store_seq_cst(ptr %a) {
; O3-NEXT: xchgb %al, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_store_seq_cst:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection9:
+; HASWELL-O3-NEXT: xchgb %al, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
store atomic i8 42, ptr %a seq_cst, align 1, !pcsections !0
@@ -468,6 +553,15 @@ define void @atomic8_xchg_monotonic(ptr %a) {
; O3-NEXT: xchgb %al, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection10:
+; HASWELL-O3-NEXT: xchgb %al, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -507,6 +601,14 @@ define void @atomic8_add_monotonic(ptr %a) {
; O3-NEXT: lock addb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_add_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection11:
+; HASWELL-O3-NEXT: lock addb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -546,6 +648,14 @@ define void @atomic8_sub_monotonic(ptr %a) {
; O3-NEXT: lock subb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection12:
+; HASWELL-O3-NEXT: lock subb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -585,6 +695,14 @@ define void @atomic8_and_monotonic(ptr %a) {
; O3-NEXT: lock andb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_and_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection13:
+; HASWELL-O3-NEXT: lock andb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -624,6 +742,14 @@ define void @atomic8_or_monotonic(ptr %a) {
; O3-NEXT: lock orb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_or_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection14:
+; HASWELL-O3-NEXT: lock orb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -663,6 +789,14 @@ define void @atomic8_xor_monotonic(ptr %a) {
; O3-NEXT: lock xorb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection15:
+; HASWELL-O3-NEXT: lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -763,6 +897,27 @@ define void @atomic8_nand_monotonic(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_monotonic:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection16:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB16_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection17:
+; HASWELL-O3-NEXT: notb %cl
+; HASWELL-O3-NEXT: .Lpcsection18:
+; HASWELL-O3-NEXT: orb $-43, %cl
+; HASWELL-O3-NEXT: .Lpcsection19:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection20:
+; HASWELL-O3-NEXT: jne .LBB16_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i8 42 monotonic, align 1, !pcsections !0
@@ -806,6 +961,15 @@ define void @atomic8_xchg_acquire(ptr %a) {
; O3-NEXT: xchgb %al, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection21:
+; HASWELL-O3-NEXT: xchgb %al, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -845,6 +1009,14 @@ define void @atomic8_add_acquire(ptr %a) {
; O3-NEXT: lock addb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_add_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection22:
+; HASWELL-O3-NEXT: lock addb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -884,6 +1056,14 @@ define void @atomic8_sub_acquire(ptr %a) {
; O3-NEXT: lock subb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection23:
+; HASWELL-O3-NEXT: lock subb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -923,6 +1103,14 @@ define void @atomic8_and_acquire(ptr %a) {
; O3-NEXT: lock andb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_and_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection24:
+; HASWELL-O3-NEXT: lock andb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -962,6 +1150,14 @@ define void @atomic8_or_acquire(ptr %a) {
; O3-NEXT: lock orb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_or_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection25:
+; HASWELL-O3-NEXT: lock orb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -1001,6 +1197,14 @@ define void @atomic8_xor_acquire(ptr %a) {
; O3-NEXT: lock xorb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection26:
+; HASWELL-O3-NEXT: lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -1101,6 +1305,27 @@ define void @atomic8_nand_acquire(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_acquire:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection27:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB23_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection28:
+; HASWELL-O3-NEXT: notb %cl
+; HASWELL-O3-NEXT: .Lpcsection29:
+; HASWELL-O3-NEXT: orb $-43, %cl
+; HASWELL-O3-NEXT: .Lpcsection30:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection31:
+; HASWELL-O3-NEXT: jne .LBB23_1
+; HASWELL-O3-NEXT: # %bb.2: # %atomicrmw.end
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw nand ptr %a, i8 42 acquire, align 1, !pcsections !0
@@ -1144,6 +1369,15 @@ define void @atomic8_xchg_release(ptr %a) {
; O3-NEXT: xchgb %al, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xchg_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: movb $42, %al
+; HASWELL-O3-NEXT: .Lpcsection32:
+; HASWELL-O3-NEXT: xchgb %al, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xchg ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1183,6 +1417,14 @@ define void @atomic8_add_release(ptr %a) {
; O3-NEXT: lock addb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_add_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection33:
+; HASWELL-O3-NEXT: lock addb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw add ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1222,6 +1464,14 @@ define void @atomic8_sub_release(ptr %a) {
; O3-NEXT: lock subb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_sub_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection34:
+; HASWELL-O3-NEXT: lock subb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw sub ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1261,6 +1511,14 @@ define void @atomic8_and_release(ptr %a) {
; O3-NEXT: lock andb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_and_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection35:
+; HASWELL-O3-NEXT: lock andb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw and ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1300,6 +1558,14 @@ define void @atomic8_or_release(ptr %a) {
; O3-NEXT: lock orb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_or_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection36:
+; HASWELL-O3-NEXT: lock orb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw or ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1339,6 +1605,14 @@ define void @atomic8_xor_release(ptr %a) {
; O3-NEXT: lock xorb $42, (%rdi)
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_xor_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection37:
+; HASWELL-O3-NEXT: lock xorb $42, (%rdi)
+; HASWELL-O3-NEXT: movq $1, foo(%rip)
+; HASWELL-O3-NEXT: retq
entry:
load volatile i64, ptr @foo, align 8
%x = atomicrmw xor ptr %a, i8 42 release, align 1, !pcsections !0
@@ -1439,6 +1713,27 @@ define void @atomic8_nand_release(ptr %a) {
; O3-NEXT: # %bb.2: # %atomicrmw.end
; O3-NEXT: movq $1, foo(%rip)
; O3-NEXT: retq
+;
+; HASWELL-O3-LABEL: atomic8_nand_release:
+; HASWELL-O3: # %bb.0: # %entry
+; HASWELL-O3-NEXT: movq foo(%rip), %rax
+; HASWELL-O3-NEXT: .Lpcsection38:
+; HASWELL-O3-NEXT: movzbl (%rdi), %eax
+; HASWELL-O3-NEXT: .p2align 4
+; HASWELL-O3-NEXT: .LBB30_1: # %atomicrmw.start
+; HASWELL-O3-NEXT: # =>This Inner Loop Header: Depth=1
+; HASWELL-O3-NEXT: movl %eax, %ecx
+; HASWELL-O3-NEXT: .Lpcsection39:
+; HASWELL-O3-NEXT: notb %cl
+; HASWELL-O3-NEXT: .Lpcsection40:
+; HASWELL-O3-NEXT: orb $-43, %cl
+; HASWELL-O3-NEXT: .Lpcsection41:
+; HASWELL-O3-NEXT: lock cmpxchgb %cl, (%rdi)
+; HASWELL-O3-NEXT: .Lpcsection42:
+; HASWELL-O3-NEXT: jne .LBB30_1
+; HASWEL...
[truncated]
@llvm/pr-subscribers-llvm-selectiondag

Author: Marco Elver (melver)

Changes

Add special case handling where a new replacement node has the entry node as an operand, i.e., it does not depend on any other nodes. This can be observed with the existing X86/pcsections-atomics.ll test case when targeting Haswell, where certain 128-bit atomics are transformed into arch-specific instructions, with some operands having no other dependencies.