Skip to content

Commit eaabaf7

Browse files
committed
Revert "[MS] Overhaul how clang passes overaligned args on x86_32"
It broke some Chromium tests, so let's revert until it can be fixed; see https://crbug.com/1046362 This reverts commit 2af74e2.
1 parent 8e21d7b commit eaabaf7

File tree

6 files changed

+33
-269
lines changed

6 files changed

+33
-269
lines changed

clang/include/clang/CodeGen/CGFunctionInfo.h

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ class ABIArgInfo {
8888
Kind TheKind;
8989
bool PaddingInReg : 1;
9090
bool InAllocaSRet : 1; // isInAlloca()
91-
bool InAllocaIndirect : 1;// isInAlloca()
9291
bool IndirectByVal : 1; // isIndirect()
9392
bool IndirectRealign : 1; // isIndirect()
9493
bool SRetAfterThis : 1; // isIndirect()
@@ -111,8 +110,8 @@ class ABIArgInfo {
111110

112111
public:
113112
ABIArgInfo(Kind K = Direct)
114-
: TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
115-
PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false),
113+
: TypeData(nullptr), PaddingType(nullptr), DirectOffset(0),
114+
TheKind(K), PaddingInReg(false), InAllocaSRet(false),
116115
IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false),
117116
InReg(false), CanBeFlattened(false), SignExt(false) {}
118117

@@ -186,10 +185,9 @@ class ABIArgInfo {
186185
AI.setInReg(true);
187186
return AI;
188187
}
189-
static ABIArgInfo getInAlloca(unsigned FieldIndex, bool Indirect = false) {
188+
static ABIArgInfo getInAlloca(unsigned FieldIndex) {
190189
auto AI = ABIArgInfo(InAlloca);
191190
AI.setInAllocaFieldIndex(FieldIndex);
192-
AI.setInAllocaIndirect(Indirect);
193191
return AI;
194192
}
195193
static ABIArgInfo getExpand() {
@@ -382,15 +380,6 @@ class ABIArgInfo {
382380
AllocaFieldIndex = FieldIndex;
383381
}
384382

385-
unsigned getInAllocaIndirect() const {
386-
assert(isInAlloca() && "Invalid kind!");
387-
return InAllocaIndirect;
388-
}
389-
void setInAllocaIndirect(bool Indirect) {
390-
assert(isInAlloca() && "Invalid kind!");
391-
InAllocaIndirect = Indirect;
392-
}
393-
394383
/// Return true if this field of an inalloca struct should be returned
395384
/// to implement a struct return calling convention.
396385
bool getInAllocaSRet() const {

clang/lib/CodeGen/CGCall.cpp

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2339,9 +2339,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
23392339
auto FieldIndex = ArgI.getInAllocaFieldIndex();
23402340
Address V =
23412341
Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName());
2342-
if (ArgI.getInAllocaIndirect())
2343-
V = Address(Builder.CreateLoad(V),
2344-
getContext().getTypeAlignInChars(Ty));
23452342
ArgVals.push_back(ParamValue::forIndirect(V));
23462343
break;
23472344
}
@@ -4041,39 +4038,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
40414038
assert(NumIRArgs == 0);
40424039
assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
40434040
if (I->isAggregate()) {
4041+
// Replace the placeholder with the appropriate argument slot GEP.
40444042
Address Addr = I->hasLValue()
40454043
? I->getKnownLValue().getAddress(*this)
40464044
: I->getKnownRValue().getAggregateAddress();
40474045
llvm::Instruction *Placeholder =
40484046
cast<llvm::Instruction>(Addr.getPointer());
4049-
4050-
if (!ArgInfo.getInAllocaIndirect()) {
4051-
// Replace the placeholder with the appropriate argument slot GEP.
4052-
CGBuilderTy::InsertPoint IP = Builder.saveIP();
4053-
Builder.SetInsertPoint(Placeholder);
4054-
Addr = Builder.CreateStructGEP(ArgMemory,
4055-
ArgInfo.getInAllocaFieldIndex());
4056-
Builder.restoreIP(IP);
4057-
} else {
4058-
// For indirect things such as overaligned structs, replace the
4059-
// placeholder with a regular aggregate temporary alloca. Store the
4060-
// address of this alloca into the struct.
4061-
Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp");
4062-
Address ArgSlot = Builder.CreateStructGEP(
4063-
ArgMemory, ArgInfo.getInAllocaFieldIndex());
4064-
Builder.CreateStore(Addr.getPointer(), ArgSlot);
4065-
}
4066-
deferPlaceholderReplacement(Placeholder, Addr.getPointer());
4067-
} else if (ArgInfo.getInAllocaIndirect()) {
4068-
// Make a temporary alloca and store the address of it into the argument
4069-
// struct.
4070-
Address Addr = CreateMemTempWithoutCast(
4071-
I->Ty, getContext().getTypeAlignInChars(I->Ty),
4072-
"indirect-arg-temp");
4073-
I->copyInto(*this, Addr);
4074-
Address ArgSlot =
4047+
CGBuilderTy::InsertPoint IP = Builder.saveIP();
4048+
Builder.SetInsertPoint(Placeholder);
4049+
Addr =
40754050
Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
4076-
Builder.CreateStore(Addr.getPointer(), ArgSlot);
4051+
Builder.restoreIP(IP);
4052+
deferPlaceholderReplacement(Placeholder, Addr.getPointer());
40774053
} else {
40784054
// Store the RValue into the argument struct.
40794055
Address Addr =

clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 24 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,7 +1676,6 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
16761676
bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
16771677

16781678
Ty = useFirstFieldIfTransparentUnion(Ty);
1679-
TypeInfo TI = getContext().getTypeInfo(Ty);
16801679

16811680
// Check with the C++ ABI first.
16821681
const RecordType *RT = Ty->getAs<RecordType>();
@@ -1726,7 +1725,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
17261725
bool NeedsPadding = false;
17271726
bool InReg;
17281727
if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
1729-
unsigned SizeInRegs = (TI.Width + 31) / 32;
1728+
unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
17301729
SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
17311730
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
17321731
if (InReg)
@@ -1736,44 +1735,29 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
17361735
}
17371736
llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
17381737

1739-
// Pass over-aligned aggregates on Windows indirectly. This behavior was
1740-
// added in MSVC 2015.
1741-
if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32)
1742-
return getIndirectResult(Ty, /*ByVal=*/false, State);
1743-
17441738
// Expand small (<= 128-bit) record types when we know that the stack layout
17451739
// of those arguments will match the struct. This is important because the
17461740
// LLVM backend isn't smart enough to remove byval, which inhibits many
17471741
// optimizations.
17481742
// Don't do this for the MCU if there are still free integer registers
17491743
// (see X86_64 ABI for full explanation).
1750-
if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
1751-
canExpandIndirectArgument(Ty))
1744+
if (getContext().getTypeSize(Ty) <= 4 * 32 &&
1745+
(!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
17521746
return ABIArgInfo::getExpandWithPadding(
17531747
IsFastCall || IsVectorCall || IsRegCall, PaddingType);
17541748

17551749
return getIndirectResult(Ty, true, State);
17561750
}
17571751

17581752
if (const VectorType *VT = Ty->getAs<VectorType>()) {
1759-
// On Windows, vectors are passed directly if registers are available, or
1760-
// indirectly if not. This avoids the need to align argument memory. Pass
1761-
// user-defined vector types larger than 512 bits indirectly for simplicity.
1762-
if (IsWin32StructABI) {
1763-
if (TI.Width <= 512 && State.FreeSSERegs > 0) {
1764-
--State.FreeSSERegs;
1765-
return ABIArgInfo::getDirectInReg();
1766-
}
1767-
return getIndirectResult(Ty, /*ByVal=*/false, State);
1768-
}
1769-
17701753
// On Darwin, some vectors are passed in memory, we handle this by passing
17711754
// it as an i8/i16/i32/i64.
17721755
if (IsDarwinVectorABI) {
1773-
if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
1774-
(TI.Width == 64 && VT->getNumElements() == 1))
1775-
return ABIArgInfo::getDirect(
1776-
llvm::IntegerType::get(getVMContext(), TI.Width));
1756+
uint64_t Size = getContext().getTypeSize(Ty);
1757+
if ((Size == 8 || Size == 16 || Size == 32) ||
1758+
(Size == 64 && VT->getNumElements() == 1))
1759+
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
1760+
Size));
17771761
}
17781762

17791763
if (IsX86_MMXType(CGT.ConvertType(Ty)))
@@ -1803,22 +1787,16 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
18031787
CCState State(FI);
18041788
if (IsMCUABI)
18051789
State.FreeRegs = 3;
1806-
else if (State.CC == llvm::CallingConv::X86_FastCall) {
1790+
else if (State.CC == llvm::CallingConv::X86_FastCall)
18071791
State.FreeRegs = 2;
1808-
State.FreeSSERegs = 3;
1809-
} else if (State.CC == llvm::CallingConv::X86_VectorCall) {
1792+
else if (State.CC == llvm::CallingConv::X86_VectorCall) {
18101793
State.FreeRegs = 2;
18111794
State.FreeSSERegs = 6;
18121795
} else if (FI.getHasRegParm())
18131796
State.FreeRegs = FI.getRegParm();
18141797
else if (State.CC == llvm::CallingConv::X86_RegCall) {
18151798
State.FreeRegs = 5;
18161799
State.FreeSSERegs = 8;
1817-
} else if (IsWin32StructABI) {
1818-
// Since MSVC 2015, the first three SSE vectors have been passed in
1819-
// registers. The rest are passed indirectly.
1820-
State.FreeRegs = DefaultNumRegisterParameters;
1821-
State.FreeSSERegs = 3;
18221800
} else
18231801
State.FreeRegs = DefaultNumRegisterParameters;
18241802

@@ -1865,25 +1843,16 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
18651843
CharUnits &StackOffset, ABIArgInfo &Info,
18661844
QualType Type) const {
18671845
// Arguments are always 4-byte-aligned.
1868-
CharUnits WordSize = CharUnits::fromQuantity(4);
1869-
assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
1846+
CharUnits FieldAlign = CharUnits::fromQuantity(4);
18701847

1871-
// sret pointers and indirect things will require an extra pointer
1872-
// indirection, unless they are byval. Most things are byval, and will not
1873-
// require this indirection.
1874-
bool IsIndirect = false;
1875-
if (Info.isIndirect() && !Info.getIndirectByVal())
1876-
IsIndirect = true;
1877-
Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
1878-
llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
1879-
if (IsIndirect)
1880-
LLTy = LLTy->getPointerTo(0);
1881-
FrameFields.push_back(LLTy);
1882-
StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);
1848+
assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct");
1849+
Info = ABIArgInfo::getInAlloca(FrameFields.size());
1850+
FrameFields.push_back(CGT.ConvertTypeForMem(Type));
1851+
StackOffset += getContext().getTypeSizeInChars(Type);
18831852

18841853
// Insert padding bytes to respect alignment.
18851854
CharUnits FieldEnd = StackOffset;
1886-
StackOffset = FieldEnd.alignTo(WordSize);
1855+
StackOffset = FieldEnd.alignTo(FieldAlign);
18871856
if (StackOffset != FieldEnd) {
18881857
CharUnits NumBytes = StackOffset - FieldEnd;
18891858
llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
@@ -1897,12 +1866,16 @@ static bool isArgInAlloca(const ABIArgInfo &Info) {
18971866
switch (Info.getKind()) {
18981867
case ABIArgInfo::InAlloca:
18991868
return true;
1869+
case ABIArgInfo::Indirect:
1870+
assert(Info.getIndirectByVal());
1871+
return true;
19001872
case ABIArgInfo::Ignore:
19011873
return false;
1902-
case ABIArgInfo::Indirect:
19031874
case ABIArgInfo::Direct:
19041875
case ABIArgInfo::Extend:
1905-
return !Info.getInReg();
1876+
if (Info.getInReg())
1877+
return false;
1878+
return true;
19061879
case ABIArgInfo::Expand:
19071880
case ABIArgInfo::CoerceAndExpand:
19081881
// These are aggregate types which are never passed in registers when
@@ -1936,7 +1909,8 @@ void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
19361909

19371910
// Put the sret parameter into the inalloca struct if it's in memory.
19381911
if (Ret.isIndirect() && !Ret.getInReg()) {
1939-
addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
1912+
CanQualType PtrTy = getContext().getPointerType(FI.getReturnType());
1913+
addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy);
19401914
// On Windows, the hidden sret parameter is always returned in eax.
19411915
Ret.setInAllocaSRet(IsWin32StructABI);
19421916
}

clang/test/CodeGen/x86_32-arguments-win32.c

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -46,47 +46,3 @@ struct s6 {
4646
struct s6 f6_1(void) { while (1) {} }
4747
void f6_2(struct s6 a0) {}
4848

49-
50-
// MSVC passes up to three vectors in registers, and the rest indirectly. We
51-
// (arbitrarily) pass oversized vectors indirectly, since that is the safest way
52-
// to do it.
53-
typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
54-
typedef float __m256 __attribute__((__vector_size__(32), __aligned__(32)));
55-
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
56-
typedef float __m1024 __attribute__((__vector_size__(128), __aligned__(128)));
57-
58-
__m128 gv128;
59-
__m256 gv256;
60-
__m512 gv512;
61-
__m1024 gv1024;
62-
63-
void receive_vec_128(__m128 x, __m128 y, __m128 z, __m128 w, __m128 q) {
64-
gv128 = x + y + z + w + q;
65-
}
66-
void receive_vec_256(__m256 x, __m256 y, __m256 z, __m256 w, __m256 q) {
67-
gv256 = x + y + z + w + q;
68-
}
69-
void receive_vec_512(__m512 x, __m512 y, __m512 z, __m512 w, __m512 q) {
70-
gv512 = x + y + z + w + q;
71-
}
72-
void receive_vec_1024(__m1024 x, __m1024 y, __m1024 z, __m1024 w, __m1024 q) {
73-
gv1024 = x + y + z + w + q;
74-
}
75-
// CHECK-LABEL: define dso_local void @receive_vec_128(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* %0, <4 x float>* %1)
76-
// CHECK-LABEL: define dso_local void @receive_vec_256(<8 x float> inreg %x, <8 x float> inreg %y, <8 x float> inreg %z, <8 x float>* %0, <8 x float>* %1)
77-
// CHECK-LABEL: define dso_local void @receive_vec_512(<16 x float> inreg %x, <16 x float> inreg %y, <16 x float> inreg %z, <16 x float>* %0, <16 x float>* %1)
78-
// CHECK-LABEL: define dso_local void @receive_vec_1024(<32 x float>* %0, <32 x float>* %1, <32 x float>* %2, <32 x float>* %3, <32 x float>* %4)
79-
80-
void pass_vec_128() {
81-
__m128 z = {0};
82-
receive_vec_128(z, z, z, z, z);
83-
}
84-
85-
// CHECK-LABEL: define dso_local void @pass_vec_128()
86-
// CHECK: call void @receive_vec_128(<4 x float> inreg %{{[^,)]*}}, <4 x float> inreg %{{[^,)]*}}, <4 x float> inreg %{{[^,)]*}}, <4 x float>* %{{[^,)]*}}, <4 x float>* %{{[^,)]*}})
87-
88-
89-
void __fastcall fastcall_indirect_vec(__m128 x, __m128 y, __m128 z, __m128 w, int edx, __m128 q) {
90-
gv128 = x + y + z + w + q;
91-
}
92-
// CHECK-LABEL: define dso_local x86_fastcallcc void @"\01@fastcall_indirect_vec@84"(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* inreg %0, i32 inreg %edx, <4 x float>* %1)

clang/test/CodeGenCXX/inalloca-overaligned.cpp

Lines changed: 0 additions & 52 deletions
This file was deleted.

0 commit comments

Comments
 (0)