Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit b24ba55

Browse files
ILyoanalexcrichton
authored andcommitted
Splitstack implementation for android
1 parent ae43eea commit b24ba55

9 files changed

+250
-7
lines changed

lib/Target/AArch64/AArch64Subtarget.h

+3
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ class AArch64Subtarget : public AArch64GenSubtargetInfo {
6464

6565
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
6666
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
67+
bool isTargetAndroid() const {
68+
return TargetTriple.getEnvironment() == Triple::Android;
69+
}
6770

6871
bool hasFPARMv8() const { return HasFPARMv8; }
6972
bool hasNEON() const { return HasNEON; }

lib/Target/ARM/ARMFrameLowering.cpp

+218
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
#include "ARMFrameLowering.h"
1515
#include "ARMBaseInstrInfo.h"
1616
#include "ARMBaseRegisterInfo.h"
17+
#include "ARMInstrInfo.h"
1718
#include "ARMMachineFunctionInfo.h"
19+
#include "ARMTargetMachine.h"
1820
#include "MCTargetDesc/ARMAddressingModes.h"
1921
#include "llvm/CodeGen/MachineFrameInfo.h"
2022
#include "llvm/CodeGen/MachineFunction.h"
@@ -1603,3 +1605,219 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
16031605
MBB.erase(I);
16041606
}
16051607

1608+
// Round Value up to the smallest constant that an ARM instruction can encode
// as an immediate and that is greater than or equal to Value.
// An ARM immediate is any value that can be produced by rotating an 8-bit
// value right by an even number of bits within a 32-bit word.
// Zero maps to zero. Inputs above 0xFF000000 have no encodable 32-bit
// constant above them; for those the left shift below wraps and 0 is returned.
static uint32_t AlignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalise: move the value left two bits at a time (keeping the rotation
  // even) until bit 30 or bit 31 is occupied.
  unsigned LeftShift = 0;
  for (; (Value & 0xC0000000) == 0; LeftShift += 2)
    Value <<= 2;

  // Keep only the top byte, rounding up when any of the discarded low 24 bits
  // were set.
  uint32_t Rounded = Value >> 24;
  if ((Value & 0x00FFFFFF) != 0)
    ++Rounded;

  // The round-up may carry into bit 8; clear the two lowest bits in that case.
  if ((Rounded & 0x00000100) != 0)
    Rounded &= 0x000001FC;

  // Undo the normalisation so the byte lands back at its original position.
  return LeftShift > 24 ? Rounded >> (LeftShift - 24)
                        : Rounded << (24 - LeftShift);
}
1637+
1638+
// The stack limit in the TCB is set to this many bytes above the actual
1639+
// stack limit.
1640+
static const uint64_t kSplitStackAvailable = 256;
1641+
1642+
// Adjust function prologue to enable split stack.
1643+
// Only support android.
1644+
void
1645+
ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
1646+
const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();
1647+
1648+
// Doesn't support vararg function.
1649+
if (MF.getFunction()->isVarArg())
1650+
report_fatal_error("Segmented stacks do not support vararg functions.");
1651+
// Doesn't support other than android.
1652+
if (!ST->isTargetAndroid())
1653+
report_fatal_error("Segmented statks not supported on this platfrom.");
1654+
1655+
MachineBasicBlock &prologueMBB = MF.front();
1656+
MachineFrameInfo* MFI = MF.getFrameInfo();
1657+
const ARMBaseInstrInfo &TII = *TM.getInstrInfo();
1658+
ARMFunctionInfo* ARMFI = MF.getInfo<ARMFunctionInfo>();
1659+
DebugLoc DL;
1660+
1661+
// Use R4 and R5 as scratch register.
1662+
// We should save R4 and R5 before use it and restore before
1663+
// leave the function.
1664+
unsigned ScratchReg0 = ARM::R4;
1665+
unsigned ScratchReg1 = ARM::R5;
1666+
// Use the last tls slot.
1667+
unsigned TlsOffset = 63;
1668+
uint64_t AlignedStackSize;
1669+
1670+
MachineBasicBlock* prevStackMBB = MF.CreateMachineBasicBlock();
1671+
MachineBasicBlock* postStackMBB = MF.CreateMachineBasicBlock();
1672+
MachineBasicBlock* allocMBB = MF.CreateMachineBasicBlock();
1673+
MachineBasicBlock* checkMBB = MF.CreateMachineBasicBlock();
1674+
1675+
for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
1676+
e = prologueMBB.livein_end(); i != e; ++i) {
1677+
allocMBB->addLiveIn(*i);
1678+
checkMBB->addLiveIn(*i);
1679+
prevStackMBB->addLiveIn(*i);
1680+
postStackMBB->addLiveIn(*i);
1681+
}
1682+
1683+
MF.push_front(postStackMBB);
1684+
MF.push_front(allocMBB);
1685+
MF.push_front(checkMBB);
1686+
MF.push_front(prevStackMBB);
1687+
1688+
// The required stack size that is aligend to ARM constant critarion.
1689+
uint64_t StackSize = MFI->getStackSize();
1690+
1691+
// If the front-end requested a fixed stack segment size, use that.
1692+
if (MF.getFunction()->hasFnAttribute("fixedstacksegment")) {
1693+
StackSize = MF.getTarget().Options.FixedStackSegmentSize;
1694+
}
1695+
1696+
AlignedStackSize = AlignToARMConstant(StackSize)
1697+
1698+
// When the frame size is less than 256 we just compare the stack
1699+
// boundary directly to the value of the stack pointer, per gcc.
1700+
bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
1701+
1702+
// We will use two of callee save registers as scratch register so we
1703+
// need to save those registers into stack frame before use it.
1704+
// We will use SR0 to hold stack limit and SR1 to stack size requested.
1705+
// and arguments for __morestack().
1706+
// SR0: Scratch Register #0
1707+
// SR1: Scratch Register #1
1708+
// push {SR0, SR1}
1709+
AddDefaultPred(BuildMI(prevStackMBB, DL, TII.get(ARM::STMDB_UPD))
1710+
.addReg(ARM::SP, RegState::Define)
1711+
.addReg(ARM::SP))
1712+
.addReg(ScratchReg0)
1713+
.addReg(ScratchReg1);
1714+
1715+
if (CompareStackPointer) {
1716+
// mov SR1, sp
1717+
AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
1718+
.addReg(ARM::SP)).addReg(0);
1719+
} else {
1720+
// sub SR1, sp, #StackSize
1721+
AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
1722+
.addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
1723+
}
1724+
1725+
// Get TLS base address.
1726+
// mrc p15, #0, SR0, c13, c0, #3
1727+
AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::MRC), ScratchReg0)
1728+
.addImm(15)
1729+
.addImm(0)
1730+
.addImm(13)
1731+
.addImm(0)
1732+
.addImm(3));
1733+
1734+
// The last slot, assume that the last tls slot holds the stack limit
1735+
// add SR0, SR0, #252
1736+
AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::ADDri), ScratchReg0)
1737+
.addReg(ScratchReg0).addImm(4*TlsOffset)).addReg(0);
1738+
1739+
// Get stack limit.
1740+
// ldr SR0, [sr0]
1741+
AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
1742+
.addReg(ScratchReg0).addImm(0));
1743+
1744+
// Compare stack limit with stack size requested.
1745+
// cmp SR0, SR1
1746+
AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::CMPrr))
1747+
.addReg(ScratchReg0)
1748+
.addReg(ScratchReg1));
1749+
1750+
// This jump is taken if StackLimit < SP - stack required.
1751+
BuildMI(checkMBB, DL, TII.get(ARM::Bcc)).addMBB(postStackMBB)
1752+
.addImm(ARMCC::LO)
1753+
.addReg(ARM::CPSR);
1754+
1755+
1756+
// Calling __morestack(StackSize, Size of stack arguments).
1757+
// __morestack knows that the stack size requested is in SR0(r4)
1758+
// and amount size of stack arguments is in SR1(r5).
1759+
1760+
// Pass first argument for the __morestack by Scratch Register #0.
1761+
// The amount size of stack required
1762+
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
1763+
.addImm(AlignedStackSize)).addReg(0);
1764+
// Pass second argument for the __morestack by Scratch Register #1.
1765+
// The amount size of stack consumed to save function arguments.
1766+
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
1767+
.addImm(AlignToARMConstant(ARMFI->getArgumentStackSize())))
1768+
.addReg(0);
1769+
1770+
// push {lr} - Save return address of this function.
1771+
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::STMDB_UPD))
1772+
.addReg(ARM::SP, RegState::Define)
1773+
.addReg(ARM::SP))
1774+
.addReg(ARM::LR);
1775+
1776+
// Call __morestack().
1777+
BuildMI(allocMBB, DL, TII.get(ARM::BL))
1778+
.addExternalSymbol("__morestack");
1779+
1780+
// Restore return address of this original function.
1781+
// pop {lr}
1782+
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::LDMIA_UPD))
1783+
.addReg(ARM::SP, RegState::Define)
1784+
.addReg(ARM::SP))
1785+
.addReg(ARM::LR);
1786+
1787+
1788+
// Restore SR0 and SR1 in case of __morestack() was called.
1789+
// __morestack() will skip postStackMBB block so we need to restore
1790+
// scratch registers from here.
1791+
// pop {SR0, SR1}
1792+
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::LDMIA_UPD))
1793+
.addReg(ARM::SP, RegState::Define)
1794+
.addReg(ARM::SP))
1795+
.addReg(ScratchReg0)
1796+
.addReg(ScratchReg1);
1797+
1798+
// Return from this function.
1799+
AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::MOVr), ARM::PC)
1800+
.addReg(ARM::LR)).addReg(0);
1801+
1802+
// Restore SR0 and SR1 in case of __morestack() was not called.
1803+
// pop {SR0, SR1}
1804+
AddDefaultPred(BuildMI(postStackMBB, DL, TII.get(ARM::LDMIA_UPD))
1805+
.addReg(ARM::SP, RegState::Define)
1806+
.addReg(ARM::SP))
1807+
.addReg(ScratchReg0)
1808+
.addReg(ScratchReg1);
1809+
1810+
// Organizing MBB lists
1811+
postStackMBB->addSuccessor(&prologueMBB);
1812+
1813+
allocMBB->addSuccessor(postStackMBB);
1814+
1815+
checkMBB->addSuccessor(postStackMBB);
1816+
checkMBB->addSuccessor(allocMBB);
1817+
1818+
prevStackMBB->addSuccessor(checkMBB);
1819+
1820+
#ifdef XDEBUG
1821+
MF.verify();
1822+
#endif
1823+
}

lib/Target/ARM/ARMFrameLowering.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@ namespace llvm {
2323

2424
class ARMFrameLowering : public TargetFrameLowering {
2525
protected:
26+
const ARMBaseTargetMachine &TM;
2627
const ARMSubtarget &STI;
2728

2829
public:
29-
explicit ARMFrameLowering(const ARMSubtarget &sti)
30+
explicit ARMFrameLowering(const ARMBaseTargetMachine& tm,
31+
const ARMSubtarget &sti)
3032
: TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
31-
STI(sti) {
33+
TM(tm), STI(sti) {
3234
}
3335

3436
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
@@ -59,6 +61,8 @@ class ARMFrameLowering : public TargetFrameLowering {
5961
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
6062
RegScavenger *RS) const;
6163

64+
void adjustForSegmentedStacks(MachineFunction &MF) const;
65+
6266
private:
6367
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
6468
const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,

lib/Target/ARM/ARMISelLowering.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -3060,6 +3060,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
30603060
VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
30613061
CCInfo.getNextStackOffset());
30623062

3063+
AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3064+
30633065
return Chain;
30643066
}
30653067

lib/Target/ARM/ARMMachineFunctionInfo.h

+7
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
114114
/// relocation models.
115115
unsigned GlobalBaseReg;
116116

117+
/// ArgumentStackSize - amount of bytes on stack consumed by the arguments
118+
/// being passed on the stack
119+
unsigned ArgumentStackSize;
120+
117121
public:
118122
ARMFunctionInfo() :
119123
isThumb(false),
@@ -182,6 +186,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
182186
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
183187
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
184188

189+
/// getArgumentStackSize - Return the stored number of bytes of stack
/// consumed by arguments passed on the stack.
unsigned getArgumentStackSize() const {
  return ArgumentStackSize;
}
190+
/// setArgumentStackSize - Record the number of bytes of stack consumed by
/// arguments passed on the stack.
void setArgumentStackSize(unsigned size) {
  ArgumentStackSize = size;
}
191+
185192
unsigned createJumpTableUId() {
186193
return JumpTableUId++;
187194
}

lib/Target/ARM/ARMSubtarget.h

+3
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
346346
return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
347347
TargetTriple.getEnvironment() == Triple::EABIHF;
348348
}
349+
bool isTargetAndroid() const {
350+
return TargetTriple.getEnvironment() == Triple::Android;
351+
}
349352

350353
bool isAPCS_ABI() const {
351354
assert(TargetABI != ARM_ABI_UNKNOWN);

lib/Target/ARM/ARMTargetMachine.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
124124
DL(computeDataLayout(Subtarget)),
125125
TLInfo(*this),
126126
TSInfo(*this),
127-
FrameLowering(Subtarget) {
127+
FrameLowering(*this, Subtarget) {
128128
initAsmInfo();
129129
if (!Subtarget.hasARMOps())
130130
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
@@ -146,8 +146,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
146146
TLInfo(*this),
147147
TSInfo(*this),
148148
FrameLowering(Subtarget.hasThumb2()
149-
? new ARMFrameLowering(Subtarget)
150-
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
149+
? new ARMFrameLowering(*this, Subtarget)
150+
: (ARMFrameLowering*)new Thumb1FrameLowering(*this, Subtarget)) {
151151
initAsmInfo();
152152
}
153153

lib/Target/ARM/ARMTargetMachine.h

+4
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ class ARMBaseTargetMachine : public LLVMTargetMachine {
6161
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
6262

6363
virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE);
64+
65+
/// getInstrInfo - Base-class stub for the instruction-info accessor. The body
/// only reaches llvm_unreachable, so a call that is not dispatched to an
/// override is a programming error.
/// NOTE(review): presumably the concrete ARM/Thumb target machines override
/// this; ARMFrameLowering calls it through an ARMBaseTargetMachine reference
/// - confirm.
virtual const ARMBaseInstrInfo *getInstrInfo() const {
66+
llvm_unreachable("getInstrInfo not implemented");
67+
}
6468
};
6569

6670
/// ARMTargetMachine - ARM target machine.

lib/Target/ARM/Thumb1FrameLowering.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@
2222
#include "llvm/Target/TargetFrameLowering.h"
2323

2424
namespace llvm {
25+
class ARMBaseTargetMachine;
2526
class ARMSubtarget;
2627

2728
class Thumb1FrameLowering : public ARMFrameLowering {
2829
public:
29-
explicit Thumb1FrameLowering(const ARMSubtarget &sti)
30-
: ARMFrameLowering(sti) {
30+
explicit Thumb1FrameLowering(const ARMBaseTargetMachine &tm,
31+
const ARMSubtarget &sti)
32+
: ARMFrameLowering(tm, sti) {
3133
}
3234

3335
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into

0 commit comments

Comments
 (0)